From e3a4a0f4e9aae1acd706afe4d2dec5cc6f0721e8 Mon Sep 17 00:00:00 2001
From: Muhammad Ibrahim
Date: Fri, 22 Nov 2024 21:43:50 +0530
Subject: [PATCH] Added a top 100 movie list website which scrapes data from a website

---
 .../README.md | 29 +++++++++++++++++++
 .../main.py | 18 ++++++++++++
 2 files changed, 47 insertions(+)
 create mode 100644 Web Development Python Projects/Starting Code - 100 movies to watch start/README.md
 create mode 100644 Web Development Python Projects/Starting Code - 100 movies to watch start/main.py

diff --git a/Web Development Python Projects/Starting Code - 100 movies to watch start/README.md b/Web Development Python Projects/Starting Code - 100 movies to watch start/README.md
new file mode 100644
index 0000000..8779315
--- /dev/null
+++ b/Web Development Python Projects/Starting Code - 100 movies to watch start/README.md
@@ -0,0 +1,29 @@
+## 100 Movies that You Must Watch
+
+# Objective
+
+Scrape the top 100 movies of all time from a website. Generate a text file called `movies.txt` that lists the movie titles in ascending order (starting from 1).
+The result should look something like this:
+
+```
+1) The Godfather
+2) The Empire Strikes Back
+3) The Dark Knight
+4) The Shawshank Redemption
+... and so on
+```
+The central idea behind this project is to be able to use BeautifulSoup to obtain some data - like movie titles - from a website like Empire's (or from, say, Time Out or Stacker, which have curated similar lists).
+
+### ⚠️ Important: Use the Internet Archive's URL
+
+Since websites change very frequently, **use this link**
+```
+URL = "https://web.archive.org/web/20200518073855/https://www.empireonline.com/movies/features/best-movies-2/"
+```
+from the Internet Archive's Wayback Machine. That way your work will match the solution video.
+
+(Do *not* use https://www.empireonline.com/movies/features/best-movies-2/ which I've used in the screen recording)
+
+# Solution
+
+You can find the code from my walkthrough and solution as a downloadable .zip file in the course resources for this lesson.
\ No newline at end of file
diff --git a/Web Development Python Projects/Starting Code - 100 movies to watch start/main.py b/Web Development Python Projects/Starting Code - 100 movies to watch start/main.py
new file mode 100644
index 0000000..60298f2
--- /dev/null
+++ b/Web Development Python Projects/Starting Code - 100 movies to watch start/main.py
@@ -0,0 +1,26 @@
+import requests
+from bs4 import BeautifulSoup
+
+URL = "https://web.archive.org/web/20200518073855/https://www.empireonline.com/movies/features/best-movies-2/"
+
+# Write your code below this line 👇
+
+# Fail fast instead of hanging on a slow archive or parsing an error page.
+response = requests.get(URL, timeout=30)
+response.raise_for_status()
+website_html = response.text
+
+soup = BeautifulSoup(website_html, "html.parser")
+all_movies = soup.find_all(name="h3", class_="title")
+movie_titles = [movie.getText() for movie in all_movies]
+if not movie_titles:
+    raise SystemExit("No movie titles found - the page layout may have changed.")
+
+# The README wants the list to start from 1, so flip the scraped order
+# (presumably the page counts down from 100 - confirm against the snapshot).
+movies = movie_titles[::-1]
+
+# Titles may contain non-ASCII characters, so do not rely on the platform's
+# default encoding when writing the file.
+with open("movies.txt", mode="w", encoding="utf-8") as file:
+    file.writelines(f"{movie}\n" for movie in movies)
\ No newline at end of file