From e3a4a0f4e9aae1acd706afe4d2dec5cc6f0721e8 Mon Sep 17 00:00:00 2001
From: Muhammad Ibrahim <rameenfar2013@proton.me>
Date: Fri, 22 Nov 2024 21:43:50 +0530
Subject: [PATCH] Added a top 100 movie list website which scrapes data from a
 website

---
 .../README.md                                 | 29 +++++++++++++++++++
 .../main.py                                   | 18 ++++++++++++
 2 files changed, 47 insertions(+)
 create mode 100644 Web Development Python Projects/Starting Code - 100 movies to watch start/README.md
 create mode 100644 Web Development Python Projects/Starting Code - 100 movies to watch start/main.py

diff --git a/Web Development Python Projects/Starting Code - 100 movies to watch start/README.md b/Web Development Python Projects/Starting Code - 100 movies to watch start/README.md
new file mode 100644
index 0000000..8779315
--- /dev/null
+++ b/Web Development Python Projects/Starting Code - 100 movies to watch start/README.md	
@@ -0,0 +1,29 @@
+## 100 Movies that You Must Watch
+
+# Objective
+
+Scrape the top 100 movies of all time from a website. Generate a text file called `movies.txt` that lists the movie titles in ascending order (starting from 1). 
+The result should look something like this:
+
+```
+1) The Godfather
+2) The Empire Strikes Back
+3) The Dark Knight
+4) The Shawshank Redemption
+... and so on
+```
+The central idea behind this project is to be able to use BeautifulSoup to obtain some data - like movie titles - from a website like Empire's (or from, say, Time Out or Stacker, which have curated similar lists).
+
+### ⚠️ Important: Use the Internet Archive's URL
+
+Since websites change very frequently, **use this link** 
+```
+URL = "https://web.archive.org/web/20200518073855/https://www.empireonline.com/movies/features/best-movies-2/"
+```
+from the Internet Archive's Wayback Machine. That way your work will match the solution video.
+
+(Do *not* use https://www.empireonline.com/movies/features/best-movies-2/, which I've used in the screen recording.)
+
+# Solution
+
+You can find the code from my walkthrough and solution as a downloadable .zip file in the course resources for this lesson. 
\ No newline at end of file
diff --git a/Web Development Python Projects/Starting Code - 100 movies to watch start/main.py b/Web Development Python Projects/Starting Code - 100 movies to watch start/main.py
new file mode 100644
index 0000000..60298f2
--- /dev/null
+++ b/Web Development Python Projects/Starting Code - 100 movies to watch start/main.py	
@@ -0,0 +1,18 @@
+import requests
+from bs4 import BeautifulSoup
+
+URL = "https://web.archive.org/web/20200518073855/https://www.empireonline.com/movies/features/best-movies-2/"
+
+# Write your code below this line 👇
+
+response = requests.get(URL)
+website_html = response.text
+
+soup = BeautifulSoup(website_html, "html.parser")
+all_movies = soup.find_all(name="h3", class_="title")
+movie_title = [movie.getText() for movie in all_movies]
+movies = movie_title[::-1]
+
+with open("movies.txt", mode="w") as file:
+    for movie in movies:
+        file.write(f"{movie}\n")
\ No newline at end of file