Added a Zillow rental data script
This commit is contained in:
parent
160fb9b749
commit
d0672d57e2
68
Visual Studio Code Projects/zillow-rental-data.py
Normal file
@@ -0,0 +1,68 @@
from bs4 import BeautifulSoup
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
import time

# Part 1 - Scrape the links, addresses, and prices of the rental properties

header = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.125 Safari/537.36",
    "Accept-Language": "en-GB,en-US;q=0.9,en;q=0.8"
}

# Use our Zillow-Clone website (instead of Zillow.com)
response = requests.get("https://appbrewery.github.io/Zillow-Clone/", headers=header)
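# Optional addition: fail fast on a 4xx/5xx response so the parsing below
# never runs against an error page (raises requests.HTTPError on failure)
response.raise_for_status()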

data = response.text
soup = BeautifulSoup(data, "html.parser")

# Create a list of all the links on the page using a CSS Selector
all_link_elements = soup.select(".StyledPropertyCardDataWrapper a")
# Python list comprehension (covered in Day 26)
all_links = [link["href"] for link in all_link_elements]
print(f"There are {len(all_links)} links to individual listings in total:\n")
print(all_links)

# Create a list of all the addresses on the page using a CSS Selector
# Remove newlines \n, pipe symbols |, and extra whitespace to clean up the address data
all_address_elements = soup.select(".StyledPropertyCardDataWrapper address")
all_addresses = [address.get_text().replace(" | ", " ").strip() for address in all_address_elements]
print(f"\nAfter cleaning, the {len(all_addresses)} addresses look like this:\n")
print(all_addresses)

# Create a list of all the prices on the page using a CSS Selector
# Keep only the spans that contain a dollar price, then strip off any "+" suffix and the "/mo" (per month) abbreviation
all_price_elements = soup.select(".PropertyCardWrapper span")
all_prices = [price.get_text().replace("/mo", "").split("+")[0] for price in all_price_elements if "$" in price.text]
print(f"\nAfter cleaning, the {len(all_prices)} prices look like this:\n")
print(all_prices)
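
# Optional sanity check: the form-filling loop below assumes the three lists
# line up one-to-one; mismatched lengths usually mean a selector matched extra elements
assert len(all_links) == len(all_addresses) == len(all_prices)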

# Part 2 - Fill in the Google Form using Selenium

# Optional - Keep the browser open (helps diagnose issues if the script crashes)
chrome_options = webdriver.ChromeOptions()
chrome_options.add_experimental_option("detach", True)
driver = webdriver.Chrome(options=chrome_options)

for n in range(len(all_links)):
    # TODO: Fill in the link to your own Google Form
    driver.get("YOUR_GOOGLE_FORM_LINK_HERE")
    time.sleep(2)
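    # A fixed sleep is the simplest wait; if the form loads slowly, selenium's
    # WebDriverWait with expected_conditions is a more robust alternative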

    # Use the XPath to select the "short answer" fields in your Google Form.
    # Note: your XPath values might differ if you created a different form.
    address = driver.find_element(by=By.XPATH,
                                  value='//*[@id="mG61Hd"]/div[2]/div/div[2]/div[1]/div/div/div[2]/div/div[1]/div/div[1]/input')
    price = driver.find_element(by=By.XPATH,
                                value='//*[@id="mG61Hd"]/div[2]/div/div[2]/div[2]/div/div/div[2]/div/div[1]/div/div[1]/input')
    link = driver.find_element(by=By.XPATH,
                               value='//*[@id="mG61Hd"]/div[2]/div/div[2]/div[3]/div/div/div[2]/div/div[1]/div/div[1]/input')
    submit_button = driver.find_element(by=By.XPATH,
                                        value='//*[@id="mG61Hd"]/div[2]/div/div[3]/div[1]/div[1]/div')

    address.send_keys(all_addresses[n])
    price.send_keys(all_prices[n])
    link.send_keys(all_links[n])
    submit_button.click()
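
# The "detach" option above keeps the last browser window open for inspection;
# call driver.quit() here instead if the browser should close once the loop finishes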