Added a Zillow rental data script
This commit is contained in:
parent
160fb9b749
commit
d0672d57e2
68
Visual Studio Code Projects/zillow-rental-data.py
Normal file
@@ -0,0 +1,68 @@
from bs4 import BeautifulSoup
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
import time

# Part 1 - Scrape the links, addresses, and prices of the rental properties

header = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.125 Safari/537.36",
    "Accept-Language": "en-GB,en-US;q=0.9,en;q=0.8"
}

# Use our Zillow-Clone website (instead of Zillow.com)
response = requests.get("https://appbrewery.github.io/Zillow-Clone/", headers=header)
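# Optional addition: fail fast on a 4xx/5xx response so the parsing below
# never runs against an error page (raises requests.HTTPError on failure)
response.raise_for_status()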

data = response.text
soup = BeautifulSoup(data, "html.parser")

# Create a list of all the links on the page using a CSS Selector
all_link_elements = soup.select(".StyledPropertyCardDataWrapper a")
# Python list comprehension (covered in Day 26)
all_links = [link["href"] for link in all_link_elements]
print(f"There are {len(all_links)} links to individual listings in total:\n")
print(all_links)

# Create a list of all the addresses on the page using a CSS Selector
# Remove newlines \n, pipe symbols |, and extra whitespace to clean up the address data
all_address_elements = soup.select(".StyledPropertyCardDataWrapper address")
all_addresses = [address.get_text().replace(" | ", " ").strip() for address in all_address_elements]
print(f"\nAfter cleaning, the {len(all_addresses)} addresses look like this:\n")
print(all_addresses)

# Create a list of all the prices on the page using a CSS Selector
# Keep only the spans that contain a dollar price, then strip off any "+" suffix and the "/mo" (per month) abbreviation
all_price_elements = soup.select(".PropertyCardWrapper span")
all_prices = [price.get_text().replace("/mo", "").split("+")[0] for price in all_price_elements if "$" in price.text]
print(f"\nAfter cleaning, the {len(all_prices)} prices look like this:\n")
print(all_prices)
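
# Optional sanity check: the form-filling loop below assumes the three lists
# line up one-to-one; mismatched lengths usually mean a selector matched extra elements
assert len(all_links) == len(all_addresses) == len(all_prices)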

# Part 2 - Fill in the Google Form using Selenium

# Optional - Keep the browser open (helps diagnose issues if the script crashes)
chrome_options = webdriver.ChromeOptions()
chrome_options.add_experimental_option("detach", True)
driver = webdriver.Chrome(options=chrome_options)

for n in range(len(all_links)):
    # TODO: Fill in the link to your own Google Form
    driver.get("YOUR_GOOGLE_FORM_LINK_HERE")
    time.sleep(2)
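    # A fixed sleep is the simplest wait; if the form loads slowly, selenium's
    # WebDriverWait with expected_conditions is a more robust alternative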

    # Use the XPath to select the "short answer" fields in your Google Form.
    # Note: your XPath values might differ if you created a different form.
    address = driver.find_element(by=By.XPATH,
                                  value='//*[@id="mG61Hd"]/div[2]/div/div[2]/div[1]/div/div/div[2]/div/div[1]/div/div[1]/input')
    price = driver.find_element(by=By.XPATH,
                                value='//*[@id="mG61Hd"]/div[2]/div/div[2]/div[2]/div/div/div[2]/div/div[1]/div/div[1]/input')
    link = driver.find_element(by=By.XPATH,
                               value='//*[@id="mG61Hd"]/div[2]/div/div[2]/div[3]/div/div/div[2]/div/div[1]/div/div[1]/input')
    submit_button = driver.find_element(by=By.XPATH,
                                        value='//*[@id="mG61Hd"]/div[2]/div/div[3]/div[1]/div[1]/div')

    address.send_keys(all_addresses[n])
    price.send_keys(all_prices[n])
    link.send_keys(all_links[n])
    submit_button.click()
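
# The "detach" option above keeps the last browser window open for inspection;
# call driver.quit() here instead if the browser should close once the loop finishes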