Skip to content

Commit

Permalink
Merge pull request #8 from ZacharyHampton/fix/zillow-location-validation
Browse files Browse the repository at this point in the history
- zillow location validation
  • Loading branch information
ZacharyHampton committed Sep 19, 2023
2 parents 087854a + 30e5108 commit 02d112e
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 14 deletions.
23 changes: 11 additions & 12 deletions homeharvest/core/scrapers/zillow/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,27 +10,26 @@
class ZillowScraper(Scraper):
def __init__(self, scraper_input):
    """Set up the scraper and build the Zillow search URL.

    Stores the requested listing type, validates the location via
    ``is_plausible_location`` and then selects the search URL that
    matches the listing type.

    Raises:
        NoResultsFound: if the location fails the plausibility check.
    """
    super().__init__(scraper_input)
    self.listing_type = scraper_input.listing_type

    # NOTE(review): self.location is presumably set by the base Scraper
    # initializer -- confirm against the parent class.
    location_ok = self.is_plausible_location(self.location)
    if not location_ok:
        raise NoResultsFound("Invalid location input: {}".format(self.location))

    kind = self.listing_type
    if kind == ListingType.FOR_SALE:
        search_url = f"https://www.zillow.com/homes/for_sale/{self.location}_rb/"
    elif kind == ListingType.FOR_RENT:
        search_url = f"https://www.zillow.com/homes/for_rent/{self.location}_rb/"
    else:
        # Any other listing type falls back to the recently-sold search.
        search_url = f"https://www.zillow.com/homes/recently_sold/{self.location}_rb/"
    self.url = search_url

@staticmethod
def is_plausible_location(location: str) -> bool:
blocks = location.split()
for block in blocks:
if (
any(char.isdigit() for char in block)
and any(char.isalpha() for char in block)
and len(block) > 6
):
return False
return True
def is_plausible_location(self, location: str) -> bool:
    """Check a location against Zillow's autocomplete suggestions endpoint.

    Sends ``location`` as the ``q`` query parameter through
    ``self.session`` and reports whether the JSON response carries a
    non-empty ``results`` entry.

    Args:
        location: Free-form location text supplied by the caller.

    Returns:
        True if the endpoint returned at least one suggestion, False if
        ``results`` is empty, missing, or the payload is not a JSON object.
    """
    # Function-scope import so the block does not depend on file-level imports.
    from urllib.parse import quote

    # Percent-encode the user-supplied text: characters such as '&', '#'
    # or '=' would otherwise truncate or corrupt the query string.
    url = (
        "https://www.zillowstatic.com/autocomplete/v3/suggestions?q={}"
        "&abKey=6666272a-4b99-474c-b857-110ec438732b&clientId=homepage-render"
    ).format(quote(location, safe=""))

    response = self.session.get(url)
    payload = response.json()

    # Tolerate an unexpected payload shape instead of raising KeyError/TypeError.
    results = payload.get("results") if isinstance(payload, dict) else None
    return bool(results)

def search(self):
resp = self.session.get(self.url, headers=self._get_headers())
Expand Down
27 changes: 26 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "homeharvest"
version = "0.2.1"
version = "0.2.2"
description = "Real estate scraping library supporting Zillow, Realtor.com & Redfin."
authors = ["Zachary Hampton <[email protected]>", "Cullen Watson <[email protected]>"]
homepage = "https://github.com/ZacharyHampton/HomeHarvest"
Expand All @@ -10,6 +10,7 @@ readme = "README.md"
python = "^3.10"
requests = "^2.31.0"
pandas = "^2.1.0"
openpyxl = "^3.1.2"


[tool.poetry.group.dev.dependencies]
Expand Down

0 comments on commit 02d112e

Please sign in to comment.