Skip to content

Commit

Permalink
fix(zillow): test case
Browse files: browse the repository at this point in the history
  • Loading branch information
cullenwatson committed Sep 19, 2023
1 parent 80186ee commit 62e3321
Show file tree
Hide file tree
Showing 7 changed files with 67 additions and 13 deletions.
12 changes: 9 additions & 3 deletions homeharvest/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,9 @@ def _scrape_single_site(
results = site.search()

properties_dfs = [process_result(result) for result in results]
properties_dfs = [df.dropna(axis=1, how='all') for df in properties_dfs if not df.empty]
properties_dfs = [
df.dropna(axis=1, how="all") for df in properties_dfs if not df.empty
]
if not properties_dfs:
return pd.DataFrame()

Expand Down Expand Up @@ -147,7 +149,9 @@ def scrape_property(
else:
with ThreadPoolExecutor() as executor:
futures = {
executor.submit(_scrape_single_site, location, s_name, listing_type): s_name
executor.submit(
_scrape_single_site, location, s_name, listing_type
): s_name
for s_name in site_name
}

Expand All @@ -169,5 +173,7 @@ def scrape_property(
if col not in final_df.columns:
final_df[col] = None

final_df = final_df.drop_duplicates(subset=["street_address", "city", "unit"], keep="first")
final_df = final_df.drop_duplicates(
subset=["street_address", "city", "unit"], keep="first"
)
return final_df
16 changes: 14 additions & 2 deletions homeharvest/core/scrapers/realtor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,8 +249,20 @@ def handle_area(
unit=parse_unit(result["location"]["address"]["unit"]),
country="USA",
),
latitude=result["location"]["address"]["coordinate"]["lat"] if result and result.get("location") and result["location"].get("address") and result["location"]["address"].get("coordinate") and "lat" in result["location"]["address"]["coordinate"] else None,
longitude=result["location"]["address"]["coordinate"]["lon"] if result and result.get("location") and result["location"].get("address") and result["location"]["address"].get("coordinate") and "lon" in result["location"]["address"]["coordinate"] else None,
latitude=result["location"]["address"]["coordinate"]["lat"]
if result
and result.get("location")
and result["location"].get("address")
and result["location"]["address"].get("coordinate")
and "lat" in result["location"]["address"]["coordinate"]
else None,
longitude=result["location"]["address"]["coordinate"]["lon"]
if result
and result.get("location")
and result["location"].get("address")
and result["location"]["address"].get("coordinate")
and "lon" in result["location"]["address"]["coordinate"]
else None,
site_name=self.site_name,
property_url="https://www.realtor.com/realestateandhomes-detail/"
+ result["property_id"],
Expand Down
14 changes: 10 additions & 4 deletions homeharvest/core/scrapers/redfin/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,10 @@ def get_region_type(match_type: str):
elif match_type == "1":
return "address" #: address, needs to be handled differently

if "exactMatch" not in response_json['payload']:
raise NoResultsFound("No results found for location: {}".format(self.location))
if "exactMatch" not in response_json["payload"]:
raise NoResultsFound(
"No results found for location: {}".format(self.location)
)

if response_json["payload"]["exactMatch"] is not None:
target = response_json["payload"]["exactMatch"]
Expand Down Expand Up @@ -98,8 +100,12 @@ def get_value(key: str) -> Any | None:
price_per_sqft=get_value("pricePerSqFt"),
price=get_value("price"),
mls_id=get_value("mlsId"),
latitude=home["latLong"]["latitude"] if "latLong" in home and "latitude" in home["latLong"] else None,
longitude = home["latLong"]["longitude"] if "latLong" in home and "longitude" in home["latLong"] else None
latitude=home["latLong"]["latitude"]
if "latLong" in home and "latitude" in home["latLong"]
else None,
longitude=home["latLong"]["longitude"]
if "latLong" in home and "longitude" in home["latLong"]
else None,
)

def _parse_building(self, building: dict) -> Property:
Expand Down
17 changes: 16 additions & 1 deletion homeharvest/core/scrapers/zillow/__init__.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,37 @@
import re
import json
import string
from .. import Scraper
from ....utils import parse_address_two, parse_unit
from ....exceptions import GeoCoordsNotFound, NoResultsFound
from ..models import Property, Address, ListingType, PropertyType, SiteName
from ..models import Property, Address, ListingType, PropertyType


class ZillowScraper(Scraper):
def __init__(self, scraper_input):
    """Set up the scraper and build the Zillow search URL.

    Stores ``scraper_input.listing_type`` on the instance, validates
    ``self.location`` with ``is_plausible_location`` and then sets
    ``self.url`` to the for-sale, for-rent or recently-sold search page.

    Raises:
        NoResultsFound: if ``is_plausible_location`` rejects the location.
    """
    super().__init__(scraper_input)
    self.listing_type = scraper_input.listing_type

    # Reject implausible input before building any URL.
    location_ok = self.is_plausible_location(self.location)
    if not location_ok:
        raise NoResultsFound("Invalid location input: {}".format(self.location))

    # Only the path segment differs between listing types; anything that is
    # neither FOR_SALE nor FOR_RENT falls back to the recently-sold page.
    if self.listing_type == ListingType.FOR_SALE:
        segment = "for_sale"
    elif self.listing_type == ListingType.FOR_RENT:
        segment = "for_rent"
    else:
        segment = "recently_sold"

    self.url = f"https://www.zillow.com/homes/{segment}/{self.location}_rb/"

@staticmethod
def is_plausible_location(location: str) -> bool:
    """Return whether ``location`` passes a simple token sanity check.

    The string is split on whitespace. It is rejected (``False``) as soon
    as one token contains at least one digit, at least one letter, and is
    longer than 6 characters; otherwise the result is ``True``. An empty
    or whitespace-only string has no tokens and is therefore accepted.

    NOTE(review): presumably this filters out garbage strings that look
    like random IDs; confirm the intent against the callers and tests.
    """

    def is_suspicious(token: str) -> bool:
        # Same check order as before: digit, then letter, then length.
        contains_digit = any(ch.isdigit() for ch in token)
        if not contains_digit:
            return False
        contains_letter = any(ch.isalpha() for ch in token)
        if not contains_letter:
            return False
        return len(token) > 6

    return not any(is_suspicious(token) for token in location.split())

def search(self):
resp = self.session.get(self.url, headers=self._get_headers())
resp.raise_for_status()
Expand Down
7 changes: 6 additions & 1 deletion tests/test_realtor.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
from homeharvest import scrape_property
from homeharvest.exceptions import InvalidSite, InvalidListingType, NoResultsFound, GeoCoordsNotFound
from homeharvest.exceptions import (
InvalidSite,
InvalidListingType,
NoResultsFound,
GeoCoordsNotFound,
)


def test_realtor():
Expand Down
7 changes: 6 additions & 1 deletion tests/test_redfin.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
from homeharvest import scrape_property
from homeharvest.exceptions import InvalidSite, InvalidListingType, NoResultsFound, GeoCoordsNotFound
from homeharvest.exceptions import (
InvalidSite,
InvalidListingType,
NoResultsFound,
GeoCoordsNotFound,
)


def test_redfin():
Expand Down
7 changes: 6 additions & 1 deletion tests/test_zillow.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
from homeharvest import scrape_property
from homeharvest.exceptions import InvalidSite, InvalidListingType, NoResultsFound, GeoCoordsNotFound
from homeharvest.exceptions import (
InvalidSite,
InvalidListingType,
NoResultsFound,
GeoCoordsNotFound,
)


def test_zillow():
Expand Down

0 comments on commit 62e3321

Please sign in to comment.