Skip to content

Commit

Permalink
fix: use zillow backend ep
Browse files Browse the repository at this point in the history
  • Loading branch information
cullenwatson committed Sep 18, 2023
1 parent 905cfca commit dc8c159
Show file tree
Hide file tree
Showing 4 changed files with 342 additions and 184 deletions.
106 changes: 55 additions & 51 deletions homeharvest/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from .core.scrapers.redfin import RedfinScraper
from .core.scrapers.realtor import RealtorScraper
from .core.scrapers.zillow import ZillowScraper
from .core.scrapers.models import ListingType, Property, Building, SiteName
from .core.scrapers.models import ListingType, Property, SiteName
from .core.scrapers import ScraperInput
from .exceptions import InvalidSite, InvalidListingType
from typing import Union
Expand All @@ -25,60 +25,62 @@ def validate_input(site_name: str, listing_type: str) -> None:
)


def get_ordered_properties(result: Union[Building, Property]) -> list[str]:
    """Return the column names, in output order, for a scraped result.

    ``process_result`` indexes its DataFrame with the returned list, so the
    list decides both which columns are kept and how they are ordered.

    Args:
        result: The scraped record whose concrete type picks the layout.

    Returns:
        The ``Property`` layout when ``result`` is a ``Property``, the
        ``Building`` layout when it is a ``Building``, and an empty list for
        anything else. A new list is built on every call.
    """
    # Both layouts contain the same run of address/url columns.
    address_columns = [
        "address_one",
        "city",
        "state",
        "zip_code",
        "address_two",
        "url",
    ]

    # Property is tested first, exactly as in the original if/elif chain.
    if isinstance(result, Property):
        return [
            "listing_type",
            *address_columns,
            "property_type",
            "price",
            "beds",
            "baths",
            "square_feet",
            "price_per_square_foot",
            "lot_size",
            "stories",
            "year_built",
            "agent_name",
            "mls_id",
            "description",
        ]

    if isinstance(result, Building):
        return [
            *address_columns,
            "num_units",
            "min_unit_price",
            "max_unit_price",
            "avg_unit_price",
            "listing_type",
        ]

    return []


def process_result(result: Union[Building, Property]) -> pd.DataFrame:
def get_ordered_properties(result: Property) -> list[str]:
    """Return the DataFrame column names in their output order.

    ``process_result`` indexes its DataFrame with the returned list, so the
    list decides both which columns are kept and how they are ordered.

    Args:
        result: Accepted so callers can pass the record being processed;
            the body does not inspect it.

    Returns:
        A newly built list of column names.
    """
    identity_columns = (
        "property_url",
        "site_name",
        "listing_type",
        "property_type",
        "status_text",
    )
    pricing_columns = (
        "currency",
        "price",
        "apt_min_price",
        "tax_assessed_value",
    )
    size_columns = (
        "square_feet",
        "price_per_sqft",
        "beds",
        "baths",
        "lot_area_value",
        "lot_area_unit",
    )
    location_columns = (
        "street_address",
        "unit",
        "city",
        "state",
        "zip_code",
        "country",
    )
    structure_columns = (
        "posted_time",
        "bldg_min_beds",
        "bldg_min_baths",
        "bldg_min_area",
        "bldg_unit_count",
        "bldg_name",
        "stories",
        "year_built",
    )
    detail_columns = (
        "agent_name",
        "mls_id",
        "description",
        "img_src",
        "latitude",
        "longitude",
    )

    # Concatenation order is the output column order.
    return [
        *identity_columns,
        *pricing_columns,
        *size_columns,
        *location_columns,
        *structure_columns,
        *detail_columns,
    ]


def process_result(result: Property) -> pd.DataFrame:
prop_data = result.__dict__

address_data = prop_data["address"]
prop_data["site_name"] = prop_data["site_name"].value
prop_data["listing_type"] = prop_data["listing_type"].value
prop_data["listing_type"] = prop_data["listing_type"].value.lower()
prop_data["property_type"] = prop_data["property_type"].value.lower()
prop_data["address_one"] = address_data.address_one
prop_data["city"] = address_data.city
prop_data["state"] = address_data.state
prop_data["zip_code"] = address_data.zip_code
prop_data["address_two"] = address_data.address_two
if "address" in prop_data:
address_data = prop_data["address"]
prop_data["street_address"] = address_data.street_address
prop_data["unit"] = address_data.unit
prop_data["city"] = address_data.city
prop_data["state"] = address_data.state
prop_data["zip_code"] = address_data.zip_code
prop_data["country"] = address_data.country

del prop_data["address"]
del prop_data["address"]

properties_df = pd.DataFrame([prop_data])
properties_df = properties_df[get_ordered_properties(result)]
Expand All @@ -90,7 +92,7 @@ def scrape_property(
location: str,
site_name: str,
listing_type: str = "for_sale", #: for_sale, for_rent, sold
) -> Union[list[Building], list[Property]]:
) -> list[Property]:
validate_input(site_name, listing_type)

scraper_input = ScraperInput(
Expand All @@ -103,5 +105,7 @@ def scrape_property(
results = site.search()

properties_dfs = [process_result(result) for result in results]
if not properties_dfs:
return pd.DataFrame()

return pd.concat(properties_dfs, ignore_index=True)
66 changes: 40 additions & 26 deletions homeharvest/core/scrapers/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,28 @@ class SiteName(Enum):


# Listing categories: for sale, for rent, or sold.
# NOTE(review): each member is listed twice below, first with a lowercase value
# and then with an uppercase value. This looks like the removed and added sides
# of the diff shown together; confirm which set is current before relying on
# `.value` (process_result calls `.value.lower()` on listing_type).
class ListingType(Enum):
FOR_SALE = "for_sale"
FOR_RENT = "for_rent"
SOLD = "sold"
FOR_SALE = "FOR_SALE"
FOR_RENT = "FOR_RENT"
SOLD = "SOLD"


class PropertyType(Enum):
HOUSE = "HOUSE"
BUILDING = "BUILDING"
CONDO = "CONDO"
TOWNHOUSE = "TOWNHOUSE"
SINGLE_FAMILY = "SINGLE_FAMILY"
MULTI_FAMILY = "MULTI_FAMILY"
MANUFACTURED = "MANUFACTURED"
NEW_CONSTRUCTION = "NEW_CONSTRUCTION"
APARTMENT = "APARTMENT"
APARTMENTS = "APARTMENTS"
LAND = "LAND"
LOT = "LOT"
OTHER = "OTHER"

BLANK = "BLANK"

@classmethod
def from_int_code(cls, code):
mapping = {
Expand All @@ -38,48 +44,56 @@ def from_int_code(cls, code):
13: cls.SINGLE_FAMILY,
}

return mapping.get(code, cls.OTHER)
return mapping.get(code, cls.BLANK)


# Address of a listing. process_result copies these fields out of the nested
# object into flat top-level columns.
# NOTE(review): this span appears to show two field layouts together
# (address_one/address_two and street_address/unit/country), presumably the
# removed and added sides of the diff; confirm which fields are current.
@dataclass
class Address:
address_one: str
street_address: str
city: str
state: str
zip_code: str

address_two: str | None = None
# `unit` has no default, so it must stay ahead of any defaulted field.
unit: str
country: str | None = None


# Record types produced by the scrapers.
# NOTE(review): this span appears to interleave two designs, presumably the
# removed and added sides of the diff: a Realty base class with Property and
# Building subclasses, and a single flat Property dataclass carrying every
# field. Several names (listing_type, property_type, square_feet, year_built)
# therefore appear twice. Confirm which lines are current before editing.
@dataclass()
class Realty:
@dataclass
class Property:
# Fields without defaults; they must precede the defaulted fields below.
property_url: str
site_name: SiteName
listing_type: ListingType
property_type: PropertyType
address: Address
url: str
listing_type: ListingType | None = None


@dataclass
class Property(Realty):
# house for sale
# Every field from here on is optional and defaults to None.
price: int | None = None
tax_assessed_value: int | None = None
currency: str | None = None
square_feet: int | None = None
beds: int | None = None
baths: float | None = None
lot_area_value: float | None = None
lot_area_unit: str | None = None
stories: int | None = None
year_built: int | None = None
square_feet: int | None = None
price_per_square_foot: int | None = None
price_per_sqft: int | None = None
year_built: int | None = None
mls_id: str | None = None

agent_name: str | None = None
property_type: PropertyType | None = None
lot_size: int | None = None
img_src: str | None = None
description: str | None = None


@dataclass
class Building(Realty):
num_units: int | None = None
min_unit_price: int | None = None
max_unit_price: int | None = None
avg_unit_price: int | None = None
status_text: str | None = None
latitude: float | None = None
longitude: float | None = None
posted_time: str | None = None

# building for sale
# NOTE(review): the bldg_* names suggest building-level listings; confirm against the scrapers.
bldg_name: str | None = None
bldg_unit_count: int | None = None
bldg_min_beds: int | None = None
bldg_min_baths: float | None = None
bldg_min_area: int | None = None

# apt
# NOTE(review): presumably the lowest unit price of an apartment listing; confirm against the scraper.
apt_min_price: int | None = None
Loading

0 comments on commit dc8c159

Please sign in to comment.