Skip to content

Commit

Permalink
- pending date, property type fields (#45)
Browse files Browse the repository at this point in the history
- alt photos bug fix (#57)
  • Loading branch information
ZacharyHampton committed Mar 14, 2024
1 parent d775540 commit 5c2498c
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 13 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ Property
│ ├── days_on_mls
│ ├── list_price
│ ├── list_date
│ ├── pending_date
│ ├── sold_price
│ ├── last_sold_date
│ ├── price_per_sqft
Expand Down
24 changes: 23 additions & 1 deletion homeharvest/core/scrapers/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,27 @@ class ListingType(Enum):
SOLD = "SOLD"


class PropertyType(Enum):
    """Closed set of property-type labels used for ``Description.style``.

    Every member's value is the same upper-case string as its name, so a
    member can be obtained by value: ``PropertyType("CONDO")``.

    Lookup by value is tolerant of letter case and surrounding whitespace
    (``PropertyType(" single_family ")`` resolves to ``SINGLE_FAMILY``), so
    callers do not each have to remember to normalize scraped strings first.
    A label that still matches no member raises ``ValueError``, exactly as a
    plain ``Enum`` lookup does.
    """

    APARTMENT = "APARTMENT"
    BUILDING = "BUILDING"
    COMMERCIAL = "COMMERCIAL"
    CONDO_TOWNHOME = "CONDO_TOWNHOME"
    CONDO_TOWNHOME_ROWHOME_COOP = "CONDO_TOWNHOME_ROWHOME_COOP"
    CONDO = "CONDO"
    # NOTE(review): CONDO and CONDOS are distinct members; presumably both
    # spellings occur in upstream data -- confirm before merging them.
    CONDOS = "CONDOS"
    COOP = "COOP"
    DUPLEX_TRIPLEX = "DUPLEX_TRIPLEX"
    FARM = "FARM"
    INVESTMENT = "INVESTMENT"
    LAND = "LAND"
    MOBILE = "MOBILE"
    MULTI_FAMILY = "MULTI_FAMILY"
    RENTAL = "RENTAL"
    SINGLE_FAMILY = "SINGLE_FAMILY"
    TOWNHOMES = "TOWNHOMES"
    OTHER = "OTHER"

    @classmethod
    def _missing_(cls, value):
        """Retry a failed value lookup after normalizing case and whitespace.

        Called by ``Enum`` only when ``value`` matched no member exactly.

        Returns:
            The member whose value equals ``value.strip().upper()``, or
            ``None`` (which makes ``Enum`` raise ``ValueError``) when the
            value is not a string or still matches nothing.
        """
        if not isinstance(value, str):
            return None

        normalized = value.strip().upper()
        # The exact-match lookup already failed, so an unchanged string
        # cannot match on a second pass.
        if normalized == value:
            return None

        for member in cls:
            if member.value == normalized:
                return member
        return None


@dataclass
class Address:
street: str | None = None
Expand All @@ -36,7 +57,7 @@ class Address:
class Description:
primary_photo: str | None = None
alt_photos: list[str] | None = None
style: str | None = None
style: PropertyType | None = None
beds: int | None = None
baths_full: int | None = None
baths_half: int | None = None
Expand All @@ -58,6 +79,7 @@ class Property:

list_price: int | None = None
list_date: str | None = None
pending_date: str | None = None
last_sold_date: str | None = None
prc_sqft: int | None = None
hoa_fee: int | None = None
Expand Down
23 changes: 12 additions & 11 deletions homeharvest/core/scrapers/realtor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from concurrent.futures import ThreadPoolExecutor, as_completed

from .. import Scraper
from ..models import Property, Address, ListingType, Description
from ..models import Property, Address, ListingType, Description, PropertyType


class RealtorScraper(Scraper):
Expand Down Expand Up @@ -84,11 +84,10 @@ def handle_listing(self, listing_id: str) -> list[Property]:
garage
permalink
}
primary_photo {
href
}
photos {
href
media {
photos {
href
}
}
}
}"""
Expand Down Expand Up @@ -120,9 +119,11 @@ def handle_listing(self, listing_id: str) -> list[Property]:
"list_date") else None
last_sold_date_str = property_info["basic"]["sold_date"].split("T")[0] if property_info["basic"].get(
"sold_date") else None
pending_date_str = property_info["pending_date"].split("T")[0] if property_info.get("pending_date") else None

list_date = datetime.strptime(list_date_str, "%Y-%m-%d") if list_date_str else None
last_sold_date = datetime.strptime(last_sold_date_str, "%Y-%m-%d") if last_sold_date_str else None
pending_date = datetime.strptime(pending_date_str, "%Y-%m-%d") if pending_date_str else None
today = datetime.now()

days_on_mls = None
Expand Down Expand Up @@ -150,6 +151,7 @@ def handle_listing(self, listing_id: str) -> list[Property]:
and property_info["basic"].get("sqft")
else None,
last_sold_date=last_sold_date,
pending_date=pending_date,
latitude=property_info["address"]["location"]["coordinate"].get("lat")
if able_to_get_lat_long
else None,
Expand All @@ -158,9 +160,7 @@ def handle_listing(self, listing_id: str) -> list[Property]:
else None,
address=self._parse_address(property_info, search_type="handle_listing"),
description=Description(
primary_photo=property_info["primary_photo"].get("href", "").replace("s.jpg",
"od-w480_h360_x2.webp?w=1080&q=75"),
alt_photos=self.process_alt_photos(property_info.get("photos", [])),
alt_photos=self.process_alt_photos(property_info.get("media", {}).get("photos", [])),
style=property_info["basic"].get("type", "").upper(),
beds=property_info["basic"].get("beds"),
baths_full=property_info["basic"].get("baths_full"),
Expand Down Expand Up @@ -298,6 +298,7 @@ def general_search(
count
total
results {
pending_date
property_id
list_date
status
Expand All @@ -310,6 +311,7 @@ def general_search(
is_pending
}
description {
type
sqft
beds
baths_full
Expand Down Expand Up @@ -663,7 +665,6 @@ def _parse_address(self, result: dict, search_type):

@staticmethod
def _parse_description(result: dict) -> Description:

description_data = result.get("description", {})

if description_data is None or not isinstance(description_data, dict):
Expand All @@ -683,7 +684,7 @@ def _parse_description(result: dict) -> Description:
return Description(
primary_photo=primary_photo,
alt_photos=RealtorScraper.process_alt_photos(result.get("photos")),
style=style,
style=PropertyType(style) if style else None,
beds=description_data.get("beds"),
baths_full=description_data.get("baths_full"),
baths_half=description_data.get("baths_half"),
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "homeharvest"
version = "0.3.13"
version = "0.3.14"
description = "Real estate scraping library"
authors = ["Zachary Hampton <[email protected]>", "Cullen Watson <[email protected]>"]
homepage = "https://github.com/Bunsly/HomeHarvest"
Expand Down

0 comments on commit 5c2498c

Please sign in to comment.