diff --git a/homeharvest/core/scrapers/models.py b/homeharvest/core/scrapers/models.py index 98709fe..6787204 100644 --- a/homeharvest/core/scrapers/models.py +++ b/homeharvest/core/scrapers/models.py @@ -34,6 +34,8 @@ class Address: @dataclass class Description: + primary_photo: str | None = None + alt_photos: list[str] | None = None style: str | None = None beds: int | None = None baths_full: int | None = None diff --git a/homeharvest/core/scrapers/realtor/__init__.py b/homeharvest/core/scrapers/realtor/__init__.py index 2c493a5..8fc6770 100644 --- a/homeharvest/core/scrapers/realtor/__init__.py +++ b/homeharvest/core/scrapers/realtor/__init__.py @@ -84,6 +84,12 @@ def handle_listing(self, listing_id: str) -> list[Property]: garage permalink } + primary_photo { + href + } + photos { + href + } } }""" @@ -152,6 +158,8 @@ def handle_listing(self, listing_id: str) -> list[Property]: else None, address=self._parse_address(property_info, search_type="handle_listing"), description=Description( + primary_photo=property_info["primary_photo"].get("href", "").replace("s.jpg", "od-w480_h360_x2.webp?w=1080&q=75"), + alt_photos=self.process_alt_photos(property_info.get("photos", [])), style=property_info["basic"].get("type", "").upper(), beds=property_info["basic"].get("beds"), baths_full=property_info["basic"].get("baths_full"), @@ -247,6 +255,12 @@ def handle_address(self, property_id: str) -> list[Property]: units year_built } + primary_photo { + href + } + photos { + href + } } }""" @@ -334,6 +348,12 @@ def general_search( name } } + primary_photo { + href + } + photos { + href + } } } }""" @@ -621,6 +641,7 @@ def _parse_address(self, result: dict, search_type): @staticmethod def _parse_description(result: dict) -> Description: + description_data = result.get("description", {}) if description_data is None or not isinstance(description_data, dict): @@ -630,7 +651,16 @@ def _parse_description(result: dict) -> Description: if style is not None: style = style.upper() + primary_photo = "" + if result and "primary_photo" in result: + primary_photo_info = result["primary_photo"] + if primary_photo_info and "href" in primary_photo_info: + primary_photo_href = primary_photo_info["href"] + primary_photo = primary_photo_href.replace("s.jpg", "od-w480_h360_x2.webp?w=1080&q=75") + return Description( + primary_photo=primary_photo, + alt_photos=RealtorScraper.process_alt_photos(result.get("photos")), style=style, beds=description_data.get("beds"), baths_full=description_data.get("baths_full"), @@ -643,6 +673,7 @@ def _parse_description(result: dict) -> Description: stories=description_data.get("stories"), ) + @staticmethod def calculate_days_on_mls(result: dict) -> Optional[int]: list_date_str = result.get("list_date") @@ -661,3 +692,16 @@ def calculate_days_on_mls(result: dict) -> Optional[int]: days = (today - list_date).days if days >= 0: return days + + @staticmethod + def process_alt_photos(photos_info): + try: + alt_photos = [] + if photos_info: + for photo_info in photos_info: + href = photo_info.get("href", "") + alt_photo_href = href.replace("s.jpg", "od-w480_h360_x2.webp?w=1080&q=75") + alt_photos.append(alt_photo_href) + return alt_photos + except Exception: + pass diff --git a/homeharvest/utils.py b/homeharvest/utils.py index 27effef..fe5b885 100644 --- a/homeharvest/utils.py +++ b/homeharvest/utils.py @@ -5,6 +5,8 @@ ordered_properties = [ "property_url", + "primary_photo", + "alt_photos", "mls", "mls_id", "status", @@ -49,6 +51,8 @@ def process_result(result: Property) -> pd.DataFrame: prop_data["price_per_sqft"] = prop_data["prc_sqft"] description = result.description + prop_data["primary_photo"] = description.primary_photo + prop_data["alt_photos"] = ", ".join(description.alt_photos) prop_data["style"] = description.style prop_data["beds"] = description.beds prop_data["full_baths"] = description.baths_full diff --git a/pyproject.toml b/pyproject.toml index 1d3f972..fa9e788 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "homeharvest" -version = "0.3.9" +version = "0.3.10" description = "Real estate scraping library supporting Zillow, Realtor.com & Redfin." authors = ["Zachary Hampton ", "Cullen Watson "] homepage = "https://github.com/Bunsly/HomeHarvest"