Skip to content

Commit

Permalink
Merge pull request #43 from Bunsly/add_photos
Browse files Browse the repository at this point in the history
Add photos
  • Loading branch information
cullenwatson committed Nov 25, 2023
2 parents fa507db + 4676ec9 commit 19f23c9
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 1 deletion.
2 changes: 2 additions & 0 deletions homeharvest/core/scrapers/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ class Address:

@dataclass
class Description:
primary_photo: str | None = None
alt_photos: list[str] | None = None
style: str | None = None
beds: int | None = None
baths_full: int | None = None
Expand Down
44 changes: 44 additions & 0 deletions homeharvest/core/scrapers/realtor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,12 @@ def handle_listing(self, listing_id: str) -> list[Property]:
garage
permalink
}
primary_photo {
href
}
photos {
href
}
}
}"""

Expand Down Expand Up @@ -152,6 +158,8 @@ def handle_listing(self, listing_id: str) -> list[Property]:
else None,
address=self._parse_address(property_info, search_type="handle_listing"),
description=Description(
primary_photo=property_info["primary_photo"].get("href", "").replace("s.jpg", "od-w480_h360_x2.webp?w=1080&q=75"),
alt_photos=self.process_alt_photos(property_info.get("photos", [])),
style=property_info["basic"].get("type", "").upper(),
beds=property_info["basic"].get("beds"),
baths_full=property_info["basic"].get("baths_full"),
Expand Down Expand Up @@ -247,6 +255,12 @@ def handle_address(self, property_id: str) -> list[Property]:
units
year_built
}
primary_photo {
href
}
photos {
href
}
}
}"""

Expand Down Expand Up @@ -334,6 +348,12 @@ def general_search(
name
}
}
primary_photo {
href
}
photos {
href
}
}
}
}"""
Expand Down Expand Up @@ -621,6 +641,7 @@ def _parse_address(self, result: dict, search_type):

@staticmethod
def _parse_description(result: dict) -> Description:

description_data = result.get("description", {})

if description_data is None or not isinstance(description_data, dict):
Expand All @@ -630,7 +651,16 @@ def _parse_description(result: dict) -> Description:
if style is not None:
style = style.upper()

primary_photo = ""
if result and "primary_photo" in result:
primary_photo_info = result["primary_photo"]
if primary_photo_info and "href" in primary_photo_info:
primary_photo_href = primary_photo_info["href"]
primary_photo = primary_photo_href.replace("s.jpg", "od-w480_h360_x2.webp?w=1080&q=75")

return Description(
primary_photo=primary_photo,
alt_photos=RealtorScraper.process_alt_photos(result.get("photos")),
style=style,
beds=description_data.get("beds"),
baths_full=description_data.get("baths_full"),
Expand All @@ -643,6 +673,7 @@ def _parse_description(result: dict) -> Description:
stories=description_data.get("stories"),
)


@staticmethod
def calculate_days_on_mls(result: dict) -> Optional[int]:
list_date_str = result.get("list_date")
Expand All @@ -661,3 +692,16 @@ def calculate_days_on_mls(result: dict) -> Optional[int]:
days = (today - list_date).days
if days >= 0:
return days

@staticmethod
def process_alt_photos(photos_info):
    """Build the list of alternate photo URLs from a ``photos`` payload.

    Each entry is expected to be a mapping with an ``href`` key. Every
    ``href`` has the substring ``"s.jpg"`` replaced with
    ``"od-w480_h360_x2.webp?w=1080&q=75"``.

    Args:
        photos_info: An iterable of photo mappings, or ``None``/empty when
            the listing carries no photos.

    Returns:
        A list of rewritten URLs. An entry whose ``href`` is missing or
        null contributes an empty string; entries that are not mappings
        are skipped. The result is always a list (possibly empty), never
        ``None``, so callers can join or iterate it unconditionally.
    """
    if not photos_info:
        return []

    # Only the "is this iterable at all?" check is guarded; a malformed
    # payload yields no photos instead of an implicit ``None``.
    try:
        entries = iter(photos_info)
    except TypeError:
        return []

    alt_photos = []
    for photo_info in entries:
        # One bad entry must not discard the photos that are well formed.
        if not isinstance(photo_info, dict):
            continue
        # ``or ""`` also covers an explicit ``"href": null`` in the payload.
        href = photo_info.get("href") or ""
        if not isinstance(href, str):
            continue
        alt_photos.append(
            href.replace("s.jpg", "od-w480_h360_x2.webp?w=1080&q=75")
        )
    return alt_photos
4 changes: 4 additions & 0 deletions homeharvest/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@

ordered_properties = [
"property_url",
"primary_photo",
"alt_photos",
"mls",
"mls_id",
"status",
Expand Down Expand Up @@ -49,6 +51,8 @@ def process_result(result: Property) -> pd.DataFrame:
prop_data["price_per_sqft"] = prop_data["prc_sqft"]

description = result.description
prop_data["primary_photo"] = description.primary_photo
prop_data["alt_photos"] = ", ".join(description.alt_photos)
prop_data["style"] = description.style
prop_data["beds"] = description.beds
prop_data["full_baths"] = description.baths_full
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "homeharvest"
version = "0.3.9"
version = "0.3.10"
description = "Real estate scraping library supporting Zillow, Realtor.com & Redfin."
authors = ["Zachary Hampton <[email protected]>", "Cullen Watson <[email protected]>"]
homepage = "https://github.com/Bunsly/HomeHarvest"
Expand Down

0 comments on commit 19f23c9

Please sign in to comment.