Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add photos #43

Merged
merged 3 commits into from
Nov 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions homeharvest/core/scrapers/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ class Address:

@dataclass
class Description:
primary_photo: str | None = None
alt_photos: list[str] | None = None
style: str | None = None
beds: int | None = None
baths_full: int | None = None
Expand Down
44 changes: 44 additions & 0 deletions homeharvest/core/scrapers/realtor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,12 @@ def handle_listing(self, listing_id: str) -> list[Property]:
garage
permalink
}
primary_photo {
href
}
photos {
href
}
}
}"""

Expand Down Expand Up @@ -152,6 +158,8 @@ def handle_listing(self, listing_id: str) -> list[Property]:
else None,
address=self._parse_address(property_info, search_type="handle_listing"),
description=Description(
primary_photo=property_info["primary_photo"].get("href", "").replace("s.jpg", "od-w480_h360_x2.webp?w=1080&q=75"),
alt_photos=self.process_alt_photos(property_info.get("photos", [])),
style=property_info["basic"].get("type", "").upper(),
beds=property_info["basic"].get("beds"),
baths_full=property_info["basic"].get("baths_full"),
Expand Down Expand Up @@ -247,6 +255,12 @@ def handle_address(self, property_id: str) -> list[Property]:
units
year_built
}
primary_photo {
href
}
photos {
href
}
}
}"""

Expand Down Expand Up @@ -334,6 +348,12 @@ def general_search(
name
}
}
primary_photo {
href
}
photos {
href
}
}
}
}"""
Expand Down Expand Up @@ -621,6 +641,7 @@ def _parse_address(self, result: dict, search_type):

@staticmethod
def _parse_description(result: dict) -> Description:

description_data = result.get("description", {})

if description_data is None or not isinstance(description_data, dict):
Expand All @@ -630,7 +651,16 @@ def _parse_description(result: dict) -> Description:
if style is not None:
style = style.upper()

primary_photo = ""
if result and "primary_photo" in result:
primary_photo_info = result["primary_photo"]
if primary_photo_info and "href" in primary_photo_info:
primary_photo_href = primary_photo_info["href"]
primary_photo = primary_photo_href.replace("s.jpg", "od-w480_h360_x2.webp?w=1080&q=75")

return Description(
primary_photo=primary_photo,
alt_photos=RealtorScraper.process_alt_photos(result.get("photos")),
style=style,
beds=description_data.get("beds"),
baths_full=description_data.get("baths_full"),
Expand All @@ -643,6 +673,7 @@ def _parse_description(result: dict) -> Description:
stories=description_data.get("stories"),
)


@staticmethod
def calculate_days_on_mls(result: dict) -> Optional[int]:
list_date_str = result.get("list_date")
Expand All @@ -661,3 +692,16 @@ def calculate_days_on_mls(result: dict) -> Optional[int]:
days = (today - list_date).days
if days >= 0:
return days

@staticmethod
def process_alt_photos(photos_info):
    """Build the list of alternate photo URLs for a listing.

    Each entry of ``photos_info`` is expected to be a mapping with an
    ``"href"`` key. Every usable href has its ``"s.jpg"`` substring replaced
    with ``"od-w480_h360_x2.webp?w=1080&q=75"``, the same rewrite applied to
    the primary photo elsewhere in this scraper.

    Args:
        photos_info: A list (or tuple) of ``{"href": ...}`` mappings, or
            ``None`` / an empty sequence when the listing has no photos.

    Returns:
        A list of rewritten photo URLs. The result is always a list (possibly
        empty), so callers can safely iterate over it or ``", ".join`` it.
    """
    # Anything that is not a sequence of entries (None, a bare dict, a
    # scalar) carries no photos; report that as an empty list instead of
    # failing or returning None.
    if not isinstance(photos_info, (list, tuple)):
        return []

    alt_photos = []
    for photo_info in photos_info:
        href = photo_info.get("href") if isinstance(photo_info, dict) else None
        # Skip malformed entries individually so that one bad record (a null
        # entry or a null/empty href) does not discard every valid photo.
        if not isinstance(href, str) or not href:
            continue
        alt_photos.append(
            href.replace("s.jpg", "od-w480_h360_x2.webp?w=1080&q=75")
        )
    return alt_photos
4 changes: 4 additions & 0 deletions homeharvest/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@

ordered_properties = [
"property_url",
"primary_photo",
"alt_photos",
"mls",
"mls_id",
"status",
Expand Down Expand Up @@ -49,6 +51,8 @@ def process_result(result: Property) -> pd.DataFrame:
prop_data["price_per_sqft"] = prop_data["prc_sqft"]

description = result.description
prop_data["primary_photo"] = description.primary_photo
prop_data["alt_photos"] = ", ".join(description.alt_photos)
prop_data["style"] = description.style
prop_data["beds"] = description.beds
prop_data["full_baths"] = description.baths_full
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "homeharvest"
version = "0.3.9"
version = "0.3.10"
description = "Real estate scraping library supporting Zillow, Realtor.com & Redfin."
authors = ["Zachary Hampton <[email protected]>", "Cullen Watson <[email protected]>"]
homepage = "https://github.com/Bunsly/HomeHarvest"
Expand Down