diff --git a/README.md b/README.md index a634257..eea8e7e 100644 --- a/README.md +++ b/README.md @@ -133,6 +133,13 @@ Property ├── Location Details: │ ├── latitude │ ├── longitude +│ ├── nearby_schools + + +├── Agent Info: +│ ├── agent +│ ├── broker +│ └── broker_phone └── Parking Details: └── parking_garage diff --git a/homeharvest/core/scrapers/__init__.py b/homeharvest/core/scrapers/__init__.py index 2b824ba..702723e 100644 --- a/homeharvest/core/scrapers/__init__.py +++ b/homeharvest/core/scrapers/__init__.py @@ -1,6 +1,6 @@ +import uuid from dataclasses import dataclass import requests -import uuid from .models import Property, ListingType, SiteName diff --git a/homeharvest/core/scrapers/models.py b/homeharvest/core/scrapers/models.py index c54417c..f6e6a4e 100644 --- a/homeharvest/core/scrapers/models.py +++ b/homeharvest/core/scrapers/models.py @@ -23,6 +23,12 @@ class ListingType(Enum): SOLD = "SOLD" +@dataclass +class Agent: + name: str | None = None + phone: str | None = None + + class PropertyType(Enum): APARTMENT = "APARTMENT" BUILDING = "BUILDING" @@ -69,12 +75,6 @@ class Description: stories: int | None = None -@dataclass -class Agent: - name: str | None = None - phone: str | None = None - - @dataclass class Property: property_url: str @@ -97,3 +97,4 @@ class Property: neighborhoods: Optional[str] = None agents: list[Agent] = None + nearby_schools: list[str] = None diff --git a/homeharvest/core/scrapers/realtor/__init__.py b/homeharvest/core/scrapers/realtor/__init__.py index 3f22e8b..febd942 100644 --- a/homeharvest/core/scrapers/realtor/__init__.py +++ b/homeharvest/core/scrapers/realtor/__init__.py @@ -142,7 +142,7 @@ def handle_listing(self, listing_id: str) -> list[Property]: days_on_mls = None property_id = property_info["details"]["permalink"] - agents = self.get_agents(property_id) + agents_schools = self.get_agents_schools(property_id) listing = Property( mls=mls, mls_id=( @@ -178,7 +178,8 @@ def handle_listing(self, listing_id: str) -> list[Property]: stories=property_info["details"].get("stories"), ), days_on_mls=days_on_mls, - agents=agents, + agents=agents_schools["agents"], + nearby_schools=agents_schools["schools"], ) return [listing] @@ -272,7 +273,7 @@ def handle_address(self, property_id: str) -> list[Property]: }""" variables = {"property_id": property_id} - agents = self.get_agents(property_id) + agents_schools = self.get_agents_schools(property_id) payload = { "query": query, @@ -290,7 +291,8 @@ def handle_address(self, property_id: str) -> list[Property]: property_url=f"{self.PROPERTY_URL}{property_info['details']['permalink']}", address=self._parse_address(property_info, search_type="handle_address"), description=self._parse_description(property_info), - agents=agents, + agents=agents_schools["agents"], + nearby_schools=agents_schools["schools"], ) ] @@ -510,7 +512,7 @@ def process_property(result: dict) -> Property | None: return property_id = result["property_id"] - agents = self.get_agents(property_id) + agents_schools = self.get_agents_schools(property_id) realty_property = Property( mls=mls, @@ -535,7 +537,8 @@ def process_property(result: dict) -> Property | None: address=self._parse_address(result, search_type="general_search"), description=self._parse_description(result), days_on_mls=self.calculate_days_on_mls(result), - agents=agents, + agents=agents_schools["agents"], + nearby_schools=agents_schools["schools"], ) return realty_property @@ -630,18 +633,25 @@ def search(self): return homes - def get_agents(self, property_id: str) -> list[Agent]: - payload = f'{{"query":"query GetHome($property_id: ID!) {{\\n home(property_id: $property_id) {{\\n __typename\\n\\n consumerAdvertisers: consumer_advertisers {{\\n __typename\\n type\\n advertiserId: advertiser_id\\n name\\n phone\\n type\\n href\\n slogan\\n photo {{\\n __typename\\n href\\n }}\\n showRealtorLogo: show_realtor_logo\\n hours\\n }}\\n\\n\\n }}\\n}}\\n","variables":{{"property_id":"{property_id}"}}}}' + def get_agents_schools(self, property_id: str) -> dict: + payload = f'{{"query":"query GetHome($property_id: ID!) {{\\n home(property_id: $property_id) {{\\n __typename\\n\\n consumerAdvertisers: consumer_advertisers {{\\n __typename\\n type\\n advertiserId: advertiser_id\\n name\\n phone\\n type\\n href\\n slogan\\n photo {{\\n __typename\\n href\\n }}\\n showRealtorLogo: show_realtor_logo\\n hours\\n }}\\n\\n\\n nearbySchools: nearby_schools(radius: 5.0, limit_per_level: 3) {{ __typename schools {{ district {{ __typename id name }} }} }}}}\\n}}\\n","variables":{{"property_id":"{property_id}"}}}}' response = self.session.post(self.PROPERTY_GQL, data=payload) - data = response.json() - try: - ads = data["data"]["home"]["consumerAdvertisers"] - except (KeyError, TypeError): - return [] + def get_key(keys: list): + try: + data = response.json() + for key in keys: + data = data[key] + return data + except (KeyError, TypeError): + return [] + + ads = get_key(["data", "home", "consumerAdvertisers"]) + schools = get_key(["data", "home", "nearbySchools", "schools"]) agents = [Agent(name=ad["name"], phone=ad["phone"]) for ad in ads] - return agents + schools = [school["district"]["name"] for school in schools] + return {"agents": agents, "schools": schools} @staticmethod def _parse_neighborhoods(result: dict) -> Optional[str]: diff --git a/homeharvest/utils.py b/homeharvest/utils.py index 74def20..64a52d2 100644 --- a/homeharvest/utils.py +++ b/homeharvest/utils.py @@ -34,6 +34,7 @@ "agent", "broker", "broker_phone", + "nearby_schools", "primary_photo", "alt_photos", ] @@ -60,6 +61,8 @@ def process_result(result: Property) -> pd.DataFrame: prop_data["broker_phone"] = agents[1].phone prop_data["price_per_sqft"] = prop_data["prc_sqft"] + prop_data["nearby_schools"] = filter(None, prop_data["nearby_schools"]) if prop_data["nearby_schools"] else None + prop_data["nearby_schools"] = ", ".join(set(prop_data["nearby_schools"])) if prop_data["nearby_schools"] else None description = result.description prop_data["primary_photo"] = description.primary_photo diff --git a/pyproject.toml b/pyproject.toml index c720758..529e5c3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "homeharvest" -version = "0.3.16" +version = "0.3.18" description = "Real estate scraping library" authors = ["Zachary Hampton ", "Cullen Watson "] homepage = "https://github.com/Bunsly/HomeHarvest"