Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Schools #69

Merged
merged 5 commits into from
Apr 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,13 @@ Property
├── Location Details:
│ ├── latitude
│ ├── longitude
│ └── nearby_schools


├── Agent Info:
│ ├── agent
│ ├── broker
│ └── broker_phone

├── Agent Info:
│ ├── agent
Expand Down
1 change: 1 addition & 0 deletions homeharvest/core/scrapers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import uuid
from dataclasses import dataclass
import requests
import uuid
Expand Down
7 changes: 7 additions & 0 deletions homeharvest/core/scrapers/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@ class ListingType(Enum):
SOLD = "SOLD"


@dataclass
class Agent:
name: str | None = None
phone: str | None = None


class PropertyType(Enum):
APARTMENT = "APARTMENT"
BUILDING = "BUILDING"
Expand Down Expand Up @@ -97,3 +103,4 @@ class Property:
neighborhoods: Optional[str] = None

agents: list[Agent] = None
nearby_schools: list[str] = None
38 changes: 24 additions & 14 deletions homeharvest/core/scrapers/realtor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def handle_listing(self, listing_id: str) -> list[Property]:
days_on_mls = None

property_id = property_info["details"]["permalink"]
agents = self.get_agents(property_id)
agents_schools = self.get_agents_schools(property_id)
listing = Property(
mls=mls,
mls_id=(
Expand Down Expand Up @@ -178,7 +178,8 @@ def handle_listing(self, listing_id: str) -> list[Property]:
stories=property_info["details"].get("stories"),
),
days_on_mls=days_on_mls,
agents=agents,
agents=agents_schools["agents"],
nearby_schools=agents_schools["schools"],
)

return [listing]
Expand Down Expand Up @@ -272,7 +273,7 @@ def handle_address(self, property_id: str) -> list[Property]:
}"""

variables = {"property_id": property_id}
agents = self.get_agents(property_id)
agents_schools = self.get_agents_schools(property_id)

payload = {
"query": query,
Expand All @@ -290,7 +291,8 @@ def handle_address(self, property_id: str) -> list[Property]:
property_url=f"{self.PROPERTY_URL}{property_info['details']['permalink']}",
address=self._parse_address(property_info, search_type="handle_address"),
description=self._parse_description(property_info),
agents=agents,
agents=agents_schools["agents"],
nearby_schools=agents_schools["schools"],
)
]

Expand Down Expand Up @@ -510,7 +512,7 @@ def process_property(result: dict) -> Property | None:
return

property_id = result["property_id"]
agents = self.get_agents(property_id)
agents_schools = self.get_agents_schools(property_id)

realty_property = Property(
mls=mls,
Expand All @@ -535,7 +537,8 @@ def process_property(result: dict) -> Property | None:
address=self._parse_address(result, search_type="general_search"),
description=self._parse_description(result),
days_on_mls=self.calculate_days_on_mls(result),
agents=agents,
agents=agents_schools["agents"],
nearby_schools=agents_schools["schools"],
)
return realty_property

Expand Down Expand Up @@ -630,18 +633,25 @@ def search(self):

return homes

def get_agents_schools(self, property_id: str) -> dict:
    """Fetch the advertisers and nearby school districts for one property.

    Posts a single GraphQL query to ``self.PROPERTY_GQL`` through
    ``self.session`` and pulls both result sets out of the JSON response.

    Args:
        property_id: Identifier interpolated into the query's variables.

    Returns:
        A dict with two keys: ``"agents"``, a list of ``Agent`` objects built
        from the ``consumerAdvertisers`` entries, and ``"schools"``, a list
        with one district name per returned school (``None`` where the
        district or its name is missing). Either list is empty when the
        corresponding path is absent or null in the response.
    """
    payload = f'{{"query":"query GetHome($property_id: ID!) {{\\n home(property_id: $property_id) {{\\n __typename\\n\\n consumerAdvertisers: consumer_advertisers {{\\n __typename\\n type\\n advertiserId: advertiser_id\\n name\\n phone\\n type\\n href\\n slogan\\n photo {{\\n __typename\\n href\\n }}\\n showRealtorLogo: show_realtor_logo\\n hours\\n }}\\n\\n\\n nearbySchools: nearby_schools(radius: 5.0, limit_per_level: 3) {{ __typename schools {{ district {{ __typename id name }} }} }}}}\\n}}\\n","variables":{{"property_id":"{property_id}"}}}}'
    response = self.session.post(self.PROPERTY_GQL, data=payload)

    # Decode the body once; both lookups below walk the same parsed document.
    body = response.json()

    def get_key(keys: list) -> list:
        """Walk ``body`` along ``keys``; return ``[]`` if the path is missing or null."""
        node = body
        try:
            for key in keys:
                node = node[key]
        except (KeyError, TypeError):
            return []
        # A JSON null at the end of the path would otherwise reach the
        # comprehensions below as None and fail on iteration.
        return node or []

    ads = get_key(["data", "home", "consumerAdvertisers"])
    schools = get_key(["data", "home", "nearbySchools", "schools"])

    agents = [Agent(name=ad.get("name"), phone=ad.get("phone")) for ad in ads if ad]
    # Keep one entry per school so callers can still filter out the Nones;
    # tolerate a null school or a null/missing district instead of raising.
    district_names = [
        ((school or {}).get("district") or {}).get("name") for school in schools
    ]
    return {"agents": agents, "schools": district_names}

@staticmethod
def _parse_neighborhoods(result: dict) -> Optional[str]:
Expand Down
3 changes: 3 additions & 0 deletions homeharvest/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
"agent",
"broker",
"broker_phone",
"nearby_schools",
"primary_photo",
"alt_photos",
]
Expand All @@ -60,6 +61,8 @@ def process_result(result: Property) -> pd.DataFrame:
prop_data["broker_phone"] = agents[1].phone

prop_data["price_per_sqft"] = prop_data["prc_sqft"]
prop_data["nearby_schools"] = filter(None, prop_data["nearby_schools"]) if prop_data["nearby_schools"] else None
prop_data["nearby_schools"] = ", ".join(set(prop_data["nearby_schools"])) if prop_data["nearby_schools"] else None

description = result.description
prop_data["primary_photo"] = description.primary_photo
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "homeharvest"
version = "0.3.17"
version = "0.3.18"
description = "Real estate scraping library"
authors = ["Zachary Hampton <[email protected]>", "Cullen Watson <[email protected]>"]
homepage = "https://github.com/Bunsly/HomeHarvest"
Expand Down
Loading