diff --git a/README.md b/README.md index ff8c371..5dcbe01 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,6 @@ properties = scrape_property( # date_from="2023-05-01", # alternative to past_days # date_to="2023-05-28", # foreclosure=True - # mls_only=True, # only fetch MLS listings ) print(f"Number of properties: {len(properties)}") @@ -92,6 +91,8 @@ Optional ├── foreclosure (True/False): If set, fetches only foreclosures │ └── proxy (string): In format 'http://user:pass@host:port' +│ +└── extra_property_data (bool): Increases requests by O(n). If set, this fetches additional property data (e.g. agent, broker, property evaluations) ``` ### Property Schema @@ -139,17 +140,13 @@ Property ├── Agent Info: │ ├── agent -│ ├── broker -│ └── broker_phone - -├── Agent Info: -│ ├── agent -│ ├── broker -│ └── broker_phone +│ ├── agent_email +│ └── agent_phone ``` ### Exceptions The following exceptions may be raised when using HomeHarvest: - `InvalidListingType` - valid options: `for_sale`, `for_rent`, `sold` -- `InvalidDate` - date_from or date_to is not in the format YYYY-MM-DD +- `InvalidDate` - date_from or date_to is not in the format YYYY-MM-DD. +- `AuthenticationError` - Realtor.com token request failed. 
diff --git a/homeharvest/__init__.py b/homeharvest/__init__.py index 6f2c54f..c3005a0 100644 --- a/homeharvest/__init__.py +++ b/homeharvest/__init__.py @@ -43,6 +43,7 @@ def scrape_property( date_from=date_from, date_to=date_to, foreclosure=foreclosure, + extra_property_data=extra_property_data, ) site = RealtorScraper(scraper_input) diff --git a/homeharvest/core/scrapers/__init__.py b/homeharvest/core/scrapers/__init__.py index f75e56b..c8aaf3e 100644 --- a/homeharvest/core/scrapers/__init__.py +++ b/homeharvest/core/scrapers/__init__.py @@ -3,6 +3,7 @@ from requests.adapters import HTTPAdapter from urllib3.util.retry import Retry import uuid +from ...exceptions import AuthenticationError from .models import Property, ListingType, SiteName @@ -11,12 +12,13 @@ class ScraperInput: location: str listing_type: ListingType radius: float | None = None - mls_only: bool | None = None + mls_only: bool | None = False proxy: str | None = None last_x_days: int | None = None date_from: str | None = None date_to: str | None = None - foreclosure: bool | None = None + foreclosure: bool | None = False + extra_property_data: bool | None = True class Scraper: @@ -57,6 +59,7 @@ def __init__( self.date_from = scraper_input.date_from self.date_to = scraper_input.date_to self.foreclosure = scraper_input.foreclosure + self.extra_property_data = scraper_input.extra_property_data def search(self) -> list[Property]: ... @@ -65,7 +68,8 @@ def _parse_home(home) -> Property: ... def handle_location(self): ... 
- def get_access_token(self): + @staticmethod + def get_access_token(): url = "https://graph.realtor.com/auth/token" payload = f'{{"client_app_id":"rdc_mobile_native,24.20.4.149916,iphone","device_id":"{str(uuid.uuid4()).upper()}","grant_type":"device_mobile"}}' @@ -80,8 +84,11 @@ def get_access_token(self): response = requests.post(url, headers=headers, data=payload) data = response.json() - try: - access_token = data["access_token"] - except Exception: - raise Exception("Could not get access token, use a proxy/vpn or wait") + + if not (access_token := data.get("access_token")): + raise AuthenticationError( + "Failed to get access token, use a proxy/vpn or wait a moment and try again.", + response=response + ) + return access_token diff --git a/homeharvest/core/scrapers/realtor/__init__.py b/homeharvest/core/scrapers/realtor/__init__.py index 3d238d0..ed7cb27 100644 --- a/homeharvest/core/scrapers/realtor/__init__.py +++ b/homeharvest/core/scrapers/realtor/__init__.py @@ -651,6 +651,9 @@ def search(self): return homes def get_prop_details(self, property_id: str) -> dict: + if not self.extra_property_data: + return {} + query = """query GetHome($property_id: ID!) { home(property_id: $property_id) { __typename diff --git a/homeharvest/exceptions.py b/homeharvest/exceptions.py index c3f5111..22f9b9f 100644 --- a/homeharvest/exceptions.py +++ b/homeharvest/exceptions.py @@ -4,3 +4,11 @@ class InvalidListingType(Exception): class InvalidDate(Exception): """Raised when only one of date_from or date_to is provided or not in the correct format. 
ex: 2023-10-23""" + + +class AuthenticationError(Exception): + """Raised when there is an issue with the authentication process.""" + def __init__(self, *args, response): + super().__init__(*args) + + self.response = response diff --git a/tests/test_realtor.py b/tests/test_realtor.py index 01e0e6c..f475eee 100644 --- a/tests/test_realtor.py +++ b/tests/test_realtor.py @@ -142,3 +142,17 @@ def test_realtor_foreclosed(): def test_realtor_agent(): scraped = scrape_property(location="Detroit, MI", listing_type="for_sale") assert scraped["agent"].nunique() > 1 + + +def test_realtor_without_extra_details(): + results = [ + scrape_property( + location="15509 N 172nd Dr, Surprise, AZ 85388", + extra_property_data=False, + ), + scrape_property( + location="15509 N 172nd Dr, Surprise, AZ 85388", + ), + ] + + assert not results[0].equals(results[1])  # assumes scrape_property returns a pandas DataFrame, where a bare "!=" is element-wise and cannot be asserted directly