Skip to content

Commit

Permalink
- extra_property_details parameter
Browse files Browse the repository at this point in the history
- updated docs
- classified exception
  • Loading branch information
ZacharyHampton committed May 2, 2024
1 parent 46985dc commit c3e24a4
Show file tree
Hide file tree
Showing 6 changed files with 46 additions and 16 deletions.
15 changes: 6 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ properties = scrape_property(
# date_from="2023-05-01", # alternative to past_days
# date_to="2023-05-28",
# foreclosure=True

# mls_only=True, # only fetch MLS listings
)
print(f"Number of properties: {len(properties)}")
Expand Down Expand Up @@ -92,6 +91,8 @@ Optional
├── foreclosure (True/False): If set, fetches only foreclosures
├── proxy (string): In format 'http://user:pass@host:port'
└── extra_property_data (bool): Increases the number of requests by O(n). If set, this fetches additional property data (e.g. agent, broker, property evaluations, etc.)
```

### Property Schema
Expand Down Expand Up @@ -139,17 +140,13 @@ Property
├── Agent Info:
│ ├── agent
│ ├── broker
│ └── broker_phone
├── Agent Info:
│ ├── agent
│ ├── broker
│ └── broker_phone
│ ├── agent_email
│ └── agent_phone
```

### Exceptions
The following exceptions may be raised when using HomeHarvest:

- `InvalidListingType` - valid options: `for_sale`, `for_rent`, `sold`
- `InvalidDate` - date_from or date_to is not in the format YYYY-MM-DD
- `InvalidDate` - date_from or date_to is not in the format YYYY-MM-DD.
- `AuthenticationError` - Realtor.com token request failed.
1 change: 1 addition & 0 deletions homeharvest/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ def scrape_property(
date_from=date_from,
date_to=date_to,
foreclosure=foreclosure,
extra_property_data=extra_property_data,
)

site = RealtorScraper(scraper_input)
Expand Down
21 changes: 14 additions & 7 deletions homeharvest/core/scrapers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
import uuid
from ...exceptions import AuthenticationError
from .models import Property, ListingType, SiteName


Expand All @@ -11,12 +12,13 @@ class ScraperInput:
location: str
listing_type: ListingType
radius: float | None = None
mls_only: bool | None = None
mls_only: bool | None = False
proxy: str | None = None
last_x_days: int | None = None
date_from: str | None = None
date_to: str | None = None
foreclosure: bool | None = None
foreclosure: bool | None = False
extra_property_data: bool | None = True


class Scraper:
Expand Down Expand Up @@ -57,6 +59,7 @@ def __init__(
self.date_from = scraper_input.date_from
self.date_to = scraper_input.date_to
self.foreclosure = scraper_input.foreclosure
self.extra_property_data = scraper_input.extra_property_data

def search(self) -> list[Property]: ...

Expand All @@ -65,7 +68,8 @@ def _parse_home(home) -> Property: ...

def handle_location(self): ...

def get_access_token(self):
@staticmethod
def get_access_token():
url = "https://graph.realtor.com/auth/token"

payload = f'{{"client_app_id":"rdc_mobile_native,24.20.4.149916,iphone","device_id":"{str(uuid.uuid4()).upper()}","grant_type":"device_mobile"}}'
Expand All @@ -80,8 +84,11 @@ def get_access_token(self):
response = requests.post(url, headers=headers, data=payload)

data = response.json()
try:
access_token = data["access_token"]
except Exception:
raise Exception("Could not get access token, use a proxy/vpn or wait")

if not (access_token := data.get("access_token")):
raise AuthenticationError(
"Failed to get access token, use a proxy/vpn or wait a moment and try again.",
response=response
)

return access_token
3 changes: 3 additions & 0 deletions homeharvest/core/scrapers/realtor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -651,6 +651,9 @@ def search(self):
return homes

def get_prop_details(self, property_id: str) -> dict:
if not self.extra_property_data:
return {}

query = """query GetHome($property_id: ID!) {
home(property_id: $property_id) {
__typename
Expand Down
8 changes: 8 additions & 0 deletions homeharvest/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,11 @@ class InvalidListingType(Exception):

class InvalidDate(Exception):
    """Signal a bad date_from/date_to pair.

    Raised when only one of the two dates is supplied, or when a date is not
    written as YYYY-MM-DD (for example 2023-10-23).
    """


class AuthenticationError(Exception):
    """Raised when there is an issue with the authentication process.

    Attributes:
        response: The object passed as ``response`` by the raiser (for
            example the failed HTTP response), or ``None`` when not given.
    """

    def __init__(self, *args, response=None):
        # ``response`` defaults to None so the exception can be rebuilt from
        # ``args`` alone. copy/pickle re-create the instance that way before
        # restoring instance attributes; a required keyword-only argument
        # made that step raise TypeError.
        super().__init__(*args)

        self.response = response
14 changes: 14 additions & 0 deletions tests/test_realtor.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,3 +142,17 @@ def test_realtor_foreclosed():
def test_realtor_agent():
    """Check that a Detroit for-sale search yields more than one distinct agent."""
    listings = scrape_property(location="Detroit, MI", listing_type="for_sale")
    distinct_agents = listings["agent"].nunique()
    assert distinct_agents > 1


def test_realtor_without_extra_details():
    """Check that disabling extra property data changes the scraped result.

    The same address is scraped twice: once with ``extra_property_data=False``
    and once with the default setting. The two results must not be equal.
    """
    results = [
        scrape_property(
            location="15509 N 172nd Dr, Surprise, AZ 85388",
            extra_property_data=False,
        ),
        scrape_property(
            location="15509 N 172nd Dr, Surprise, AZ 85388",
        ),
    ]

    # NOTE(review): assumes scrape_property returns a pandas DataFrame (other
    # tests call ``["agent"].nunique()`` on it) - confirm. ``!=`` on
    # DataFrames is element-wise (or raises on mismatched labels), so it
    # cannot be used directly in ``assert``; ``equals`` yields a single bool.
    assert not results[0].equals(results[1])

0 comments on commit c3e24a4

Please sign in to comment.