diff --git a/homeharvest/core/scrapers/__init__.py b/homeharvest/core/scrapers/__init__.py
index 66b4a67..88198e2 100644
--- a/homeharvest/core/scrapers/__init__.py
+++ b/homeharvest/core/scrapers/__init__.py
@@ -21,5 +21,7 @@ def __init__(self, scraper_input: ScraperInput):
             "https": scraper_input.proxy_url,
         }
 
-    def search(self) -> list[Home]:
-        ...
+    def search(self) -> list[Home]: ...
+
+    @staticmethod
+    def parse_home(home) -> Home: ...
diff --git a/homeharvest/core/scrapers/redfin/__init__.py b/homeharvest/core/scrapers/redfin/__init__.py
index f9f2a57..1f3c058 100644
--- a/homeharvest/core/scrapers/redfin/__init__.py
+++ b/homeharvest/core/scrapers/redfin/__init__.py
@@ -1,6 +1,36 @@
+import json
+from ..types import Home, Address
 from .. import Scraper
 
 
 class RedfinScraper(Scraper):
     def __init__(self, scraper_input):
         super().__init__(scraper_input)
+
+    def handle_location(self):
+        url = 'https://www.redfin.com/stingray/do/location-autocomplete?v=2&al=1&location={}'.format(self.location)
+
+        response = self.session.get(url)
+        response_json = json.loads(response.text.replace('{}&&', ''))
+
+        if response_json['payload']['exactMatch'] is not None:
+            return response_json['payload']['exactMatch']['id'].split('_')[1]
+        else:
+            return response_json['payload']['sections'][0]['rows'][0].split('_')[1]
+
+    @staticmethod
+    def parse_home(home) -> Home:
+        ...
+
+    def search(self):
+        region_id = self.handle_location()
+
+        url = 'https://www.redfin.com/stingray/api/gis?al=1&region_id={}&region_type=2'.format(region_id)
+
+        response = self.session.get(url)
+        response_json = json.loads(response.text.replace('{}&&', ''))
+
+        homes = [self.parse_home(home) for home in response_json['payload']['homes']]
+        return homes
+
+
diff --git a/tests/test_redfin.py b/tests/test_redfin.py
index 6169adc..70dfed4 100644
--- a/tests/test_redfin.py
+++ b/tests/test_redfin.py
@@ -3,7 +3,7 @@
 
 def test_redfin():
     result = scrape_property(
-        location="85001"
+        location="85281"
     )
 
     assert result is not None