diff --git a/homeharvest/core/scrapers/redfin/__init__.py b/homeharvest/core/scrapers/redfin/__init__.py
index 757fbe4..e44a46d 100644
--- a/homeharvest/core/scrapers/redfin/__init__.py
+++ b/homeharvest/core/scrapers/redfin/__init__.py
@@ -16,9 +16,11 @@ def handle_location(self):
 
         def get_region_type(match_type: str):
             if match_type == "4":
-                return "2"
+                return "2"  #: zip
             elif match_type == "2":
-                return "6"
+                return "6"  #: city
+            elif match_type == "1":
+                return "address"  #: address, needs to be handled differently
 
         if response_json['payload']['exactMatch'] is not None:
             target = response_json['payload']['exactMatch']
@@ -28,20 +30,30 @@ def get_region_type(match_type: str):
         return target['id'].split('_')[1], get_region_type(target['type'])
 
     @staticmethod
-    def parse_home(home: dict) -> Property:
-        address = Address(
-            address_one=home['streetLine']['value'],
-            city=home['city'],
-            state=home['state'],
-            zip_code=home['zip']
-        )
-
-        url = 'https://www.redfin.com{}'.format(home['url'])
-
+    def parse_home(home: dict, single_search: bool = False) -> Property:
         def get_value(key: str) -> Any | None:
             if key in home and 'value' in home[key]:
                 return home[key]['value']
 
+        if not single_search:
+            address = Address(
+                address_one=get_value('streetLine'),
+                city=home['city'],
+                state=home['state'],
+                zip_code=home['zip']
+            )
+        else:
+            address_info = home['streetAddress']
+
+            address = Address(
+                address_one=address_info['assembledAddress'],
+                city=home['city'],
+                state=home['state'],
+                zip_code=home['zip']
+            )
+
+        url = 'https://www.redfin.com{}'.format(home['url'])
+
         return Property(
             address=address,
             url=url,
@@ -50,22 +62,41 @@ def get_value(key: str) -> Any | None:
             stories=home['stories'] if 'stories' in home else None,
             agent_name=get_value('listingAgent'),
             description=home['listingRemarks'] if 'listingRemarks' in home else None,
-            year_built=get_value('yearBuilt'),
+            year_built=get_value('yearBuilt') if not single_search else home['yearBuilt'],
             square_feet=get_value('sqFt'),
             price_per_square_foot=get_value('pricePerSqFt'),
             price=get_value('price'),
             mls_id=get_value('mlsId')
         )
 
+    def handle_address(self, home_id: str):
+        """
+        EPs:
+        https://www.redfin.com/stingray/api/home/details/initialInfo?al=1&path=/TX/Austin/70-Rainey-St-78701/unit-1608/home/147337694
+        https://www.redfin.com/stingray/api/home/details/mainHouseInfoPanelInfo?propertyId=147337694&accessLevel=3
+        https://www.redfin.com/stingray/api/home/details/aboveTheFold?propertyId=147337694&accessLevel=3
+        https://www.redfin.com/stingray/api/home/details/belowTheFold?propertyId=147337694&accessLevel=3
+        """
+
+        url = "https://www.redfin.com/stingray/api/home/details/aboveTheFold?propertyId={}&accessLevel=3".format(home_id)
+
+        response = self.session.get(url)
+        response_json = json.loads(response.text.replace('{}&&', ''))
+
+        parsed_home = self.parse_home(response_json['payload']['addressSectionInfo'], single_search=True)
+        return [parsed_home]
+
     def search(self):
         region_id, region_type = self.handle_location()
 
+        if region_type == "address":
+            home_id = region_id
+            return self.handle_address(home_id)
+
         url = 'https://www.redfin.com/stingray/api/gis?al=1&region_id={}&region_type={}'.format(region_id, region_type)
 
         response = self.session.get(url)
         response_json = json.loads(response.text.replace('{}&&', ''))
 
-        homes = [self.parse_home(home) for home in response_json['payload']['homes']]
+        homes = [self.parse_home(home) for home in response_json['payload']['homes']]  #: support buildings
         return homes
-
-
diff --git a/tests/test_redfin.py b/tests/test_redfin.py
index 7c73931..4c0d12d 100644
--- a/tests/test_redfin.py
+++ b/tests/test_redfin.py
@@ -3,6 +3,10 @@
 
 def test_redfin():
     results = [
+        scrape_property(
+            location="2530 Al Lipscomb Way",
+            site_name="redfin"
+        ),
         scrape_property(
             location="Phoenix, AZ, USA",
             site_name="redfin"