diff --git a/README.md b/README.md index af0c131..7b288bb 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ ## RoadMap -- Currently supports scraping from **RedFin** +- Currently, supports scraping from **RedFin** - Coming soon: Support for **Zillow** and other real estate platforms - Under consideration: Excel plugin to attract a wider audience diff --git a/homeharvest/core/scrapers/realtor/__init__.py b/homeharvest/core/scrapers/realtor/__init__.py index 8854ecc..96a6aa7 100644 --- a/homeharvest/core/scrapers/realtor/__init__.py +++ b/homeharvest/core/scrapers/realtor/__init__.py @@ -36,7 +36,13 @@ def handle_location(self): return response_json['autocomplete'][0] + def search(self): location_info = self.handle_location() location_type = location_info['area_type'] + + """ + property types: + apartment + building + commercial + condo_townhome + condo_townhome_rowhome_coop + condos + coop + duplex_triplex + farm + investment + land + mobile + multi_family + rental + single_family + townhomes + """ print('a') diff --git a/homeharvest/core/scrapers/redfin/__init__.py b/homeharvest/core/scrapers/redfin/__init__.py index 2705f1d..757fbe4 100644 --- a/homeharvest/core/scrapers/redfin/__init__.py +++ b/homeharvest/core/scrapers/redfin/__init__.py @@ -14,10 +14,18 @@ def handle_location(self): response = self.session.get(url) response_json = json.loads(response.text.replace('{}&&', '')) + def get_region_type(match_type: str): + if match_type == "4": + return "2" + elif match_type == "2": + return "6" + if response_json['payload']['exactMatch'] is not None: - return response_json['payload']['exactMatch']['id'].split('_')[1] + target = response_json['payload']['exactMatch'] else: - return response_json['payload']['sections'][0]['rows'][0].split('_')[1] + target = response_json['payload']['sections'][0]['rows'][0] + + return target['id'].split('_')[1], get_region_type(target['type']) @staticmethod def parse_home(home: dict) -> Property: @@ -50,9 +58,9 @@ def 
get_value(key: str) -> Any | None: ) def search(self): - region_id = self.handle_location() + region_id, region_type = self.handle_location() - url = 'https://www.redfin.com/stingray/api/gis?al=1&region_id={}&region_type=2'.format(region_id) + url = 'https://www.redfin.com/stingray/api/gis?al=1&region_id={}&region_type={}'.format(region_id, region_type) response = self.session.get(url) response_json = json.loads(response.text.replace('{}&&', '')) diff --git a/homeharvest/core/scrapers/types.py b/homeharvest/core/scrapers/types.py index 3fdcd73..0c5edeb 100644 --- a/homeharvest/core/scrapers/types.py +++ b/homeharvest/core/scrapers/types.py @@ -33,3 +33,5 @@ class Property: price_per_square_foot: int | None = None price: int | None = None mls_id: str | None = None + + property_type: str | None = None diff --git a/tests/test_realtor.py b/tests/test_realtor.py index f43cd16..665db8b 100644 --- a/tests/test_realtor.py +++ b/tests/test_realtor.py @@ -2,9 +2,11 @@ def test_realtor(): - result = scrape_property( - location="85281", - site_name="realtor.com" - ) + results = [ + scrape_property( + location="85281", + site_name="realtor.com" + ), + ] - assert result is not None + assert all([result is not None for result in results]) diff --git a/tests/test_redfin.py b/tests/test_redfin.py index 7f8af3b..7c73931 100644 --- a/tests/test_redfin.py +++ b/tests/test_redfin.py @@ -2,9 +2,19 @@ def test_redfin(): - result = scrape_property( - site_name="redfin", - location="85281" - ) + results = [ + scrape_property( + location="Phoenix, AZ, USA", + site_name="redfin" + ), + scrape_property( + location="Dallas, TX, USA", + site_name="redfin" + ), + scrape_property( + location="85281", + site_name="redfin" + ), + ] - assert result is not None + assert all([result is not None for result in results])