Skip to content

Commit

Permalink
- realtor init
Browse files Browse the repository at this point in the history
  • Loading branch information
ZacharyHampton committed Sep 16, 2023
1 parent af1f2fa commit 0946abd
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 1 deletion.
4 changes: 3 additions & 1 deletion homeharvest/__init__.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,20 @@
from .core.scrapers.redfin import RedfinScraper
from .core.scrapers.realtor import RealtorScraper
from .core.scrapers.types import ListingType, Home
from .core.scrapers import ScraperInput
from .exceptions import InvalidSite, InvalidListingType


# Registry of supported sites: maps the lowercase site name accepted by
# scrape_property() to the scraper class that handles it.
_scrapers = {
    "redfin": RedfinScraper,
    "realtor.com": RealtorScraper,
}


def scrape_property(
location: str,
site_name: str,
listing_type: str = "for_sale", #: for_sale, for_rent, sold
site_name: str = "redfin",
) -> list[Home]: #: eventually, return pandas dataframe
if site_name.lower() not in _scrapers:
raise InvalidSite(f"Provided site, '{site_name}', does not exist.")
Expand Down
2 changes: 2 additions & 0 deletions homeharvest/core/scrapers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,5 @@ def search(self) -> list[Home]: ...

@staticmethod
def parse_home(home) -> Home:
    """Stub with no behavior of its own.

    NOTE(review): presumably each site-specific scraper overrides this to
    turn one raw listing record into a ``Home`` -- confirm against the
    concrete scrapers.
    """

def handle_location(self):
    """Stub with no behavior of its own; returns ``None``.

    Site-specific scrapers (e.g. ``RealtorScraper``) override this to
    resolve the requested location before searching.
    """
42 changes: 42 additions & 0 deletions homeharvest/core/scrapers/realtor/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import json
from ..types import Home, Address
from .. import Scraper
from typing import Any


class RealtorScraper(Scraper):
    """Scraper for realtor.com (work in progress).

    At this stage the class only resolves the requested location through
    the realtor.com autocomplete service; fetching and parsing listings is
    not implemented yet.

    NOTE(review): relies on ``self.location`` and ``self.session`` being set
    by ``Scraper.__init__`` -- confirm against the base class.
    """

    def __init__(self, scraper_input):
        super().__init__(scraper_input)

    def handle_location(self):
        """Resolve ``self.location`` to the top autocomplete suggestion.

        Sends the location text to the ``/suggest`` endpoint with ``limit=1``
        and returns the first entry of the ``autocomplete`` list found in the
        JSON reply.

        Returns:
            The first suggestion object from the reply (``search`` reads its
            ``area_type`` key).

        Raises:
            ValueError: If the reply contains no suggestions for the
                location, instead of failing with an opaque
                ``KeyError``/``IndexError``.
        """
        # Browser-like headers copied from a Chrome 116 session on realtor.com.
        headers = {
            'authority': 'parser-external.geo.moveaws.com',
            'accept': '*/*',
            'accept-language': 'en-US,en;q=0.9',
            'origin': 'https://www.realtor.com',
            'referer': 'https://www.realtor.com/',
            'sec-ch-ua': '"Chromium";v="116", "Not)A;Brand";v="24", "Google Chrome";v="116"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"Windows"',
            'sec-fetch-dest': 'empty',
            'sec-fetch-mode': 'cors',
            'sec-fetch-site': 'cross-site',
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
        }

        params = {
            'input': self.location,
            'client_id': 'for-sale',
            'limit': '1',  # only the best match is needed
            'area_types': 'city,state,county,postal_code,address,street,neighborhood,school,school_district,university,park',
        }

        response = self.session.get(
            'https://parser-external.geo.moveaws.com/suggest',
            params=params,
            headers=headers,
        )
        response_json = response.json()

        # Guard against a missing or empty result list so an unknown location
        # produces a clear error rather than a bare KeyError/IndexError.
        suggestions = response_json.get('autocomplete')
        if not suggestions:
            raise ValueError(
                f"No realtor.com location match found for '{self.location}'."
            )

        return suggestions[0]

    def search(self) -> list[Home]:
        """Run the realtor.com search for the configured location.

        Only the location-resolution step exists so far, so this always
        returns an empty list (matching the base ``search`` signature
        rather than returning ``None``).

        Returns:
            An empty list until listing retrieval is implemented.
        """
        location_info = self.handle_location()
        # TODO: choose the listing query based on the suggestion's area type.
        location_type = location_info['area_type']  # noqa: F841 -- not used yet
        return []
10 changes: 10 additions & 0 deletions tests/test_realtor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from homeharvest import scrape_property


def test_realtor():
    """Smoke test: scraping realtor.com for ZIP 85281 yields a non-None result."""
    listings = scrape_property(site_name="realtor.com", location="85281")

    assert listings is not None
1 change: 1 addition & 0 deletions tests/test_redfin.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

def test_redfin():
result = scrape_property(
site_name="redfin",
location="85281"
)

Expand Down

0 comments on commit 0946abd

Please sign in to comment.