Skip to content

Commit

Permalink
Merge branch 'master' into add_agent
Browse files Browse the repository at this point in the history
  • Loading branch information
cullenwatson committed Apr 16, 2024
2 parents 650d25a + cdc6f2a commit 51b37aa
Show file tree
Hide file tree
Showing 11 changed files with 282 additions and 67 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/publish-to-pypi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,4 @@ jobs:
if: startsWith(github.ref, 'refs/tags')
uses: pypa/gh-action-pypi-publish@release/v1
with:
password: ${{ secrets.PYPI_API_TOKEN }}
password: ${{ secrets.PYPI_API_TOKEN }}
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
**/.pytest_cache/
*.pyc
/.ipynb_checkpoints/
*.csv
*.csv
20 changes: 11 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
```bash
pip install -U homeharvest
```
_Python version >= [3.10](https://www.python.org/downloads/release/python-3100/) required_
_Python version >= [3.10](https://www.python.org/downloads/release/python-3100/) required_

## Usage

Expand All @@ -39,11 +39,11 @@ properties = scrape_property(
location="San Diego, CA",
listing_type="sold", # or (for_sale, for_rent, pending)
past_days=30, # sold in last 30 days - listed in last 30 days if (for_sale, for_rent)
# date_from="2023-05-01", # alternative to past_days
# date_to="2023-05-28",

# date_from="2023-05-01", # alternative to past_days
# date_to="2023-05-28",
# foreclosure=True

# mls_only=True, # only fetch MLS listings
)
print(f"Number of properties: {len(properties)}")
Expand Down Expand Up @@ -84,7 +84,7 @@ Optional
├── date_from, date_to (string): Start and end dates to filter properties listed or sold, both dates are required.
│ (use this to get properties in chunks as there's a 10k result limit)
│ Format for both must be "YYYY-MM-DD".
│ Format for both must be "YYYY-MM-DD".
│ Example: "2023-05-01", "2023-05-15" (fetches properties listed/sold between these dates)
├── mls_only (True/False): If set, fetches only MLS listings (mainly applicable to 'sold' listings)
Expand Down Expand Up @@ -128,19 +128,21 @@ Property
│ ├── sold_price
│ ├── last_sold_date
│ ├── price_per_sqft
│ ├── parking_garage
│ └── hoa_fee
├── Location Details:
│ ├── latitude
│ ├── longitude
└── Parking Details:
└── parking_garage
├── Agent Info:
│ ├── agent
│ ├── broker
│ └── broker_phone
```

### Exceptions
The following exceptions may be raised when using HomeHarvest:

- `InvalidListingType` - valid options: `for_sale`, `for_rent`, `sold`, `pending`
- `InvalidDate` - only one of date_from or date_to is provided, or a date is not in the format YYYY-MM-DD

8 changes: 2 additions & 6 deletions homeharvest/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,7 @@

def main():
parser = argparse.ArgumentParser(description="Home Harvest Property Scraper")
parser.add_argument(
"location", type=str, help="Location to scrape (e.g., San Francisco, CA)"
)
parser.add_argument("location", type=str, help="Location to scrape (e.g., San Francisco, CA)")

parser.add_argument(
"-l",
Expand Down Expand Up @@ -35,9 +33,7 @@ def main():
help="Name of the output file (without extension)",
)

parser.add_argument(
"-p", "--proxy", type=str, default=None, help="Proxy to use for scraping"
)
parser.add_argument("-p", "--proxy", type=str, default=None, help="Proxy to use for scraping")
parser.add_argument(
"-d",
"--days",
Expand Down
33 changes: 27 additions & 6 deletions homeharvest/core/scrapers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from dataclasses import dataclass
import requests
import uuid
from .models import Property, ListingType, SiteName


Expand Down Expand Up @@ -27,6 +28,12 @@ def __init__(

if not session:
self.session = requests.Session()
self.session.headers.update(
{
"auth": f"Bearer {self.get_access_token()}",
"apollographql-client-name": "com.move.Realtor-apollo-ios",
}
)
else:
self.session = session

Expand All @@ -43,12 +50,26 @@ def __init__(
self.date_to = scraper_input.date_to
self.foreclosure = scraper_input.foreclosure

def search(self) -> list[Property]:
...
def search(self) -> list[Property]: ...

@staticmethod
def _parse_home(home) -> Property:
...
def _parse_home(home) -> Property: ...

def handle_location(self):
...
def handle_location(self): ...

def get_access_token(self):
    """Request a device access token from the realtor.com auth endpoint.

    Posts a ``device_mobile`` grant request carrying a freshly generated
    device id and returns the ``access_token`` field of the JSON response.

    Returns:
        The value of ``access_token`` from the response body.

    Raises:
        requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within the timeout.
        KeyError: If the response JSON has no ``access_token`` field.
    """
    url = "https://graph.realtor.com/auth/token"
    # The same client version appears in the payload and in two headers;
    # keep it in one place so they cannot drift apart.
    client_version = "24.20.4.149916"

    # A new upper-cased UUID4 is sent as the device id on every call.
    device_id = str(uuid.uuid4()).upper()
    payload = (
        f'{{"client_app_id":"rdc_mobile_native,{client_version},iphone",'
        f'"device_id":"{device_id}",'
        f'"grant_type":"device_mobile"}}'
    )
    headers = {
        "Host": "graph.realtor.com",
        "x-client-version": client_version,
        "accept": "*/*",
        "content-type": "Application/json",
        "user-agent": f"Realtor.com/{client_version} CFNetwork/1410.0.3 Darwin/22.6.0",
        "accept-language": "en-US,en;q=0.9",
    }

    # Without a timeout, requests waits forever on an unresponsive server.
    response = requests.post(url, headers=headers, data=payload, timeout=30)
    # Fail with a clear HTTP error instead of a KeyError/JSON error further down.
    response.raise_for_status()

    return response.json()["access_token"]
8 changes: 8 additions & 0 deletions homeharvest/core/scrapers/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,12 @@ class Description:
stories: int | None = None


@dataclass
class Agent:
    """A named contact with an optional phone number.

    Both fields default to ``None``, so an instance can be created with
    either or both values missing.
    """

    name: str | None = None  # contact name, if available
    phone: str | None = None  # contact phone number, if available


@dataclass
class Property:
property_url: str
Expand All @@ -89,3 +95,5 @@ class Property:
latitude: float | None = None
longitude: float | None = None
neighborhoods: Optional[str] = None

agents: list[Agent] = None
3 changes: 2 additions & 1 deletion homeharvest/exceptions.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
class InvalidListingType(Exception):
    """Raised when a provided listing type does not exist."""


class InvalidDate(Exception):
"""Raised when only one of date_from or date_to is provided or not in the correct format. ex: 2023-10-23 """
"""Raised when only one of date_from or date_to is provided or not in the correct format. ex: 2023-10-23"""
15 changes: 12 additions & 3 deletions homeharvest/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@
"stories",
"hoa_fee",
"parking_garage",
"agent",
"broker",
"broker_phone",
"primary_photo",
"alt_photos",
]
Expand All @@ -48,6 +51,14 @@ def process_result(result: Property) -> pd.DataFrame:
prop_data["state"] = address_data.state
prop_data["zip_code"] = address_data.zip

if "agents" in prop_data:
agents = prop_data["agents"]
if agents:
prop_data["agent"] = agents[0].name
if len(agents) > 1:
prop_data["broker"] = agents[1].name
prop_data["broker_phone"] = agents[1].phone

prop_data["price_per_sqft"] = prop_data["prc_sqft"]

description = result.description
Expand All @@ -72,9 +83,7 @@ def process_result(result: Property) -> pd.DataFrame:

def validate_input(listing_type: str) -> None:
if listing_type.upper() not in ListingType.__members__:
raise InvalidListingType(
f"Provided listing type, '{listing_type}', does not exist."
)
raise InvalidListingType(f"Provided listing type, '{listing_type}', does not exist.")


def validate_dates(date_from: str | None, date_to: str | None) -> None:
Expand Down
Loading

0 comments on commit 51b37aa

Please sign in to comment.