Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add agents #67

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/publish-to-pypi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,4 @@ jobs:
if: startsWith(github.ref, 'refs/tags')
uses: pypa/gh-action-pypi-publish@release/v1
with:
password: ${{ secrets.PYPI_API_TOKEN }}
password: ${{ secrets.PYPI_API_TOKEN }}
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
**/.pytest_cache/
*.pyc
/.ipynb_checkpoints/
*.csv
*.csv
21 changes: 21 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
---
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.2.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-added-large-files
- id: check-yaml
- repo: https://github.com/adrienverge/yamllint
rev: v1.29.0
hooks:
- id: yamllint
verbose: true # create awareness of linter findings
args: ["-d", "{extends: relaxed, rules: {line-length: {max: 120}}}"]
- repo: https://github.com/psf/black
rev: 24.2.0
hooks:
- id: black
language_version: python
args: [--line-length=120, --quiet]
13 changes: 6 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
```bash
pip install -U homeharvest
```
_Python version >= [3.10](https://www.python.org/downloads/release/python-3100/) required_
_Python version >= [3.10](https://www.python.org/downloads/release/python-3100/) required_

## Usage

Expand All @@ -39,11 +39,11 @@ properties = scrape_property(
location="San Diego, CA",
listing_type="sold", # or (for_sale, for_rent, pending)
past_days=30, # sold in last 30 days - listed in last 30 days if (for_sale, for_rent)
# date_from="2023-05-01", # alternative to past_days
# date_to="2023-05-28",

# date_from="2023-05-01", # alternative to past_days
# date_to="2023-05-28",
# foreclosure=True

# mls_only=True, # only fetch MLS listings
)
print(f"Number of properties: {len(properties)}")
Expand Down Expand Up @@ -84,7 +84,7 @@ Optional
├── date_from, date_to (string): Start and end dates to filter properties listed or sold, both dates are required.
│ (use this to get properties in chunks as there's a 10k result limit)
│ Format for both must be "YYYY-MM-DD".
│ Format for both must be "YYYY-MM-DD".
│ Example: "2023-05-01", "2023-05-15" (fetches properties listed/sold between these dates)
├── mls_only (True/False): If set, fetches only MLS listings (mainly applicable to 'sold' listings)
Expand Down Expand Up @@ -143,4 +143,3 @@ The following exceptions may be raised when using HomeHarvest:

- `InvalidListingType` - valid options: `for_sale`, `for_rent`, `sold`
- `InvalidDate` - date_from or date_to is not in the format YYYY-MM-DD

8 changes: 2 additions & 6 deletions homeharvest/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,7 @@

def main():
parser = argparse.ArgumentParser(description="Home Harvest Property Scraper")
parser.add_argument(
"location", type=str, help="Location to scrape (e.g., San Francisco, CA)"
)
parser.add_argument("location", type=str, help="Location to scrape (e.g., San Francisco, CA)")

parser.add_argument(
"-l",
Expand Down Expand Up @@ -35,9 +33,7 @@ def main():
help="Name of the output file (without extension)",
)

parser.add_argument(
"-p", "--proxy", type=str, default=None, help="Proxy to use for scraping"
)
parser.add_argument("-p", "--proxy", type=str, default=None, help="Proxy to use for scraping")
parser.add_argument(
"-d",
"--days",
Expand Down
33 changes: 27 additions & 6 deletions homeharvest/core/scrapers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from dataclasses import dataclass
import requests
import uuid
from .models import Property, ListingType, SiteName


Expand Down Expand Up @@ -27,6 +28,12 @@ def __init__(

if not session:
self.session = requests.Session()
self.session.headers.update(
{
"auth": f"Bearer {self.get_access_token()}",
"apollographql-client-name": "com.move.Realtor-apollo-ios",
}
)
else:
self.session = session

Expand All @@ -43,12 +50,26 @@ def __init__(
self.date_to = scraper_input.date_to
self.foreclosure = scraper_input.foreclosure

def search(self) -> list[Property]:
...
def search(self) -> list[Property]: ...

@staticmethod
def _parse_home(home) -> Property:
...
def _parse_home(home) -> Property: ...

def handle_location(self):
...
def handle_location(self): ...

def get_access_token(self):
    """Request a device access token from the graph.realtor.com token endpoint.

    A freshly generated, upper-cased UUID4 is sent as the device id on every
    call, together with the mobile client identifiers below.

    Returns:
        The value of the ``access_token`` field of the JSON response.

    Raises:
        requests.HTTPError: if the endpoint answers with a 4xx/5xx status.
        requests.Timeout: if the endpoint does not answer within the timeout.
        KeyError: if the response JSON has no ``access_token`` field.
    """
    import json  # local import so this method does not depend on file-level import changes

    # Single source for the client version that appears in the payload and two headers.
    client_version = "24.20.4.149916"
    url = "https://graph.realtor.com/auth/token"

    # Compact separators reproduce the exact wire format of the previous
    # hand-written JSON string while letting json handle quoting/escaping.
    payload = json.dumps(
        {
            "client_app_id": f"rdc_mobile_native,{client_version},iphone",
            "device_id": str(uuid.uuid4()).upper(),
            "grant_type": "device_mobile",
        },
        separators=(",", ":"),
    )
    headers = {
        "Host": "graph.realtor.com",
        "x-client-version": client_version,
        "accept": "*/*",
        "content-type": "Application/json",
        "user-agent": f"Realtor.com/{client_version} CFNetwork/1410.0.3 Darwin/22.6.0",
        "accept-language": "en-US,en;q=0.9",
    }

    # Without a timeout requests waits indefinitely on an unresponsive server.
    response = requests.post(url, headers=headers, data=payload, timeout=30)
    # Fail loudly on HTTP errors instead of surfacing a KeyError/JSON error later.
    response.raise_for_status()

    return response.json()["access_token"]
8 changes: 8 additions & 0 deletions homeharvest/core/scrapers/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,12 @@ class Description:
stories: int | None = None


@dataclass
class Agent:
    """Contact details of an agent attached to a property; every field is optional."""

    # Agent's name; None when not provided.
    name: Optional[str] = None
    # Agent's phone number kept as a string; None when not provided.
    phone: Optional[str] = None

@dataclass
class Property:
property_url: str
Expand All @@ -89,3 +95,5 @@ class Property:
latitude: float | None = None
longitude: float | None = None
neighborhoods: Optional[str] = None

agents: list[Agent] = None
Loading
Loading