Skip to content

Commit

Permalink
- last x days param
Browse files Browse the repository at this point in the history
  • Loading branch information
ZacharyHampton committed Oct 3, 2023
1 parent 40bbf76 commit 088088a
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 8 deletions.
9 changes: 6 additions & 3 deletions homeharvest/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def _process_result(result: Property) -> pd.DataFrame:
return properties_df


def _scrape_single_site(location: str, site_name: str, listing_type: str, radius: float, proxy: str = None) -> pd.DataFrame:
def _scrape_single_site(location: str, site_name: str, listing_type: str, radius: float, proxy: str = None, sold_last_x_days: int = None) -> pd.DataFrame:
"""
Helper function to scrape a single site.
"""
Expand All @@ -118,6 +118,7 @@ def _scrape_single_site(location: str, site_name: str, listing_type: str, radius
site_name=SiteName.get_by_value(site_name.lower()),
proxy=proxy,
radius=radius,
sold_last_x_days=sold_last_x_days
)

site = _scrapers[site_name.lower()](scraper_input)
Expand All @@ -136,12 +137,14 @@ def scrape_property(
site_name: Union[str, list[str]] = "realtor.com",
listing_type: str = "for_sale",
radius: float = None,
sold_last_x_days: int = None,
proxy: str = None,
keep_duplicates: bool = False
) -> pd.DataFrame:
"""
Scrape property from various sites from a given location and listing type.
:param sold_last_x_days: Sold in last x days
:param radius: Radius in miles to find comparable properties on individual addresses
:param keep_duplicates:
:param proxy:
Expand All @@ -160,12 +163,12 @@ def scrape_property(
results = []

if len(site_name) == 1:
final_df = _scrape_single_site(location, site_name[0], listing_type, radius, proxy)
final_df = _scrape_single_site(location, site_name[0], listing_type, radius, proxy, sold_last_x_days)
results.append(final_df)
else:
with ThreadPoolExecutor() as executor:
futures = {
executor.submit(_scrape_single_site, location, s_name, listing_type, radius, proxy): s_name
executor.submit(_scrape_single_site, location, s_name, listing_type, radius, proxy, sold_last_x_days): s_name
for s_name in site_name
}

Expand Down
2 changes: 2 additions & 0 deletions homeharvest/core/scrapers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ class ScraperInput:
site_name: SiteName
radius: float | None = None
proxy: str | None = None
sold_last_x_days: int | None = None


class Scraper:
Expand All @@ -31,6 +32,7 @@ def __init__(self, scraper_input: ScraperInput, session: requests.Session | tls_
self.listing_type = scraper_input.listing_type
self.site_name = scraper_input.site_name
self.radius = scraper_input.radius
self.sold_last_x_days = scraper_input.sold_last_x_days

def search(self) -> list[Property]:
...
Expand Down
24 changes: 20 additions & 4 deletions homeharvest/core/scrapers/realtor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
This module implements the scraper for realtor.com
"""
from ..models import Property, Address
from ..models import Property, Address, ListingType
from .. import Scraper
from ....exceptions import NoResultsFound
from ....utils import parse_address_one, parse_address_two
Expand Down Expand Up @@ -204,6 +204,10 @@ def handle_area(self, variables: dict, is_for_comps: bool = False, return_total:
}
}}"""

sold_date_param = ('sold_date: { min: "$today-%sD" }' % self.sold_last_x_days
if self.listing_type == ListingType.SOLD and self.sold_last_x_days is not None
else "")

if not is_for_comps:
query = (
"""query Home_search(
Expand All @@ -220,11 +224,17 @@ def handle_area(self, variables: dict, is_for_comps: bool = False, return_total:
postal_code: $postal_code
state_code: $state_code
status: %s
%s
}
limit: 200
offset: $offset
) %s"""
% (self.listing_type.value.lower(), results_query))
% (
self.listing_type.value.lower(),
sold_date_param,
results_query
)
)
else:
query = (
"""query Property_search(
Expand All @@ -233,10 +243,16 @@ def handle_area(self, variables: dict, is_for_comps: bool = False, return_total:
$offset: Int!,
) {
property_search(
query: { nearby: { coordinates: $coordinates, radius: $radius } }
query: {
nearby: {
coordinates: $coordinates
radius: $radius
}
%s
}
limit: 200
offset: $offset
) %s""" % results_query)
) %s""" % (sold_date_param, results_query))

payload = {
"query": query,
Expand Down
14 changes: 13 additions & 1 deletion tests/test_realtor.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,19 @@ def test_realtor_comps():
radius=0.5,
)

print(result)
assert result is not None and len(result) > 0


def test_realtor_last_x_days_sold():
    """Verify the ``sold_last_x_days`` filter narrows sold-listing results.

    Scrapes Dallas, TX sold listings twice with different look-back windows
    (30 vs. 10 days) and expects the result counts to differ, which indicates
    the parameter is actually forwarded into the realtor.com query.
    """
    days_result_30 = scrape_property(
        location="Dallas, TX", site_name="realtor.com", listing_type="sold", sold_last_x_days=30
    )

    days_result_10 = scrape_property(
        location="Dallas, TX", site_name="realtor.com", listing_type="sold", sold_last_x_days=10
    )

    # Separate assertions give a precise failure message instead of one
    # opaque combined condition (and avoid building a throwaway list in all()).
    assert days_result_30 is not None
    assert days_result_10 is not None
    # A 30-day window should capture a different number of sales than 10 days.
    assert len(days_result_30) != len(days_result_10)


def test_realtor():
Expand Down

0 comments on commit 088088a

Please sign in to comment.