Skip to content

Commit

Permalink
- refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
ZacharyHampton committed Sep 18, 2023
1 parent d0a6a66 commit 94e5b09
Showing 1 changed file with 14 additions and 13 deletions.
27 changes: 14 additions & 13 deletions homeharvest/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ def _scrape_single_site(

def scrape_property(
location: str,
site_name: Union[str, list[str]] = list(_scrapers.keys()),
site_name: Union[str, list[str]] = None,
listing_type: str = "for_sale",
) -> pd.DataFrame:
"""
Expand All @@ -138,24 +138,25 @@ def scrape_property(
if not isinstance(site_name, list):
site_name = [site_name]

results = []

if len(site_name) == 1:
final_df = _scrape_single_site(location, site_name[0], listing_type)
final_df = final_df.drop_duplicates(subset="street_address", keep="first")
return final_df

results = []
with ThreadPoolExecutor() as executor:
futures = {
executor.submit(_scrape_single_site, location, s_name, listing_type): s_name
for s_name in site_name
}
results.append(final_df)
else:
with ThreadPoolExecutor() as executor:
futures = {
executor.submit(_scrape_single_site, location, s_name, listing_type): s_name
for s_name in site_name
}

for future in concurrent.futures.as_completed(futures):
result = future.result()
results.append(result)
for future in concurrent.futures.as_completed(futures):
result = future.result()
results.append(result)

if not results:
return pd.DataFrame()

final_df = pd.concat(results, ignore_index=True)
final_df = final_df.drop_duplicates(subset="street_address", keep="first")
return final_df

0 comments on commit 94e5b09

Please sign in to comment.