diff --git a/.gitignore b/.gitignore index 8c31f0c..1f97e4a 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ **/dist/ **/__pycache__/ **/.pytest_cache/ -*.pyc \ No newline at end of file +*.pyc +/.ipynb_checkpoints/ \ No newline at end of file diff --git a/HomeHarvest_Demo.ipynb b/HomeHarvest_Demo.ipynb new file mode 100644 index 0000000..fc0dceb --- /dev/null +++ b/HomeHarvest_Demo.ipynb @@ -0,0 +1,73 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "cb48903e-5021-49fe-9688-45cd0bc05d0f", + "metadata": {}, + "outputs": [], + "source": [ + "from homeharvest import scrape_property\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "156488ce-0d5f-43c5-87f4-c33e9c427860", + "metadata": {}, + "outputs": [], + "source": [ + "pd.set_option('display.max_columns', None) # Show all columns\n", + "pd.set_option('display.max_rows', None) # Show all rows\n", + "pd.set_option('display.width', None) # Auto-adjust display width to fit console\n", + "pd.set_option('display.max_colwidth', 50) # Limit max column width to 50 characters" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c8b9744-8606-4e9b-8add-b90371a249a7", + "metadata": {}, + "outputs": [], + "source": [ + "scrape_property(\n", + " location=\"dallas\", site_name=\"zillow\", listing_type=\"for_sale\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ab7b4c21-da1d-4713-9df4-d7425d8ce21e", + "metadata": {}, + "outputs": [], + "source": [ + "scrape_property(\n", + " location=\"dallas\", site_name=\"redfin\", listing_type=\"for_sale\"\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/homeharvest/__init__.py b/homeharvest/__init__.py index aba1fd5..52dfb9b 100644 --- a/homeharvest/__init__.py +++ b/homeharvest/__init__.py @@ -1,10 +1,11 @@ from .core.scrapers.redfin import RedfinScraper from .core.scrapers.realtor import RealtorScraper from .core.scrapers.zillow import ZillowScraper -from .core.scrapers.models import ListingType, Property, Building +from .core.scrapers.models import ListingType, Property, Building, SiteName from .core.scrapers import ScraperInput from .exceptions import InvalidSite, InvalidListingType from typing import Union +import pandas as pd _scrapers = { @@ -18,7 +19,7 @@ def scrape_property( location: str, site_name: str, listing_type: str = "for_sale", #: for_sale, for_rent, sold -) -> Union[list[Building], list[Property]]: #: eventually, return pandas dataframe +) -> Union[list[Building], list[Property]]: if site_name.lower() not in _scrapers: raise InvalidSite(f"Provided site, '{site_name}', does not exist.") @@ -30,8 +31,69 @@ def scrape_property( scraper_input = ScraperInput( location=location, listing_type=ListingType[listing_type.upper()], + site_name=SiteName[site_name.upper()], ) site = _scrapers[site_name.lower()](scraper_input) + results = site.search() - return site.search() + properties_dfs = [] + + for result in results: + prop_data = result.__dict__ + + address_data = prop_data["address"] + prop_data["site_name"] = prop_data["site_name"].value + prop_data["listing_type"] = prop_data["listing_type"].value + prop_data["property_type"] = prop_data["property_type"].value.lower() + prop_data["address_one"] = address_data.address_one + prop_data["city"] = address_data.city + prop_data["state"] = address_data.state + prop_data["zip_code"] = address_data.zip_code + prop_data["address_two"] = address_data.address_two + + del prop_data["address"] + + if isinstance(result, Property): + desired_order = [ + "listing_type", + "address_one", + "city", + "state", + "zip_code", + "address_two", + "url", + "property_type", + "price", + "beds", + "baths", + "square_feet", + "price_per_square_foot", + "lot_size", + "stories", + "year_built", + "agent_name", + "mls_id", + "description", + ] + + elif isinstance(result, Building): + desired_order = [ + "address_one", + "city", + "state", + "zip_code", + "address_two", + "url", + "num_units", + "min_unit_price", + "max_unit_price", + "avg_unit_price", + "listing_type", + ] + + properties_df = pd.DataFrame([prop_data]) + properties_df = properties_df[desired_order] + properties_dfs.append(properties_df) + + return pd.concat(properties_dfs, ignore_index=True) diff --git a/homeharvest/core/scrapers/__init__.py b/homeharvest/core/scrapers/__init__.py index 350a4e5..873bf76 100644 --- a/homeharvest/core/scrapers/__init__.py +++ b/homeharvest/core/scrapers/__init__.py @@ -1,12 +1,13 @@ from dataclasses import dataclass import requests -from .models import Property, ListingType +from .models import Property, ListingType, SiteName @dataclass class ScraperInput: location: str listing_type: ListingType + site_name: SiteName proxy_url: str | None = None @@ -14,6 +15,8 @@ class Scraper: def __init__(self, scraper_input: ScraperInput): self.location = scraper_input.location self.session = requests.Session() + self.listing_type = scraper_input.listing_type + self.site_name = scraper_input.site_name if scraper_input.proxy_url: self.session.proxies = { diff --git a/homeharvest/core/scrapers/models.py b/homeharvest/core/scrapers/models.py index 3c52acf..9da1d30 100644 --- a/homeharvest/core/scrapers/models.py +++ b/homeharvest/core/scrapers/models.py @@ -2,12 +2,43 @@ from enum import Enum +class SiteName(Enum): + ZILLOW = "zillow" + REDFIN = "redfin" + REALTOR = "realtor.com" + + class ListingType(Enum): FOR_SALE = "for_sale" FOR_RENT = "for_rent" SOLD = "sold" +class PropertyType(Enum): + HOUSE = "HOUSE" + CONDO = "CONDO" + TOWNHOUSE = "townhousE" + SINGLE_FAMILY = "SINGLE_FAMILY" + MULTI_FAMILY = "MULTI_FAMILY" + LAND = "LAND" + OTHER = "OTHER" + + @classmethod + def from_int_code(cls, code): + mapping = { + 1: cls.HOUSE, + 2: cls.CONDO, + 3: cls.TOWNHOUSE, + 4: cls.MULTI_FAMILY, + 5: cls.LAND, + 6: cls.OTHER, + 8: cls.SINGLE_FAMILY, + 13: cls.SINGLE_FAMILY, + } + + return mapping.get(code, cls.OTHER) + + @dataclass class Address: address_one: str @@ -18,35 +49,35 @@ class Address: address_two: str | None = None -@dataclass -class Property: +@dataclass() +class Realty: + site_name: SiteName address: Address url: str + listing_type: ListingType | None = None + +@dataclass +class Property(Realty): + price: int | None = None beds: int | None = None baths: float | None = None stories: int | None = None - agent_name: str | None = None year_built: int | None = None square_feet: int | None = None price_per_square_foot: int | None = None year_built: int | None = None - price: int | None = None mls_id: str | None = None - listing_type: ListingType | None = None + agent_name: str | None = None + property_type: PropertyType | None = None lot_size: int | None = None description: str | None = None @dataclass -class Building: - address: Address - url: str - +class Building(Realty): num_units: int | None = None min_unit_price: int | None = None max_unit_price: int | None = None avg_unit_price: int | None = None - - listing_type: str | None = None diff --git a/homeharvest/core/scrapers/redfin/__init__.py b/homeharvest/core/scrapers/redfin/__init__.py index 3c70325..3f08f20 100644 --- a/homeharvest/core/scrapers/redfin/__init__.py +++ b/homeharvest/core/scrapers/redfin/__init__.py @@ -1,5 +1,5 @@ import json -from ..models import Property, Address +from ..models import Property, Address, PropertyType from .. import Scraper from typing import Any @@ -7,6 +7,7 @@ class RedfinScraper(Scraper): def __init__(self, scraper_input): super().__init__(scraper_input) + self.listing_type = scraper_input.listing_type def _handle_location(self): url = "https://www.redfin.com/stingray/do/location-autocomplete?v=2&al=1&location={}".format( @@ -31,8 +32,7 @@ def get_region_type(match_type: str): return target["id"].split("_")[1], get_region_type(target["type"]) - @staticmethod - def _parse_home(home: dict, single_search: bool = False) -> Property: + def _parse_home(self, home: dict, single_search: bool = False) -> Property: def get_value(key: str) -> Any | None: if key in home and "value" in home[key]: return home[key]["value"] @@ -53,10 +53,12 @@ def get_value(key: str) -> Any | None: state=home["state"], zip_code=home["zip"], ) - url = "https://www.redfin.com{}".format(home["url"]) + property_type = home["propertyType"] if "propertyType" in home else None return Property( + site_name=self.site_name, + listing_type=self.listing_type, address=address, url=url, beds=home["beds"] if "beds" in home else None, @@ -68,6 +70,8 @@ def get_value(key: str) -> Any | None: if not single_search else home["yearBuilt"], square_feet=get_value("sqFt"), + lot_size=home.get("lotSize", {}).get("value", None), + property_type=PropertyType.from_int_code(home.get("propertyType")), price_per_square_foot=get_value("pricePerSqFt"), price=get_value("price"), mls_id=get_value("mlsId"), diff --git a/homeharvest/core/scrapers/zillow/__init__.py b/homeharvest/core/scrapers/zillow/__init__.py index 8154077..cc41826 100644 --- a/homeharvest/core/scrapers/zillow/__init__.py +++ b/homeharvest/core/scrapers/zillow/__init__.py @@ -1,13 +1,11 @@ import re import json -from ..models import Property, Address, Building, ListingType +from ..models import Property, Address, Building, ListingType, PropertyType from ....exceptions import NoResultsFound, PropertyNotFound from .. import Scraper class ZillowScraper(Scraper): - listing_type: ListingType.FOR_SALE - def __init__(self, scraper_input): super().__init__(scraper_input) self.listing_type = scraper_input.listing_type @@ -65,15 +63,17 @@ def _parse_home(self, home: dict): agent_name = self._extract_agent_name(home) beds = home["hdpData"]["homeInfo"]["bedrooms"] baths = home["hdpData"]["homeInfo"]["bathrooms"] - listing_type = home["hdpData"]["homeInfo"].get("homeType") + property_type = home["hdpData"]["homeInfo"].get("homeType") return Property( + site_name=self.site_name, address=address, agent_name=agent_name, url=url, beds=beds, baths=baths, - listing_type=listing_type, + listing_type=self.listing_type, + property_type=PropertyType(property_type), **price_data, ) else: @@ -83,10 +83,11 @@ def _parse_home(self, home: dict): address = Address(address_one, city, state, zip_code, address_two) building_info = self._extract_building_info(home) - return Building(address=address, url=url, **building_info) + return Building( + site_name=self.site_name, address=address, url=url, **building_info + ) - @classmethod - def _get_single_property_page(cls, property_data: dict): + def _get_single_property_page(self, property_data: dict): """ This method is used when a user enters the exact location & zillow returns just one property """ @@ -104,8 +105,11 @@ def _get_single_property_page(cls, property_data: dict): state=address_data["state"], zip_code=address_data["zipcode"], ) + property_type = property_data.get("homeType", None) + print(property_type) return Property( + site_name=self.site_name, address=address, url=url, beds=property_data.get("bedrooms", None), @@ -121,7 +125,8 @@ def _get_single_property_page(cls, property_data: dict): "pricePerSquareFoot", None ), square_feet=property_data.get("livingArea", None), - listing_type=property_data.get("homeType", None), + property_type=PropertyType(property_type), + listing_type=self.listing_type, ) def _extract_building_info(self, home: dict) -> dict: diff --git a/poetry.lock b/poetry.lock index 421e83a..535d217 100644 --- a/poetry.lock +++ b/poetry.lock @@ -142,6 +142,81 @@ files = [ {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, ] +[[package]] +name = "numpy" +version = "1.25.2" +description = "Fundamental package for array computing in Python" +optional = false +python-versions = ">=3.9" +files = [ + {file = "numpy-1.25.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:db3ccc4e37a6873045580d413fe79b68e47a681af8db2e046f1dacfa11f86eb3"}, + {file = "numpy-1.25.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:90319e4f002795ccfc9050110bbbaa16c944b1c37c0baeea43c5fb881693ae1f"}, + {file = "numpy-1.25.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dfe4a913e29b418d096e696ddd422d8a5d13ffba4ea91f9f60440a3b759b0187"}, + {file = "numpy-1.25.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f08f2e037bba04e707eebf4bc934f1972a315c883a9e0ebfa8a7756eabf9e357"}, + {file = "numpy-1.25.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bec1e7213c7cb00d67093247f8c4db156fd03075f49876957dca4711306d39c9"}, + {file = "numpy-1.25.2-cp310-cp310-win32.whl", hash = "sha256:7dc869c0c75988e1c693d0e2d5b26034644399dd929bc049db55395b1379e044"}, + {file = "numpy-1.25.2-cp310-cp310-win_amd64.whl", hash = "sha256:834b386f2b8210dca38c71a6e0f4fd6922f7d3fcff935dbe3a570945acb1b545"}, + {file = "numpy-1.25.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c5462d19336db4560041517dbb7759c21d181a67cb01b36ca109b2ae37d32418"}, + {file = "numpy-1.25.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c5652ea24d33585ea39eb6a6a15dac87a1206a692719ff45d53c5282e66d4a8f"}, + {file = "numpy-1.25.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d60fbae8e0019865fc4784745814cff1c421df5afee233db6d88ab4f14655a2"}, + {file = "numpy-1.25.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:60e7f0f7f6d0eee8364b9a6304c2845b9c491ac706048c7e8cf47b83123b8dbf"}, + {file = "numpy-1.25.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:bb33d5a1cf360304754913a350edda36d5b8c5331a8237268c48f91253c3a364"}, + {file = "numpy-1.25.2-cp311-cp311-win32.whl", hash = "sha256:5883c06bb92f2e6c8181df7b39971a5fb436288db58b5a1c3967702d4278691d"}, + {file = "numpy-1.25.2-cp311-cp311-win_amd64.whl", hash = "sha256:5c97325a0ba6f9d041feb9390924614b60b99209a71a69c876f71052521d42a4"}, + {file = "numpy-1.25.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b79e513d7aac42ae918db3ad1341a015488530d0bb2a6abcbdd10a3a829ccfd3"}, + {file = "numpy-1.25.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:eb942bfb6f84df5ce05dbf4b46673ffed0d3da59f13635ea9b926af3deb76926"}, + {file = "numpy-1.25.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e0746410e73384e70d286f93abf2520035250aad8c5714240b0492a7302fdca"}, + {file = "numpy-1.25.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d7806500e4f5bdd04095e849265e55de20d8cc4b661b038957354327f6d9b295"}, + {file = "numpy-1.25.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8b77775f4b7df768967a7c8b3567e309f617dd5e99aeb886fa14dc1a0791141f"}, + {file = "numpy-1.25.2-cp39-cp39-win32.whl", hash = "sha256:2792d23d62ec51e50ce4d4b7d73de8f67a2fd3ea710dcbc8563a51a03fb07b01"}, + {file = "numpy-1.25.2-cp39-cp39-win_amd64.whl", hash = "sha256:76b4115d42a7dfc5d485d358728cdd8719be33cc5ec6ec08632a5d6fca2ed380"}, + {file = "numpy-1.25.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:1a1329e26f46230bf77b02cc19e900db9b52f398d6722ca853349a782d4cff55"}, + {file = "numpy-1.25.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c3abc71e8b6edba80a01a52e66d83c5d14433cbcd26a40c329ec7ed09f37901"}, + {file = "numpy-1.25.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:1b9735c27cea5d995496f46a8b1cd7b408b3f34b6d50459d9ac8fe3a20cc17bf"}, + {file = "numpy-1.25.2.tar.gz", hash = "sha256:fd608e19c8d7c55021dffd43bfe5492fab8cc105cc8986f813f8c3c048b38760"}, +] + +[[package]] +name = "numpy" +version = "1.26.0" +description = "Fundamental package for array computing in Python" +optional = false +python-versions = "<3.13,>=3.9" +files = [ + {file = "numpy-1.26.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f8db2f125746e44dce707dd44d4f4efeea8d7e2b43aace3f8d1f235cfa2733dd"}, + {file = "numpy-1.26.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0621f7daf973d34d18b4e4bafb210bbaf1ef5e0100b5fa750bd9cde84c7ac292"}, + {file = "numpy-1.26.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:51be5f8c349fdd1a5568e72713a21f518e7d6707bcf8503b528b88d33b57dc68"}, + {file = "numpy-1.26.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:767254ad364991ccfc4d81b8152912e53e103ec192d1bb4ea6b1f5a7117040be"}, + {file = "numpy-1.26.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:436c8e9a4bdeeee84e3e59614d38c3dbd3235838a877af8c211cfcac8a80b8d3"}, + {file = "numpy-1.26.0-cp310-cp310-win32.whl", hash = "sha256:c2e698cb0c6dda9372ea98a0344245ee65bdc1c9dd939cceed6bb91256837896"}, + {file = "numpy-1.26.0-cp310-cp310-win_amd64.whl", hash = "sha256:09aaee96c2cbdea95de76ecb8a586cb687d281c881f5f17bfc0fb7f5890f6b91"}, + {file = "numpy-1.26.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:637c58b468a69869258b8ae26f4a4c6ff8abffd4a8334c830ffb63e0feefe99a"}, + {file = "numpy-1.26.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:306545e234503a24fe9ae95ebf84d25cba1fdc27db971aa2d9f1ab6bba19a9dd"}, + {file = "numpy-1.26.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c6adc33561bd1d46f81131d5352348350fc23df4d742bb246cdfca606ea1208"}, + {file = "numpy-1.26.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e062aa24638bb5018b7841977c360d2f5917268d125c833a686b7cbabbec496c"}, + {file = "numpy-1.26.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:546b7dd7e22f3c6861463bebb000646fa730e55df5ee4a0224408b5694cc6148"}, + {file = "numpy-1.26.0-cp311-cp311-win32.whl", hash = "sha256:c0b45c8b65b79337dee5134d038346d30e109e9e2e9d43464a2970e5c0e93229"}, + {file = "numpy-1.26.0-cp311-cp311-win_amd64.whl", hash = "sha256:eae430ecf5794cb7ae7fa3808740b015aa80747e5266153128ef055975a72b99"}, + {file = "numpy-1.26.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:166b36197e9debc4e384e9c652ba60c0bacc216d0fc89e78f973a9760b503388"}, + {file = "numpy-1.26.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f042f66d0b4ae6d48e70e28d487376204d3cbf43b84c03bac57e28dac6151581"}, + {file = "numpy-1.26.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e5e18e5b14a7560d8acf1c596688f4dfd19b4f2945b245a71e5af4ddb7422feb"}, + {file = "numpy-1.26.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f6bad22a791226d0a5c7c27a80a20e11cfe09ad5ef9084d4d3fc4a299cca505"}, + {file = "numpy-1.26.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4acc65dd65da28060e206c8f27a573455ed724e6179941edb19f97e58161bb69"}, + {file = "numpy-1.26.0-cp312-cp312-win32.whl", hash = "sha256:bb0d9a1aaf5f1cb7967320e80690a1d7ff69f1d47ebc5a9bea013e3a21faec95"}, + {file = "numpy-1.26.0-cp312-cp312-win_amd64.whl", hash = "sha256:ee84ca3c58fe48b8ddafdeb1db87388dce2c3c3f701bf447b05e4cfcc3679112"}, + {file = "numpy-1.26.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4a873a8180479bc829313e8d9798d5234dfacfc2e8a7ac188418189bb8eafbd2"}, + {file = "numpy-1.26.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:914b28d3215e0c721dc75db3ad6d62f51f630cb0c277e6b3bcb39519bed10bd8"}, + {file = "numpy-1.26.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c78a22e95182fb2e7874712433eaa610478a3caf86f28c621708d35fa4fd6e7f"}, + {file = "numpy-1.26.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86f737708b366c36b76e953c46ba5827d8c27b7a8c9d0f471810728e5a2fe57c"}, + {file = "numpy-1.26.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:b44e6a09afc12952a7d2a58ca0a2429ee0d49a4f89d83a0a11052da696440e49"}, + {file = "numpy-1.26.0-cp39-cp39-win32.whl", hash = "sha256:5671338034b820c8d58c81ad1dafc0ed5a00771a82fccc71d6438df00302094b"}, + {file = "numpy-1.26.0-cp39-cp39-win_amd64.whl", hash = "sha256:020cdbee66ed46b671429c7265cf00d8ac91c046901c55684954c3958525dab2"}, + {file = "numpy-1.26.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:0792824ce2f7ea0c82ed2e4fecc29bb86bee0567a080dacaf2e0a01fe7654369"}, + {file = "numpy-1.26.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7d484292eaeb3e84a51432a94f53578689ffdea3f90e10c8b203a99be5af57d8"}, + {file = "numpy-1.26.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:186ba67fad3c60dbe8a3abff3b67a91351100f2661c8e2a80364ae6279720299"}, + {file = "numpy-1.26.0.tar.gz", hash = "sha256:f93fc78fe8bf15afe2b8d6b6499f1c73953169fad1e9a8dd086cdff3190e7fdf"}, +] + [[package]] name = "packaging" version = "23.1" @@ -153,6 +228,67 @@ files = [ {file = "packaging-23.1.tar.gz", hash = "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"}, ] +[[package]] +name = "pandas" +version = "2.1.0" +description = "Powerful data structures for data analysis, time series, and statistics" +optional = false +python-versions = ">=3.9" +files = [ + {file = "pandas-2.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:40dd20439ff94f1b2ed55b393ecee9cb6f3b08104c2c40b0cb7186a2f0046242"}, + {file = "pandas-2.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d4f38e4fedeba580285eaac7ede4f686c6701a9e618d8a857b138a126d067f2f"}, + {file = "pandas-2.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6e6a0fe052cf27ceb29be9429428b4918f3740e37ff185658f40d8702f0b3e09"}, + {file = "pandas-2.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d81e1813191070440d4c7a413cb673052b3b4a984ffd86b8dd468c45742d3cc"}, + {file = "pandas-2.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:eb20252720b1cc1b7d0b2879ffc7e0542dd568f24d7c4b2347cb035206936421"}, + {file = "pandas-2.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:38f74ef7ebc0ffb43b3d633e23d74882bce7e27bfa09607f3c5d3e03ffd9a4a5"}, + {file = "pandas-2.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cda72cc8c4761c8f1d97b169661f23a86b16fdb240bdc341173aee17e4d6cedd"}, + {file = "pandas-2.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d97daeac0db8c993420b10da4f5f5b39b01fc9ca689a17844e07c0a35ac96b4b"}, + {file = "pandas-2.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8c58b1113892e0c8078f006a167cc210a92bdae23322bb4614f2f0b7a4b510f"}, + {file = "pandas-2.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:629124923bcf798965b054a540f9ccdfd60f71361255c81fa1ecd94a904b9dd3"}, + {file = "pandas-2.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:70cf866af3ab346a10debba8ea78077cf3a8cd14bd5e4bed3d41555a3280041c"}, + {file = "pandas-2.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:d53c8c1001f6a192ff1de1efe03b31a423d0eee2e9e855e69d004308e046e694"}, + {file = "pandas-2.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:86f100b3876b8c6d1a2c66207288ead435dc71041ee4aea789e55ef0e06408cb"}, + {file = "pandas-2.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:28f330845ad21c11db51e02d8d69acc9035edfd1116926ff7245c7215db57957"}, + {file = "pandas-2.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9a6ccf0963db88f9b12df6720e55f337447aea217f426a22d71f4213a3099a6"}, + {file = "pandas-2.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d99e678180bc59b0c9443314297bddce4ad35727a1a2656dbe585fd78710b3b9"}, + {file = "pandas-2.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:b31da36d376d50a1a492efb18097b9101bdbd8b3fbb3f49006e02d4495d4c644"}, + {file = "pandas-2.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:0164b85937707ec7f70b34a6c3a578dbf0f50787f910f21ca3b26a7fd3363437"}, + {file = "pandas-2.1.0.tar.gz", hash = "sha256:62c24c7fc59e42b775ce0679cfa7b14a5f9bfb7643cfbe708c960699e05fb918"}, +] + +[package.dependencies] +numpy = [ + {version = ">=1.22.4", markers = "python_version < \"3.11\""}, + {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, +] +python-dateutil = ">=2.8.2" +pytz = ">=2020.1" +tzdata = ">=2022.1" + +[package.extras] +all = ["PyQt5 (>=5.15.6)", "SQLAlchemy (>=1.4.36)", "beautifulsoup4 (>=4.11.1)", "bottleneck (>=1.3.4)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=0.8.1)", "fsspec (>=2022.05.0)", "gcsfs (>=2022.05.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.8.0)", "matplotlib (>=3.6.1)", "numba (>=0.55.2)", "numexpr (>=2.8.0)", "odfpy (>=1.4.1)", "openpyxl (>=3.0.10)", "pandas-gbq (>=0.17.5)", "psycopg2 (>=2.9.3)", "pyarrow (>=7.0.0)", "pymysql (>=1.0.2)", "pyreadstat (>=1.1.5)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)", "pyxlsb (>=1.0.9)", "qtpy (>=2.2.0)", "s3fs (>=2022.05.0)", "scipy (>=1.8.1)", "tables (>=3.7.0)", "tabulate (>=0.8.10)", "xarray (>=2022.03.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.3)", "zstandard (>=0.17.0)"] +aws = ["s3fs (>=2022.05.0)"] +clipboard = ["PyQt5 (>=5.15.6)", "qtpy (>=2.2.0)"] +compression = ["zstandard (>=0.17.0)"] +computation = ["scipy (>=1.8.1)", "xarray (>=2022.03.0)"] +consortium-standard = ["dataframe-api-compat (>=0.1.7)"] +excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.0.10)", "pyxlsb (>=1.0.9)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.3)"] +feather = ["pyarrow (>=7.0.0)"] +fss = ["fsspec (>=2022.05.0)"] +gcp = ["gcsfs (>=2022.05.0)", "pandas-gbq (>=0.17.5)"] +hdf5 = ["tables (>=3.7.0)"] +html = ["beautifulsoup4 (>=4.11.1)", "html5lib (>=1.1)", "lxml (>=4.8.0)"] +mysql = ["SQLAlchemy (>=1.4.36)", "pymysql (>=1.0.2)"] +output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.8.10)"] +parquet = ["pyarrow (>=7.0.0)"] +performance = ["bottleneck (>=1.3.4)", "numba (>=0.55.2)", "numexpr (>=2.8.0)"] +plot = ["matplotlib (>=3.6.1)"] +postgresql = ["SQLAlchemy (>=1.4.36)", "psycopg2 (>=2.9.3)"] +spss = ["pyreadstat (>=1.1.5)"] +sql-other = ["SQLAlchemy (>=1.4.36)"] +test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)"] +xml = ["lxml (>=4.8.0)"] + [[package]] name = "pluggy" version = "1.3.0" @@ -190,6 +326,31 @@ tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} [package.extras] testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] +[[package]] +name = "python-dateutil" +version = "2.8.2" +description = "Extensions to the standard Python datetime module" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +files = [ + {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, + {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, +] + +[package.dependencies] +six = ">=1.5" + +[[package]] +name = "pytz" +version = "2023.3.post1" +description = "World timezone definitions, modern and historical" +optional = false +python-versions = "*" +files = [ + {file = "pytz-2023.3.post1-py2.py3-none-any.whl", hash = "sha256:ce42d816b81b68506614c11e8937d3aa9e41007ceb50bfdcb0749b921bf646c7"}, + {file = "pytz-2023.3.post1.tar.gz", hash = "sha256:7b4fddbeb94a1eba4b557da24f19fdf9db575192544270a9101d8509f9f43d7b"}, +] + [[package]] name = "requests" version = "2.31.0" @@ -211,6 +372,17 @@ urllib3 = ">=1.21.1,<3" socks = ["PySocks (>=1.5.6,!=1.5.7)"] use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] +[[package]] +name = "six" +version = "1.16.0" +description = "Python 2 and 3 compatibility utilities" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] + [[package]] name = "tomli" version = "2.0.1" @@ -222,6 +394,17 @@ files = [ {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, ] +[[package]] +name = "tzdata" +version = "2023.3" +description = "Provider of IANA time zone data" +optional = false +python-versions = ">=2" +files = [ + {file = "tzdata-2023.3-py2.py3-none-any.whl", hash = "sha256:7e65763eef3120314099b6939b5546db7adce1e7d6f2e179e3df563c70511eda"}, + {file = "tzdata-2023.3.tar.gz", hash = "sha256:11ef1e08e54acb0d4f95bdb1be05da659673de4acbd21bf9c69e94cc5e907a3a"}, +] + [[package]] name = "urllib3" version = "2.0.4" @@ -242,4 +425,4 @@ zstd = ["zstandard (>=0.18.0)"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "bc3567f9501f9e18bf9f53d8b4efe1e7e3fc2d750ceda2fbab165bfa22d49c64" +content-hash = "eede625d6d45085e143b0af246cb2ce00cff8579c667be3b63387c8594a5570d" diff --git a/pyproject.toml b/pyproject.toml index 01a6fd9..0f1198a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,6 +9,7 @@ readme = "README.md" [tool.poetry.dependencies] python = "^3.10" requests = "^2.31.0" +pandas = "^2.1.0" [tool.poetry.group.dev.dependencies]