diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml index 2962f0c..6d0f8ab 100644 --- a/.github/workflows/publish-to-pypi.yml +++ b/.github/workflows/publish-to-pypi.yml @@ -30,4 +30,4 @@ jobs: if: startsWith(github.ref, 'refs/tags') uses: pypa/gh-action-pypi-publish@release/v1 with: - password: ${{ secrets.PYPI_API_TOKEN }} \ No newline at end of file + password: ${{ secrets.PYPI_API_TOKEN }} diff --git a/.gitignore b/.gitignore index 41dd5d2..eab3b40 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,4 @@ **/.pytest_cache/ *.pyc /.ipynb_checkpoints/ -*.csv \ No newline at end of file +*.csv diff --git a/README.md b/README.md index 2018d70..b0979df 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ ```bash pip install -U homeharvest ``` - _Python version >= [3.10](https://www.python.org/downloads/release/python-3100/) required_ + _Python version >= [3.10](https://www.python.org/downloads/release/python-3100/) required_ ## Usage @@ -39,11 +39,11 @@ properties = scrape_property( location="San Diego, CA", listing_type="sold", # or (for_sale, for_rent, pending) past_days=30, # sold in last 30 days - listed in last 30 days if (for_sale, for_rent) - - # date_from="2023-05-01", # alternative to past_days - # date_to="2023-05-28", + + # date_from="2023-05-01", # alternative to past_days + # date_to="2023-05-28", # foreclosure=True - + # mls_only=True, # only fetch MLS listings ) print(f"Number of properties: {len(properties)}") @@ -84,7 +84,7 @@ Optional │ ├── date_from, date_to (string): Start and end dates to filter properties listed or sold, both dates are required. | (use this to get properties in chunks as there's a 10k result limit) -│ Format for both must be "YYYY-MM-DD". +│ Format for both must be "YYYY-MM-DD". │ Example: "2023-05-01", "2023-05-15" (fetches properties listed/sold between these dates) │ ├── mls_only (True/False): If set, fetches only MLS listings (mainly applicable to 'sold' listings) @@ -128,14 +128,17 @@ Property │ ├── sold_price │ ├── last_sold_date │ ├── price_per_sqft +│ ├── parking_garage │ └── hoa_fee ├── Location Details: │ ├── latitude │ ├── longitude -└── Parking Details: - └── parking_garage +├── Agent Info: +│ ├── agent +│ ├── broker +│ └── broker_phone ``` ### Exceptions @@ -143,4 +146,3 @@ The following exceptions may be raised when using HomeHarvest: - `InvalidListingType` - valid options: `for_sale`, `for_rent`, `sold` - `InvalidDate` - date_from or date_to is not in the format YYYY-MM-DD - diff --git a/homeharvest/cli.py b/homeharvest/cli.py index 950c1e4..342c030 100644 --- a/homeharvest/cli.py +++ b/homeharvest/cli.py @@ -5,9 +5,7 @@ def main(): parser = argparse.ArgumentParser(description="Home Harvest Property Scraper") - parser.add_argument( - "location", type=str, help="Location to scrape (e.g., San Francisco, CA)" - ) + parser.add_argument("location", type=str, help="Location to scrape (e.g., San Francisco, CA)") parser.add_argument( "-l", @@ -35,9 +33,7 @@ def main(): help="Name of the output file (without extension)", ) - parser.add_argument( - "-p", "--proxy", type=str, default=None, help="Proxy to use for scraping" - ) + parser.add_argument("-p", "--proxy", type=str, default=None, help="Proxy to use for scraping") parser.add_argument( "-d", "--days", diff --git a/homeharvest/core/scrapers/__init__.py b/homeharvest/core/scrapers/__init__.py index a0bd471..2b824ba 100644 --- a/homeharvest/core/scrapers/__init__.py +++ b/homeharvest/core/scrapers/__init__.py @@ -1,5 +1,6 @@ from dataclasses import dataclass import requests +import uuid from .models import Property, ListingType, SiteName @@ -27,6 +28,12 @@ def __init__( if not session: self.session = requests.Session() + self.session.headers.update( + { + "auth": f"Bearer {self.get_access_token()}", + "apollographql-client-name": "com.move.Realtor-apollo-ios", + } + ) else: self.session = session @@ -43,12 +50,26 @@ def __init__( self.date_to = scraper_input.date_to self.foreclosure = scraper_input.foreclosure - def search(self) -> list[Property]: - ... + def search(self) -> list[Property]: ... @staticmethod - def _parse_home(home) -> Property: - ... + def _parse_home(home) -> Property: ... - def handle_location(self): - ... + def handle_location(self): ... + + def get_access_token(self): + url = "https://graph.realtor.com/auth/token" + + payload = f'{{"client_app_id":"rdc_mobile_native,24.20.4.149916,iphone","device_id":"{str(uuid.uuid4()).upper()}","grant_type":"device_mobile"}}' + headers = { + "Host": "graph.realtor.com", + "x-client-version": "24.20.4.149916", + "accept": "*/*", + "content-type": "Application/json", + "user-agent": "Realtor.com/24.20.4.149916 CFNetwork/1410.0.3 Darwin/22.6.0", + "accept-language": "en-US,en;q=0.9", + } + response = requests.post(url, headers=headers, data=payload) + + data = response.json() + return data["access_token"] diff --git a/homeharvest/core/scrapers/models.py b/homeharvest/core/scrapers/models.py index 8497a93..c54417c 100644 --- a/homeharvest/core/scrapers/models.py +++ b/homeharvest/core/scrapers/models.py @@ -69,6 +69,12 @@ class Description: stories: int | None = None +@dataclass +class Agent: + name: str | None = None + phone: str | None = None + + @dataclass class Property: property_url: str @@ -89,3 +95,5 @@ class Property: latitude: float | None = None longitude: float | None = None neighborhoods: Optional[str] = None + + agents: list[Agent] = None diff --git a/homeharvest/exceptions.py b/homeharvest/exceptions.py index 0d71398..c3f5111 100644 --- a/homeharvest/exceptions.py +++ b/homeharvest/exceptions.py @@ -1,5 +1,6 @@ class InvalidListingType(Exception): """Raised when a provided listing type is does not exist.""" + class InvalidDate(Exception): - """Raised when only one of date_from or date_to is provided or not in the correct format. ex: 2023-10-23 """ + """Raised when only one of date_from or date_to is provided or not in the correct format. ex: 2023-10-23""" diff --git a/homeharvest/utils.py b/homeharvest/utils.py index 6397594..74def20 100644 --- a/homeharvest/utils.py +++ b/homeharvest/utils.py @@ -31,6 +31,9 @@ "stories", "hoa_fee", "parking_garage", + "agent", + "broker", + "broker_phone", "primary_photo", "alt_photos", ] @@ -48,6 +51,14 @@ def process_result(result: Property) -> pd.DataFrame: prop_data["state"] = address_data.state prop_data["zip_code"] = address_data.zip + if "agents" in prop_data: + agents = prop_data["agents"] + if agents: + prop_data["agent"] = agents[0].name + if len(agents) > 1: + prop_data["broker"] = agents[1].name + prop_data["broker_phone"] = agents[1].phone + prop_data["price_per_sqft"] = prop_data["prc_sqft"] description = result.description @@ -72,9 +83,7 @@ def process_result(result: Property) -> pd.DataFrame: def validate_input(listing_type: str) -> None: if listing_type.upper() not in ListingType.__members__: - raise InvalidListingType( - f"Provided listing type, '{listing_type}', does not exist." - ) + raise InvalidListingType(f"Provided listing type, '{listing_type}', does not exist.") def validate_dates(date_from: str | None, date_to: str | None) -> None: diff --git a/poetry.lock b/poetry.lock index 8bb01ef..8445afa 100644 --- a/poetry.lock +++ b/poetry.lock @@ -11,6 +11,17 @@ files = [ {file = "certifi-2023.7.22.tar.gz", hash = "sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082"}, ] +[[package]] +name = "cfgv" +version = "3.4.0" +description = "Validate configuration and produce human readable error messages." +optional = false +python-versions = ">=3.8" +files = [ + {file = "cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9"}, + {file = "cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560"}, +] + [[package]] name = "charset-normalizer" version = "3.3.0" @@ -121,6 +132,17 @@ files = [ {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] +[[package]] +name = "distlib" +version = "0.3.8" +description = "Distribution utilities" +optional = false +python-versions = "*" +files = [ + {file = "distlib-0.3.8-py2.py3-none-any.whl", hash = "sha256:034db59a0b96f8ca18035f36290806a9a6e6bd9d1ff91e45a7f172eb17e51784"}, + {file = "distlib-0.3.8.tar.gz", hash = "sha256:1530ea13e350031b6312d8580ddb6b27a104275a31106523b8f123787f494f64"}, +] + [[package]] name = "exceptiongroup" version = "1.1.3" @@ -135,6 +157,36 @@ files = [ [package.extras] test = ["pytest (>=6)"] +[[package]] +name = "filelock" +version = "3.13.4" +description = "A platform independent file lock." +optional = false +python-versions = ">=3.8" +files = [ + {file = "filelock-3.13.4-py3-none-any.whl", hash = "sha256:404e5e9253aa60ad457cae1be07c0f0ca90a63931200a47d9b6a6af84fd7b45f"}, + {file = "filelock-3.13.4.tar.gz", hash = "sha256:d13f466618bfde72bd2c18255e269f72542c6e70e7bac83a0232d6b1cc5c8cf4"}, +] + +[package.extras] +docs = ["furo (>=2023.9.10)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.25.2)"] +testing = ["covdefaults (>=2.3)", "coverage (>=7.3.2)", "diff-cover (>=8.0.1)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)", "pytest-timeout (>=2.2)"] +typing = ["typing-extensions (>=4.8)"] + +[[package]] +name = "identify" +version = "2.5.35" +description = "File identification library for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "identify-2.5.35-py2.py3-none-any.whl", hash = "sha256:c4de0081837b211594f8e877a6b4fad7ca32bbfc1a9307fdd61c28bfe923f13e"}, + {file = "identify-2.5.35.tar.gz", hash = "sha256:10a7ca245cfcd756a554a7288159f72ff105ad233c7c4b9c6f0f4d108f5f6791"}, +] + +[package.extras] +license = ["ukkonen"] + [[package]] name = "idna" version = "3.4" @@ -157,6 +209,20 @@ files = [ {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, ] +[[package]] +name = "nodeenv" +version = "1.8.0" +description = "Node.js virtual environment builder" +optional = false +python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*" +files = [ + {file = "nodeenv-1.8.0-py2.py3-none-any.whl", hash = "sha256:df865724bb3c3adc86b3876fa209771517b0cfe596beff01a92700e0e8be4cec"}, + {file = "nodeenv-1.8.0.tar.gz", hash = "sha256:d51e0c37e64fbf47d017feac3145cdbb58836d7eee8c6f6d3b6880c5456227d2"}, +] + +[package.dependencies] +setuptools = "*" + [[package]] name = "numpy" version = "1.26.0" @@ -277,6 +343,21 @@ sql-other = ["SQLAlchemy (>=1.4.36)"] test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)"] xml = ["lxml (>=4.8.0)"] +[[package]] +name = "platformdirs" +version = "4.2.0" +description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." +optional = false +python-versions = ">=3.8" +files = [ + {file = "platformdirs-4.2.0-py3-none-any.whl", hash = "sha256:0614df2a2f37e1a662acbd8e2b25b92ccf8632929bc6d43467e17fe89c75e068"}, + {file = "platformdirs-4.2.0.tar.gz", hash = "sha256:ef0cc731df711022c174543cb70a9b5bd22e5a9337c8624ef2c2ceb8ddad8768"}, +] + +[package.extras] +docs = ["furo (>=2023.9.10)", "proselint (>=0.13)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.25.2)"] +test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)"] + [[package]] name = "pluggy" version = "1.3.0" @@ -292,6 +373,24 @@ files = [ dev = ["pre-commit", "tox"] testing = ["pytest", "pytest-benchmark"] +[[package]] +name = "pre-commit" +version = "3.7.0" +description = "A framework for managing and maintaining multi-language pre-commit hooks." +optional = false +python-versions = ">=3.9" +files = [ + {file = "pre_commit-3.7.0-py2.py3-none-any.whl", hash = "sha256:5eae9e10c2b5ac51577c3452ec0a490455c45a0533f7960f993a0d01e59decab"}, + {file = "pre_commit-3.7.0.tar.gz", hash = "sha256:e209d61b8acdcf742404408531f0c37d49d2c734fd7cff2d6076083d191cb060"}, +] + +[package.dependencies] +cfgv = ">=2.0.0" +identify = ">=1.0.0" +nodeenv = ">=0.11.1" +pyyaml = ">=5.1" +virtualenv = ">=20.10.0" + [[package]] name = "pytest" version = "7.4.2" @@ -339,6 +438,66 @@ files = [ {file = "pytz-2023.3.post1.tar.gz", hash = "sha256:7b4fddbeb94a1eba4b557da24f19fdf9db575192544270a9101d8509f9f43d7b"}, ] +[[package]] +name = "pyyaml" +version = "6.0.1" +description = "YAML parser and emitter for Python" +optional = false +python-versions = ">=3.6" +files = [ + {file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"}, + {file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, + {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, + {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, + {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, + {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, + {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, + {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, + {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, + {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win32.whl", hash = "sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa"}, + {file = "PyYAML-6.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win32.whl", hash = "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867"}, + {file = "PyYAML-6.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, + {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, + {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, + {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, + {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, + {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, +] + [[package]] name = "requests" version = "2.31.0" @@ -360,6 +519,22 @@ urllib3 = ">=1.21.1,<3" socks = ["PySocks (>=1.5.6,!=1.5.7)"] use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] +[[package]] +name = "setuptools" +version = "69.5.1" +description = "Easily download, build, install, upgrade, and uninstall Python packages" +optional = false +python-versions = ">=3.8" +files = [ + {file = "setuptools-69.5.1-py3-none-any.whl", hash = "sha256:c636ac361bc47580504644275c9ad802c50415c7522212252c033bd15f301f32"}, + {file = "setuptools-69.5.1.tar.gz", hash = "sha256:6c1fccdac05a97e598fb0ae3bbed5904ccb317337a51139dcd51453611bbb987"}, +] + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"] +testing = ["build[virtualenv]", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.9)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "mypy (==1.9)", "packaging (>=23.2)", "pip (>=19.1)", "pytest (>=6,!=8.1.1)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf", "pytest-ruff (>=0.2.1)", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] +testing-integration = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "packaging (>=23.2)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] + [[package]] name = "six" version = "1.16.0" @@ -410,7 +585,27 @@ secure = ["certifi", "cryptography (>=1.9)", "idna (>=2.0.0)", "pyopenssl (>=17. socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] zstd = ["zstandard (>=0.18.0)"] +[[package]] +name = "virtualenv" +version = "20.25.1" +description = "Virtual Python Environment builder" +optional = false +python-versions = ">=3.7" +files = [ + {file = "virtualenv-20.25.1-py3-none-any.whl", hash = "sha256:961c026ac520bac5f69acb8ea063e8a4f071bcc9457b9c1f28f6b085c511583a"}, + {file = "virtualenv-20.25.1.tar.gz", hash = "sha256:e08e13ecdca7a0bd53798f356d5831434afa5b07b93f0abdf0797b7a06ffe197"}, +] + +[package.dependencies] +distlib = ">=0.3.7,<1" +filelock = ">=3.12.2,<4" +platformdirs = ">=3.9.1,<5" + +[package.extras] +docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] +test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"] + [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.13" -content-hash = "018a1a6afb2d7f4c764b9e1926145d7d8d630ffa43f7786e062cbfd9a9a845a0" +content-hash = "371781da268d5f61d6e798c023777f337b620e9b07a48c316825d7b998b63f02" diff --git a/pyproject.toml b/pyproject.toml index 774b3c6..7ca185d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,6 +17,7 @@ pandas = "^2.1.1" [tool.poetry.group.dev.dependencies] pytest = "^7.4.2" +pre-commit = "^3.7.0" [build-system] requires = ["poetry-core"] diff --git a/tests/test_realtor.py b/tests/test_realtor.py index f6cf55f..01e0e6c 100644 --- a/tests/test_realtor.py +++ b/tests/test_realtor.py @@ -1,22 +1,12 @@ from homeharvest import scrape_property -from homeharvest.exceptions import ( - InvalidListingType, -) def test_realtor_pending_or_contingent(): - pending_or_contingent_result = scrape_property( - location="Surprise, AZ", listing_type="pending" - ) + pending_or_contingent_result = scrape_property(location="Surprise, AZ", listing_type="pending") regular_result = scrape_property(location="Surprise, AZ", listing_type="for_sale") - assert all( - [ - result is not None - for result in [pending_or_contingent_result, regular_result] - ] - ) + assert all([result is not None for result in [pending_or_contingent_result, regular_result]]) assert len(pending_or_contingent_result) != len(regular_result) @@ -71,17 +61,13 @@ def test_realtor_comps(): def test_realtor_last_x_days_sold(): - days_result_30 = scrape_property( - location="Dallas, TX", listing_type="sold", past_days=30 - ) + days_result_30 = scrape_property(location="Dallas, TX", listing_type="sold", past_days=30) - days_result_10 = scrape_property( - location="Dallas, TX", listing_type="sold", past_days=10 - ) + days_result_10 = scrape_property(location="Dallas, TX", listing_type="sold", past_days=10) - assert all( - [result is not None for result in [days_result_30, days_result_10]] - ) and len(days_result_30) != len(days_result_10) + assert all([result is not None for result in [days_result_30, days_result_10]]) and len(days_result_30) != len( + days_result_10 + ) def test_realtor_date_range_sold(): @@ -93,9 +79,9 @@ def test_realtor_date_range_sold(): location="Dallas, TX", listing_type="sold", date_from="2023-04-01", date_to="2023-06-10" ) - assert all( - [result is not None for result in [days_result_30, days_result_60]] - ) and len(days_result_30) < len(days_result_60) + assert all([result is not None for result in [days_result_30, days_result_60]]) and len(days_result_30) < len( + days_result_60 + ) def test_realtor_single_property(): @@ -119,12 +105,8 @@ def test_realtor(): location="2530 Al Lipscomb Way", listing_type="for_sale", ), - scrape_property( - location="Phoenix, AZ", listing_type="for_rent" - ), #: does not support "city, state, USA" format - scrape_property( - location="Dallas, TX", listing_type="sold" - ), #: does not support "city, state, USA" format + scrape_property(location="Phoenix, AZ", listing_type="for_rent"), #: does not support "city, state, USA" format + scrape_property(location="Dallas, TX", listing_type="sold"), #: does not support "city, state, USA" format scrape_property(location="85281"), ] @@ -142,21 +124,21 @@ def test_realtor_city(): def test_realtor_bad_address(): bad_results = scrape_property( - location="abceefg ju098ot498hh9", - listing_type="for_sale", - ) + location="abceefg ju098ot498hh9", + listing_type="for_sale", + ) if len(bad_results) == 0: assert True def test_realtor_foreclosed(): - foreclosed = scrape_property( - location="Dallas, TX", listing_type="for_sale", past_days=100, foreclosure=True - ) + foreclosed = scrape_property(location="Dallas, TX", listing_type="for_sale", past_days=100, foreclosure=True) - not_foreclosed = scrape_property( - location="Dallas, TX", listing_type="for_sale", past_days=100, foreclosure=False - ) + not_foreclosed = scrape_property(location="Dallas, TX", listing_type="for_sale", past_days=100, foreclosure=False) assert len(foreclosed) != len(not_foreclosed) + +def test_realtor_agent(): + scraped = scrape_property(location="Detroit, MI", listing_type="for_sale") + assert scraped["agent"].nunique() > 1