diff --git a/README.md b/README.md index 2fd8e63a..4c3d9d62 100644 --- a/README.md +++ b/README.md @@ -309,12 +309,12 @@ data-validation (--verbose or -v) (--log-level or -ll) validate custom-query col Source sql query Either --source-query or --source-query-file must be provided --source-query-file SOURCE_QUERY_FILE, -sqf SOURCE_QUERY_FILE - File containing the source sql commands + File containing the source sql command. Supports GCS and local paths. --target-query TARGET_QUERY, -tq TARGET_QUERY Target sql query Either --target-query or --target-query-file must be provided --target-query-file TARGET_QUERY_FILE, -tqf TARGET_QUERY_FILE - File containing the target sql commands + File containing the target sql command. Supports GCS and local paths. [--count COLUMNS] Comma separated list of columns for count or * for all columns [--sum COLUMNS] Comma separated list of columns for sum or * for all numeric [--min COLUMNS] Comma separated list of columns for min or * for all numeric @@ -371,12 +371,12 @@ data-validation (--verbose or -v) (--log-level or -ll) validate custom-query row Source sql query Either --source-query or --source-query-file must be provided --source-query-file SOURCE_QUERY_FILE, -sqf SOURCE_QUERY_FILE - File containing the source sql commands + File containing the source sql command. Supports GCS and local paths. --target-query TARGET_QUERY, -tq TARGET_QUERY Target sql query Either --target-query or --target-query-file must be provided --target-query-file TARGET_QUERY_FILE, -tqf TARGET_QUERY_FILE - File containing the target sql commands + File containing the target sql command. Supports GCS and local paths. --comparison-fields or -comp-fields FIELDS Comma separated list of columns to compare. Can either be a physical column or an alias See: *Calculated Fields* section for details diff --git a/data_validation/config_manager.py b/data_validation/config_manager.py index ed4343bc..c2cc0c6e 100644 --- a/data_validation/config_manager.py +++ b/data_validation/config_manager.py @@ -1034,8 +1034,7 @@ def get_query_from_file(self, filename): """Return query from input file""" query = "" try: - file = open(filename, "r") - query = file.read() + query = gcs_helper.read_file(filename, download_as_text=True) query = query.rstrip(";\n") except IOError: logging.error("Cannot read query file: ", filename) @@ -1045,7 +1044,6 @@ def get_query_from_file(self, filename): "Expected file with sql query, got empty file or file with white spaces. " f"input file: {filename}" ) - file.close() return query def get_query_from_inline(self, inline_query): diff --git a/data_validation/gcs_helper.py b/data_validation/gcs_helper.py index bbdc5b2f..e7ee6f7b 100644 --- a/data_validation/gcs_helper.py +++ b/data_validation/gcs_helper.py @@ -50,12 +50,12 @@ def _get_gcs_file_path(gcs_file_path: str) -> str: return "".join(gcs_file_path[5:].split("/", 1)[1:]) -def _read_gcs_file(file_path: str) -> bytes: +def _read_gcs_file(file_path: str, download_as_text: bool = False): gcs_bucket = get_gcs_bucket(file_path) blob = gcs_bucket.blob(_get_gcs_file_path(file_path)) if not blob: raise ValueError(f"Invalid Cloud Storage Path: {file_path}") - return blob.download_as_bytes() + return blob.download_as_text() if download_as_text else blob.download_as_bytes() def _write_gcs_file(file_path: str, data: str): @@ -64,9 +64,9 @@ def _write_gcs_file(file_path: str, data: str): blob.upload_from_string(data) -def read_file(file_path: str): +def read_file(file_path: str, download_as_text: bool = False): if _is_gcs_path(file_path): - return _read_gcs_file(file_path) + return _read_gcs_file(file_path, download_as_text) else: with open(file_path, "r") as f: return f.read() diff --git a/tests/system/test_state_manager.py b/tests/system/test_state_manager.py index ff8f52d7..69eb759c 100644 --- a/tests/system/test_state_manager.py +++ b/tests/system/test_state_manager.py @@ -67,6 +67,9 @@ def test_read_and_write_gcs_file(): data = gcs_helper.read_file(GCS_STATE_FULL_PATH) assert data == b"TEST_DATA" + data = gcs_helper.read_file(GCS_STATE_FULL_PATH, download_as_text=True) + assert data == "TEST_DATA" + def test_list_gcs_dir(): gcs_helper.write_file(GCS_STATE_FULL_PATH, TEST_DATA)