Skip to content

Commit

Permalink
feat: Support GCS custom query files (#1155)
Browse files Browse the repository at this point in the history
* upgrade package versions

* feat: support gcs query files

* add test and update docs

---------

Co-authored-by: Helen Cristina <[email protected]>
  • Loading branch information
nehanene15 and helensilva14 committed Jun 3, 2024
1 parent 9946fcb commit e3fe3d1
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 11 deletions.
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -309,12 +309,12 @@ data-validation (--verbose or -v) (--log-level or -ll) validate custom-query col
Source sql query
Either --source-query or --source-query-file must be provided
--source-query-file SOURCE_QUERY_FILE, -sqf SOURCE_QUERY_FILE
File containing the source sql commands
File containing the source sql command. Supports GCS and local paths.
--target-query TARGET_QUERY, -tq TARGET_QUERY
Target sql query
Either --target-query or --target-query-file must be provided
--target-query-file TARGET_QUERY_FILE, -tqf TARGET_QUERY_FILE
File containing the target sql commands
File containing the target sql command. Supports GCS and local paths.
[--count COLUMNS] Comma separated list of columns for count or * for all columns
[--sum COLUMNS] Comma separated list of columns for sum or * for all numeric
[--min COLUMNS] Comma separated list of columns for min or * for all numeric
Expand Down Expand Up @@ -371,12 +371,12 @@ data-validation (--verbose or -v) (--log-level or -ll) validate custom-query row
Source sql query
Either --source-query or --source-query-file must be provided
--source-query-file SOURCE_QUERY_FILE, -sqf SOURCE_QUERY_FILE
File containing the source sql commands
File containing the source sql command. Supports GCS and local paths.
--target-query TARGET_QUERY, -tq TARGET_QUERY
Target sql query
Either --target-query or --target-query-file must be provided
--target-query-file TARGET_QUERY_FILE, -tqf TARGET_QUERY_FILE
File containing the target sql commands
File containing the target sql command. Supports GCS and local paths.
--comparison-fields or -comp-fields FIELDS
Comma separated list of columns to compare. Can either be a physical column or an alias
See: *Calculated Fields* section for details
Expand Down
4 changes: 1 addition & 3 deletions data_validation/config_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -1034,8 +1034,7 @@ def get_query_from_file(self, filename):
"""Return query from input file"""
query = ""
try:
file = open(filename, "r")
query = file.read()
query = gcs_helper.read_file(filename, download_as_text=True)
query = query.rstrip(";\n")
except IOError:
logging.error("Cannot read query file: ", filename)
Expand All @@ -1045,7 +1044,6 @@ def get_query_from_file(self, filename):
"Expected file with sql query, got empty file or file with white spaces. "
f"input file: {filename}"
)
file.close()
return query

def get_query_from_inline(self, inline_query):
Expand Down
8 changes: 4 additions & 4 deletions data_validation/gcs_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,12 @@ def _get_gcs_file_path(gcs_file_path: str) -> str:
return "".join(gcs_file_path[5:].split("/", 1)[1:])


def _read_gcs_file(file_path: str, download_as_text: bool = False):
    """Download the contents of a Cloud Storage object.

    Args:
        file_path: Cloud Storage path of the object. The bucket is resolved
            by ``get_gcs_bucket`` and the object name by
            ``_get_gcs_file_path``.
        download_as_text: When True, return ``blob.download_as_text()``;
            otherwise return ``blob.download_as_bytes()``. The default keeps
            the bytes result that callers received before this flag existed.

    Returns:
        The result of the selected download call.

    Raises:
        ValueError: If no blob handle is obtained for ``file_path``.
    """
    gcs_bucket = get_gcs_bucket(file_path)
    blob = gcs_bucket.blob(_get_gcs_file_path(file_path))
    # NOTE(review): bucket.blob() appears to build a local handle without
    # contacting the service, so this guard may never fire for a missing
    # object -- confirm, and consider relying on the download error instead.
    if not blob:
        raise ValueError(f"Invalid Cloud Storage Path: {file_path}")
    if download_as_text:
        return blob.download_as_text()
    return blob.download_as_bytes()


def _write_gcs_file(file_path: str, data: str):
Expand All @@ -64,9 +64,9 @@ def _write_gcs_file(file_path: str, data: str):
blob.upload_from_string(data)


def read_file(file_path: str, download_as_text: bool = False):
    """Read a file from Cloud Storage or from the local filesystem.

    Args:
        file_path: Path to read. Paths recognised by ``_is_gcs_path`` are
            fetched through ``_read_gcs_file``; anything else is opened as a
            local file.
        download_as_text: Forwarded to ``_read_gcs_file``. It is not
            consulted for local files, which are always opened in text mode.

    Returns:
        For Cloud Storage paths, whatever ``_read_gcs_file`` returns for the
        given flag. For local paths, the file contents as ``str``.
    """
    if _is_gcs_path(file_path):
        # Pass the flag through so callers can ask for decoded text.
        return _read_gcs_file(file_path, download_as_text)
    with open(file_path, "r") as f:
        return f.read()
Expand Down
3 changes: 3 additions & 0 deletions tests/system/test_state_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ def test_read_and_write_gcs_file():
data = gcs_helper.read_file(GCS_STATE_FULL_PATH)
assert data == b"TEST_DATA"

data = gcs_helper.read_file(GCS_STATE_FULL_PATH, download_as_text=True)
assert data == "TEST_DATA"


def test_list_gcs_dir():
gcs_helper.write_file(GCS_STATE_FULL_PATH, TEST_DATA)
Expand Down

0 comments on commit e3fe3d1

Please sign in to comment.