Skip to content

Commit

Permalink
fix: use an appropriate column filter list for schema validation (Goo…
Browse files (browse the repository at this point in the history)
  • Loading branch information
ajwelch4 committed Feb 18, 2022
1 parent 100b3ea commit 18d83be
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 3 deletions.
11 changes: 9 additions & 2 deletions data_validation/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,13 @@ def build_config_managers_from_args(args):

format = args.format if args.format else "table"

use_random_rows = (
None if config_type == consts.SCHEMA_VALIDATION else args.use_random_row
)
random_row_batch_size = (
None if config_type == consts.SCHEMA_VALIDATION else args.random_row_batch_size
)

is_filesystem = source_client._source_type == "FileSystem"
tables_list = cli_tools.get_tables_list(
args.tables_list, default_value=[], is_filesystem=is_filesystem
Expand All @@ -167,8 +174,8 @@ def build_config_managers_from_args(args):
labels,
threshold,
format,
use_random_rows=args.use_random_row,
random_row_batch_size=args.random_row_batch_size,
use_random_rows=use_random_rows,
random_row_batch_size=random_row_batch_size,
source_client=source_client,
target_client=target_client,
result_handler_config=result_handler_config,
Expand Down
8 changes: 7 additions & 1 deletion data_validation/config_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,13 @@ def get_yaml_validation_block(self):
def get_result_handler(self):
"""Return ResultHandler instance from supplied config."""
if not self.result_handler_config:
return TextResultHandler(self._config.get(consts.CONFIG_FORMAT, "table"))
if self.config[consts.CONFIG_TYPE] == consts.SCHEMA_VALIDATION:
cols_filter_list = consts.SCHEMA_VALIDATION_COLUMN_FILTER_LIST
else:
cols_filter_list = consts.COLUMN_FILTER_LIST
return TextResultHandler(
self._config.get(consts.CONFIG_FORMAT, "table"), cols_filter_list
)

result_type = self.result_handler_config[consts.CONFIG_TYPE]
if result_type == "BigQuery":
Expand Down
8 changes: 8 additions & 0 deletions data_validation/consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,3 +129,11 @@
"run_id",
"start_time",
]
# Column filter list used for schema validation runs.
# NOTE(review): presumably these are the result columns left out of the
# text output -- confirm against TextResultHandler.
SCHEMA_VALIDATION_COLUMN_FILTER_LIST = [
    # Run bookkeeping columns.
    "run_id",
    "start_time",
    "end_time",
    # Aggregation columns.
    "aggregation_type",
    "source_agg_value",
    "target_agg_value",
]
20 changes: 20 additions & 0 deletions tests/system/data_sources/test_bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,18 @@
consts.CONFIG_FORMAT: "table",
}

# Validation config for the schema validation system test: the same BigQuery
# connection is used as both source and target.
CONFIG_SCHEMA_VALIDATION = {
    # Connections (BigQuery on both sides)
    consts.CONFIG_SOURCE_CONN: BQ_CONN,
    consts.CONFIG_TARGET_CONN: BQ_CONN,
    # Kind of validation to run
    consts.CONFIG_TYPE: "Schema",
    # Table under validation and the output format
    consts.CONFIG_SCHEMA_NAME: "bigquery-public-data.new_york_citibike",
    consts.CONFIG_TABLE_NAME: "citibike_trips",
    consts.CONFIG_FORMAT: "table",
}

BQ_CONN_NAME = "bq-integration-test"
CLI_CONFIG_FILE = "example_test.yaml"

Expand Down Expand Up @@ -237,6 +249,14 @@ def test_numeric_types():
)


def test_schema_validation():
    """Run the schema validation config and require every row to pass.

    CONFIG_SCHEMA_VALIDATION uses the same connection for source and
    target, so each returned validation row is expected to carry a
    "Pass" status.
    """
    validator = data_validation.DataValidation(CONFIG_SCHEMA_VALIDATION, verbose=True)
    df = validator.execute()
    records = df.to_dict(orient="records")

    # Without this guard an empty result would let the loop below pass
    # vacuously and the test would verify nothing.
    assert records, "schema validation returned no rows"

    for validation in records:
        assert validation["status"] == "Pass"


def test_cli_store_yaml_then_run():
# Store BQ Connection
_store_bq_conn()
Expand Down

0 comments on commit 18d83be

Please sign in to comment.