Skip to content

Commit

Permalink
Browse files: browse the repository at this point in the history
  • Loading branch information
ajwelch4 committed Feb 27, 2022
1 parent 1a64eac commit 5873de9
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 9 deletions.
2 changes: 1 addition & 1 deletion data_validation/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ def build_config_managers_from_args(args):
filter_config = cli_tools.get_filters(args.filters)
if args.threshold:
threshold = args.threshold
labels = cli_tools.get_labels(args.labels)
labels = cli_tools.get_labels(args.labels)

mgr = state_manager.StateManager()
source_client = clients.get_data_client(mgr.get_connection_config(args.source_conn))
Expand Down
6 changes: 3 additions & 3 deletions data_validation/cli_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,9 +452,6 @@ def _configure_column_parser(column_parser):
"-pk",
help="Comma separated list of primary key columns 'col_a,col_b'",
)
column_parser.add_argument(
"--labels", "-l", help="Key value pair labels for validation run"
)
column_parser.add_argument(
"--threshold",
"-th",
Expand Down Expand Up @@ -495,6 +492,9 @@ def _add_common_arguments(parser):
parser.add_argument(
"--bq-result-handler", "-bqrh", help="BigQuery result handler config details"
)
parser.add_argument(
"--labels", "-l", help="Key value pair labels for validation run"
)
parser.add_argument(
"--service-account",
"-sa",
Expand Down
15 changes: 10 additions & 5 deletions data_validation/schema_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,20 +66,25 @@ def execute(self):
df.insert(loc=1, column="validation_name", value="Schema")
df.insert(loc=2, column="validation_type", value="Schema")

df.insert(loc=3, column="start_time", value=self.run_metadata.start_time)
df.insert(loc=4, column="end_time", value=self.run_metadata.end_time)
df.insert(
loc=3,
column="labels",
value=[self.run_metadata.labels for _ in range(len(df.index))],
)
df.insert(loc=4, column="start_time", value=self.run_metadata.start_time)
df.insert(loc=5, column="end_time", value=self.run_metadata.end_time)

df.insert(
loc=5,
loc=6,
column="source_table_name",
value=self.config_manager.full_source_table,
)
df.insert(
loc=6,
loc=7,
column="target_table_name",
value=self.config_manager.full_target_table,
)
df.insert(loc=9, column="aggregation_type", value="Schema")
df.insert(loc=10, column="aggregation_type", value="Schema")

del df["error_result.details"]
return df
Expand Down
5 changes: 5 additions & 0 deletions tests/unit/test_schema_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@
consts.CONFIG_AGGREGATES: [],
consts.CONFIG_THRESHOLD: 0.0,
consts.CONFIG_RESULT_HANDLER: None,
consts.CONFIG_LABELS: [
("label_1_name", "label_1_value"),
("label_2_name", "label_2_value"),
],
consts.CONFIG_FORMAT: "table",
}

Expand Down Expand Up @@ -193,5 +197,6 @@ def test_execute(module_under_test, fs):
assert len(result_df) == len(source_data[0]) + 1
assert result_df["source_agg_value"].astype(float).sum() == 7
assert result_df["target_agg_value"].astype(float).sum() == 7
assert result_df.labels[0] == SAMPLE_SCHEMA_CONFIG[consts.CONFIG_LABELS]
assert failures["source_column_name"].to_list() == ["id", "N/A"]
assert failures["target_column_name"].to_list() == ["N/A", "id_new"]

0 comments on commit 5873de9

Please sign in to comment.