From 5873de9163c4b0155356649d9d17de431a18e11c Mon Sep 17 00:00:00 2001 From: "A.J. Welch" Date: Sun, 27 Feb 2022 04:41:58 +0000 Subject: [PATCH] fix: #260 --- data_validation/__main__.py | 2 +- data_validation/cli_tools.py | 6 +++--- data_validation/schema_validation.py | 15 ++++++++++----- tests/unit/test_schema_validation.py | 5 +++++ 4 files changed, 19 insertions(+), 9 deletions(-) diff --git a/data_validation/__main__.py b/data_validation/__main__.py index d37898b14..b31b3d10b 100644 --- a/data_validation/__main__.py +++ b/data_validation/__main__.py @@ -145,7 +145,7 @@ def build_config_managers_from_args(args): filter_config = cli_tools.get_filters(args.filters) if args.threshold: threshold = args.threshold - labels = cli_tools.get_labels(args.labels) + labels = cli_tools.get_labels(args.labels) mgr = state_manager.StateManager() source_client = clients.get_data_client(mgr.get_connection_config(args.source_conn)) diff --git a/data_validation/cli_tools.py b/data_validation/cli_tools.py index c8bf4e42e..7571412d6 100644 --- a/data_validation/cli_tools.py +++ b/data_validation/cli_tools.py @@ -452,9 +452,6 @@ def _configure_column_parser(column_parser): "-pk", help="Comma separated list of primary key columns 'col_a,col_b'", ) - column_parser.add_argument( - "--labels", "-l", help="Key value pair labels for validation run" - ) column_parser.add_argument( "--threshold", "-th", @@ -495,6 +492,9 @@ def _add_common_arguments(parser): parser.add_argument( "--bq-result-handler", "-bqrh", help="BigQuery result handler config details" ) + parser.add_argument( + "--labels", "-l", help="Key value pair labels for validation run" + ) parser.add_argument( "--service-account", "-sa", diff --git a/data_validation/schema_validation.py b/data_validation/schema_validation.py index cdb60087e..562b7a07b 100644 --- a/data_validation/schema_validation.py +++ b/data_validation/schema_validation.py @@ -66,20 +66,25 @@ def execute(self): df.insert(loc=1, column="validation_name", value="Schema") df.insert(loc=2, column="validation_type", value="Schema") - df.insert(loc=3, column="start_time", value=self.run_metadata.start_time) - df.insert(loc=4, column="end_time", value=self.run_metadata.end_time) + df.insert( + loc=3, + column="labels", + value=[self.run_metadata.labels for _ in range(len(df.index))], + ) + df.insert(loc=4, column="start_time", value=self.run_metadata.start_time) + df.insert(loc=5, column="end_time", value=self.run_metadata.end_time) df.insert( - loc=5, + loc=6, column="source_table_name", value=self.config_manager.full_source_table, ) df.insert( - loc=6, + loc=7, column="target_table_name", value=self.config_manager.full_target_table, ) - df.insert(loc=9, column="aggregation_type", value="Schema") + df.insert(loc=10, column="aggregation_type", value="Schema") del df["error_result.details"] return df diff --git a/tests/unit/test_schema_validation.py b/tests/unit/test_schema_validation.py index f7987fa36..4e52f0475 100644 --- a/tests/unit/test_schema_validation.py +++ b/tests/unit/test_schema_validation.py @@ -51,6 +51,10 @@ consts.CONFIG_AGGREGATES: [], consts.CONFIG_THRESHOLD: 0.0, consts.CONFIG_RESULT_HANDLER: None, + consts.CONFIG_LABELS: [ + ("label_1_name", "label_1_value"), + ("label_2_name", "label_2_value"), + ], consts.CONFIG_FORMAT: "table", } @@ -193,5 +197,6 @@ def test_execute(module_under_test, fs): assert len(result_df) == len(source_data[0]) + 1 assert result_df["source_agg_value"].astype(float).sum() == 7 assert result_df["target_agg_value"].astype(float).sum() == 7 + assert result_df.labels[0] == SAMPLE_SCHEMA_CONFIG[consts.CONFIG_LABELS] assert failures["source_column_name"].to_list() == ["id", "N/A"] assert failures["target_column_name"].to_list() == ["N/A", "id_new"]