From d0ebaebd4b75089a30958f8f9f1e438b6ff79add Mon Sep 17 00:00:00 2001 From: Neha Nene Date: Thu, 7 Apr 2022 14:25:47 -0500 Subject: [PATCH] Revert "fix: support labels for schema validation (#260) (#381)" This reverts commit f787701dcb505fbced3e12b996c845148bbc1af0. --- data_validation/__main__.py | 2 +- data_validation/cli_tools.py | 9 ++++++--- data_validation/schema_validation.py | 15 +++++---------- tests/unit/test_schema_validation.py | 5 ----- 4 files changed, 12 insertions(+), 19 deletions(-) diff --git a/data_validation/__main__.py b/data_validation/__main__.py index 35d9692c5..e69e88b0f 100644 --- a/data_validation/__main__.py +++ b/data_validation/__main__.py @@ -211,7 +211,7 @@ def build_config_managers_from_args(args): filter_config = cli_tools.get_filters(args.filters) if args.threshold: threshold = args.threshold - labels = cli_tools.get_labels(args.labels) + labels = cli_tools.get_labels(args.labels) mgr = state_manager.StateManager() source_client = clients.get_data_client(mgr.get_connection_config(args.source_conn)) diff --git a/data_validation/cli_tools.py b/data_validation/cli_tools.py index 794335368..f81458489 100644 --- a/data_validation/cli_tools.py +++ b/data_validation/cli_tools.py @@ -409,6 +409,9 @@ def _configure_row_parser(row_parser): required=True, help="Comma separated list of primary key columns 'col_a,col_b'", ) + row_parser.add_argument( + "--labels", "-l", help="Key value pair labels for validation run" + ) row_parser.add_argument( "--threshold", "-th", @@ -491,6 +494,9 @@ def _configure_column_parser(column_parser): "-pk", help="Comma separated list of primary key columns 'col_a,col_b'", ) + column_parser.add_argument( + "--labels", "-l", help="Key value pair labels for validation run" + ) column_parser.add_argument( "--threshold", "-th", @@ -611,9 +617,6 @@ def _add_common_arguments(parser): parser.add_argument( "--bq-result-handler", "-bqrh", help="BigQuery result handler config details" ) - parser.add_argument( - "--labels", "-l", help="Key value pair labels for validation run" - ) parser.add_argument( "--service-account", "-sa", diff --git a/data_validation/schema_validation.py b/data_validation/schema_validation.py index b6b40a10e..7596ab65a 100644 --- a/data_validation/schema_validation.py +++ b/data_validation/schema_validation.py @@ -66,25 +66,20 @@ def execute(self): df.insert(loc=1, column="validation_name", value="Schema") df.insert(loc=2, column="validation_type", value="Schema") - df.insert( - loc=3, - column="labels", - value=[self.run_metadata.labels for _ in range(len(df.index))], - ) - df.insert(loc=4, column="start_time", value=self.run_metadata.start_time) - df.insert(loc=5, column="end_time", value=self.run_metadata.end_time) + df.insert(loc=3, column="start_time", value=self.run_metadata.start_time) + df.insert(loc=4, column="end_time", value=self.run_metadata.end_time) df.insert( - loc=6, + loc=5, column="source_table_name", value=self.config_manager.full_source_table, ) df.insert( - loc=7, + loc=6, column="target_table_name", value=self.config_manager.full_target_table, ) - df.insert(loc=10, column="aggregation_type", value="Schema") + df.insert(loc=9, column="aggregation_type", value="Schema") del df["error_result.details"] return df diff --git a/tests/unit/test_schema_validation.py b/tests/unit/test_schema_validation.py index dd2be0239..9ebbf9409 100644 --- a/tests/unit/test_schema_validation.py +++ b/tests/unit/test_schema_validation.py @@ -51,10 +51,6 @@ consts.CONFIG_AGGREGATES: [], consts.CONFIG_THRESHOLD: 0.0, consts.CONFIG_RESULT_HANDLER: None, - consts.CONFIG_LABELS: [ - ("label_1_name", "label_1_value"), - ("label_2_name", "label_2_value"), - ], consts.CONFIG_FORMAT: "table", } @@ -206,6 +202,5 @@ def test_execute(module_under_test, fs): assert len(result_df) == len(source_data[0]) + 1 assert result_df["source_agg_value"].astype(float).sum() == 7 assert result_df["target_agg_value"].astype(float).sum() == 7 - assert result_df.labels[0] == SAMPLE_SCHEMA_CONFIG[consts.CONFIG_LABELS] assert failures["source_column_name"].to_list() == ["id", "N/A"] assert failures["target_column_name"].to_list() == ["N/A", "id_new"]