From 1bbd6b68e0bd88a0f1f94b667d7be6185dd2e781 Mon Sep 17 00:00:00 2001 From: Neha Nene Date: Wed, 4 Aug 2021 11:40:39 -0500 Subject: [PATCH] Revert "feat: Allow user to specify a format for stdout (#242) (#293)" This reverts commit f0a9fa1e94e86def089c77912cf49911aa63cae1. --- README.md | 2 -- data_validation/__main__.py | 2 -- data_validation/cli_tools.py | 6 ----- data_validation/config_manager.py | 7 ------ data_validation/consts.py | 1 - data_validation/data_validation.py | 5 +--- data_validation/result_handlers/text.py | 27 ++------------------ setup.py | 1 - tests/system/data_sources/test_bigquery.py | 29 +++++----------------- tests/unit/result_handlers/test_text.py | 3 +-- 10 files changed, 10 insertions(+), 73 deletions(-) diff --git a/README.md b/README.md index ce0a7efe1..73130d39f 100644 --- a/README.md +++ b/README.md @@ -110,8 +110,6 @@ data-validation run --labels or -l KEY1=VALUE1,KEY2=VALUE2 (Optional) Comma-separated key value pair labels for the run. --verbose or -v Verbose logging will print queries executed - --format or -fmt Format for stdout output, Supported formats are (text, csv, json, table) - It defaults to table. ``` The default aggregation type is a 'COUNT *'. If no aggregation flag (i.e count, diff --git a/data_validation/__main__.py b/data_validation/__main__.py index a5e7ee7c3..f2426f941 100644 --- a/data_validation/__main__.py +++ b/data_validation/__main__.py @@ -146,7 +146,6 @@ def build_config_managers_from_args(args): result_handler_config=result_handler_config, filter_config=filter_config, verbose=args.verbose, - format=args.format, ) configs.append(build_config_from_args(args, config_manager)) @@ -282,7 +281,6 @@ def run_validation(config_manager, verbose=False): """ validator = DataValidation( config_manager.config, - format=config_manager.format, validation_builder=None, result_handler=None, verbose=verbose, diff --git a/data_validation/cli_tools.py b/data_validation/cli_tools.py index 0f8028260..84c5224e4 100644 --- a/data_validation/cli_tools.py +++ b/data_validation/cli_tools.py @@ -291,12 +291,6 @@ def _configure_run_parser(subparsers): "-filters", help="Filters in the format source_filter:target_filter", ) - run_parser.add_argument( - "--format", - "-fmt", - default="table", - help="Set the format for printing command output, Supported formats are (text, csv, json, table)", - ) def _configure_connection_parser(subparsers): diff --git a/data_validation/config_manager.py b/data_validation/config_manager.py index 7adae8c5e..cc37fee12 100644 --- a/data_validation/config_manager.py +++ b/data_validation/config_manager.py @@ -188,11 +188,6 @@ def threshold(self): """Return threshold from Config """ return self._config.get(consts.CONFIG_THRESHOLD, 0.0) - @property - def format(self): - """Return threshold from Config """ - return self._config.get(consts.CONFIG_FORMAT, "table") - def get_source_ibis_table(self): """Return IbisTable from source.""" if not hasattr(self, "_source_ibis_table"): @@ -274,7 +269,6 @@ def build_config_manager( table_obj, labels, threshold, - format, result_handler_config=None, filter_config=None, verbose=False, @@ -295,7 +289,6 @@ def build_config_manager( consts.CONFIG_THRESHOLD: threshold, consts.CONFIG_RESULT_HANDLER: result_handler_config, consts.CONFIG_FILTERS: filter_config, - consts.CONFIG_FORMAT: format, } # Only FileSystem connections do not require schemas diff --git a/data_validation/consts.py b/data_validation/consts.py index 0b7daa019..6b55eafbb 100644 --- a/data_validation/consts.py +++ b/data_validation/consts.py @@ -34,7 +34,6 @@ CONFIG_SOURCE_COLUMN = "source_column" CONFIG_TARGET_COLUMN = "target_column" CONFIG_THRESHOLD = "threshold" -CONFIG_FORMAT = "format" CONFIG_CAST = "cast" CONFIG_LIMIT = "limit" CONFIG_FILTERS = "filters" diff --git a/data_validation/data_validation.py b/data_validation/data_validation.py index 38c7daddc..2675dfdc0 100644 --- a/data_validation/data_validation.py +++ b/data_validation/data_validation.py @@ -39,7 +39,6 @@ class DataValidation(object): def __init__( self, config, - format="table", validation_builder=None, schema_validator=None, result_handler=None, @@ -59,8 +58,6 @@ def __init__( # Data Client Management self.config = config - self.format = format - self.source_client = clients.get_data_client( self.config[consts.CONFIG_SOURCE_CONN] ) @@ -105,7 +102,7 @@ def execute(self): ) # Call Result Handler to Manage Results - return self.result_handler.execute(self.config, self.format, result_df) + return self.result_handler.execute(self.config, result_df) def query_too_large(self, rows_df, grouped_fields): """ Return bool to dictate if another level of recursion diff --git a/data_validation/result_handlers/text.py b/data_validation/result_handlers/text.py index da7e2dbf0..58cbb8c76 100644 --- a/data_validation/result_handlers/text.py +++ b/data_validation/result_handlers/text.py @@ -23,31 +23,8 @@ """ -def print_formatted_(format, result_df): - """ - Utility for printing formatted results - :param result_df - :param format - """ - if format == "text": - print(result_df.to_string(index=False)) - elif format == "csv": - print(result_df.to_csv(index=False)) - elif format == "json": - print(result_df.to_json(orient="index")) - elif format == "table": - print(result_df.to_markdown(tablefmt="fancy_grid")) - else: - error_msg = ( - f"format [{format}] not supported, results printed in default(table) mode. " - f"Supported formats are [text, csv, json, table]" - ) - print(result_df.to_markdown(tablefmt="fancy_grid")) - raise ValueError(error_msg) - - class TextResultHandler(object): - def execute(self, config, format, result_df): - print_formatted_(format, result_df) + def execute(self, config, result_df): + print(result_df.to_string(index=False)) return result_df diff --git a/setup.py b/setup.py index 6935e797d..e384f0751 100644 --- a/setup.py +++ b/setup.py @@ -48,7 +48,6 @@ "google-cloud-spanner==3.1.0", "setuptools>=34.0.0", "jellyfish==0.8.2", - "tabulate==0.8.9", ] extras_require = { diff --git a/tests/system/data_sources/test_bigquery.py b/tests/system/data_sources/test_bigquery.py index 2d7b95901..d9e980b73 100644 --- a/tests/system/data_sources/test_bigquery.py +++ b/tests/system/data_sources/test_bigquery.py @@ -14,10 +14,9 @@ import os -import pytest - -from data_validation import __main__ as main from data_validation import cli_tools, consts, data_validation +from data_validation import __main__ as main + BQ_CONN = {"source_type": "BigQuery", "project_id": os.environ["PROJECT_ID"]} CONFIG_COUNT_VALID = { @@ -179,9 +178,7 @@ def test_count_validator(): - validator = data_validation.DataValidation( - CONFIG_COUNT_VALID, format="text", verbose=True - ) + validator = data_validation.DataValidation(CONFIG_COUNT_VALID, verbose=True) df = validator.execute() count_value = df[df["validation_name"] == "count"]["source_agg_value"].values[0] @@ -210,9 +207,7 @@ def test_count_validator(): def test_grouped_count_validator(): - validator = data_validation.DataValidation( - CONFIG_GROUPED_COUNT_VALID, format="csv", verbose=True - ) + validator = data_validation.DataValidation(CONFIG_GROUPED_COUNT_VALID, verbose=True) df = validator.execute() rows = list(df[df["validation_name"] == "count"].iterrows()) @@ -228,9 +223,7 @@ def test_grouped_count_validator(): def test_numeric_types(): - validator = data_validation.DataValidation( - CONFIG_NUMERIC_AGG_VALID, format="json", verbose=True - ) + validator = data_validation.DataValidation(CONFIG_NUMERIC_AGG_VALID, verbose=True) df = validator.execute() for validation in df.to_dict(orient="records"): @@ -253,7 +246,7 @@ def test_cli_store_yaml_then_run(): # The number of lines is not significant, except that it represents # the exact file expected to be created. Any change to this value # is likely to be a breaking change and must be assessed. - assert len(yaml_file.readlines()) == 33 + assert len(yaml_file.readlines()) == 32 # Run generated config run_config_args = parser.parse_args(CLI_RUN_CONFIG_ARGS) @@ -285,13 +278,3 @@ def _store_bq_conn(): def _remove_bq_conn(): file_path = cli_tools._get_connection_file(BQ_CONN_NAME) os.remove(file_path) - - -def test_unsupported_result_format(): - with pytest.raises(ValueError): - validator = data_validation.DataValidation( - CONFIG_GROUPED_COUNT_VALID, format="foobar", verbose=True - ) - df = validator.execute() - rows = list(df[df["validation_name"] == "count"].iterrows()) - assert len(rows) > 1 diff --git a/tests/unit/result_handlers/test_text.py b/tests/unit/result_handlers/test_text.py index cd11b1b0d..19741aa68 100644 --- a/tests/unit/result_handlers/test_text.py +++ b/tests/unit/result_handlers/test_text.py @@ -38,9 +38,8 @@ def test_import(module_under_test): def test_basic_result_handler(module_under_test): """Test basic handler executes """ - format = "json" result_df = DataFrame(SAMPLE_RESULT_DATA) result_handler = module_under_test.TextResultHandler() - handler_output = result_handler.execute(SAMPLE_CONFIG, format, result_df) + handler_output = result_handler.execute(SAMPLE_CONFIG, result_df) assert handler_output["count"].sum() == result_df["count"].sum()