From fb76f44c06583ad4a3a42971dcd55374bcf54ada Mon Sep 17 00:00:00 2001 From: Yogesh Tewari Date: Thu, 5 Aug 2021 14:26:16 -0400 Subject: [PATCH] feat: Allow user to specify a format for stdout(#242) --- README.md | 2 ++ data_validation/__main__.py | 1 + data_validation/cli_tools.py | 7 +++++ data_validation/config_manager.py | 5 ++-- data_validation/consts.py | 1 + data_validation/result_handlers/text.py | 27 ++++++++++++++++++-- setup.py | 1 + tests/system/data_sources/test_bigquery.py | 5 +++- tests/system/data_sources/test_mysql.py | 1 + tests/system/data_sources/test_postgres.py | 1 + tests/system/data_sources/test_spanner.py | 2 ++ tests/system/data_sources/test_sql_server.py | 1 + tests/system/data_sources/test_teradata.py | 1 + 13 files changed, 50 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 73130d39f..ce0a7efe1 100644 --- a/README.md +++ b/README.md @@ -110,6 +110,8 @@ data-validation run --labels or -l KEY1=VALUE1,KEY2=VALUE2 (Optional) Comma-separated key value pair labels for the run. --verbose or -v Verbose logging will print queries executed + --format or -fmt Format for stdout output. Supported formats are (text, csv, json, table). + It defaults to table. ``` The default aggregation type is a 'COUNT *'. 
If no aggregation flag (i.e count, diff --git a/data_validation/__main__.py b/data_validation/__main__.py index f2426f941..fc5266d68 100644 --- a/data_validation/__main__.py +++ b/data_validation/__main__.py @@ -143,6 +143,7 @@ def build_config_managers_from_args(args): table_obj, labels, threshold, + format=args.format, result_handler_config=result_handler_config, filter_config=filter_config, verbose=args.verbose, diff --git a/data_validation/cli_tools.py b/data_validation/cli_tools.py index 84c5224e4..40874846e 100644 --- a/data_validation/cli_tools.py +++ b/data_validation/cli_tools.py @@ -291,6 +291,13 @@ def _configure_run_parser(subparsers): "-filters", help="Filters in the format source_filter:target_filter", ) + run_parser.add_argument( + "--format", + "-fmt", + default="table", + help="Set the format for printing command output, Supported formats are (text, csv, json, table). It defaults " + "to table", + ) def _configure_connection_parser(subparsers): diff --git a/data_validation/config_manager.py b/data_validation/config_manager.py index cc37fee12..3bbc31d8a 100644 --- a/data_validation/config_manager.py +++ b/data_validation/config_manager.py @@ -24,7 +24,6 @@ class ConfigManager(object): - _config: dict = None source_client = None target_client = None @@ -238,7 +237,7 @@ def get_yaml_validation_block(self): def get_result_handler(self): """Return ResultHandler instance from supplied config.""" if not self.result_handler_config: - return TextResultHandler() + return TextResultHandler(self._config[consts.CONFIG_FORMAT]) result_type = self.result_handler_config[consts.CONFIG_TYPE] if result_type == "BigQuery": @@ -269,6 +268,7 @@ def build_config_manager( table_obj, labels, threshold, + format, result_handler_config=None, filter_config=None, verbose=False, @@ -287,6 +287,7 @@ def build_config_manager( ), consts.CONFIG_LABELS: labels, consts.CONFIG_THRESHOLD: threshold, + consts.CONFIG_FORMAT: format, consts.CONFIG_RESULT_HANDLER: result_handler_config, 
consts.CONFIG_FILTERS: filter_config, } diff --git a/data_validation/consts.py b/data_validation/consts.py index 6b55eafbb..76ee444b5 100644 --- a/data_validation/consts.py +++ b/data_validation/consts.py @@ -35,6 +35,7 @@ CONFIG_TARGET_COLUMN = "target_column" CONFIG_THRESHOLD = "threshold" CONFIG_CAST = "cast" +CONFIG_FORMAT = "format" CONFIG_LIMIT = "limit" CONFIG_FILTERS = "filters" CONFIG_FILTER_SOURCE = "source" diff --git a/data_validation/result_handlers/text.py b/data_validation/result_handlers/text.py index 58cbb8c76..e6eda8f7f 100644 --- a/data_validation/result_handlers/text.py +++ b/data_validation/result_handlers/text.py @@ -24,7 +24,30 @@ class TextResultHandler(object): - def execute(self, config, result_df): - print(result_df.to_string(index=False)) + def __init__(self, format): + self.format = format + + def print_formatted_(self, result_df): + """ + Utility for printing formatted results + :param result_df + """ + if self.format == "text": + print(result_df.to_string(index=False)) + elif self.format == "csv": + print(result_df.to_csv(index=False)) + elif self.format == "json": + print(result_df.to_json(orient="index")) + elif self.format == "table": + print(result_df.to_markdown(tablefmt="fancy_grid")) + else: + error_msg = ( + f"format [{self.format}] not supported, results printed in default(table) mode. 
" + f"Supported formats are [text, csv, json, table]" + ) + print(result_df.to_markdown(tablefmt="fancy_grid")) + raise ValueError(error_msg) + def execute(self, config, result_df): + self.print_formatted_(result_df) return result_df diff --git a/setup.py b/setup.py index e384f0751..6935e797d 100644 --- a/setup.py +++ b/setup.py @@ -48,6 +48,7 @@ "google-cloud-spanner==3.1.0", "setuptools>=34.0.0", "jellyfish==0.8.2", + "tabulate==0.8.9", ] extras_require = { diff --git a/tests/system/data_sources/test_bigquery.py b/tests/system/data_sources/test_bigquery.py index d9e980b73..6cd31e29c 100644 --- a/tests/system/data_sources/test_bigquery.py +++ b/tests/system/data_sources/test_bigquery.py @@ -61,6 +61,7 @@ consts.CONFIG_FIELD_ALIAS: "min_birth_year", }, ], + consts.CONFIG_FORMAT: "table", } CONFIG_GROUPED_COUNT_VALID = { @@ -94,6 +95,7 @@ consts.CONFIG_CAST: "date", }, ], + consts.CONFIG_FORMAT: "table", } # TODO: The definition for this table is stored in: ./tests/resources/ @@ -127,6 +129,7 @@ }, ], consts.CONFIG_GROUPED_COLUMNS: [], + consts.CONFIG_FORMAT: "table", } BQ_CONN_NAME = "bq-integration-test" @@ -246,7 +249,7 @@ def test_cli_store_yaml_then_run(): # The number of lines is not significant, except that it represents # the exact file expected to be created. Any change to this value # is likely to be a breaking change and must be assessed. 
- assert len(yaml_file.readlines()) == 32 + assert len(yaml_file.readlines()) == 33 # Run generated config run_config_args = parser.parse_args(CLI_RUN_CONFIG_ARGS) diff --git a/tests/system/data_sources/test_mysql.py b/tests/system/data_sources/test_mysql.py index 626f17cd1..81a27f4c8 100644 --- a/tests/system/data_sources/test_mysql.py +++ b/tests/system/data_sources/test_mysql.py @@ -46,6 +46,7 @@ consts.CONFIG_FIELD_ALIAS: "count", }, ], + consts.CONFIG_FORMAT: "table", } diff --git a/tests/system/data_sources/test_postgres.py b/tests/system/data_sources/test_postgres.py index 5295bf43c..0a5829516 100644 --- a/tests/system/data_sources/test_postgres.py +++ b/tests/system/data_sources/test_postgres.py @@ -74,6 +74,7 @@ def test_postgres_count(): consts.CONFIG_FIELD_ALIAS: "count", }, ], + consts.CONFIG_FORMAT: "table", } data_validator = data_validation.DataValidation(config_count_valid, verbose=False,) diff --git a/tests/system/data_sources/test_spanner.py b/tests/system/data_sources/test_spanner.py index 24aafd080..a8eddba85 100644 --- a/tests/system/data_sources/test_spanner.py +++ b/tests/system/data_sources/test_spanner.py @@ -112,6 +112,7 @@ def count_config(spanner_connection_config, database_id): consts.CONFIG_FIELD_ALIAS: "min_int_col", }, ], + consts.CONFIG_FORMAT: "table", } @@ -148,6 +149,7 @@ def grouped_config(spanner_connection_config, database_id): consts.CONFIG_CAST: "date", }, ], + consts.CONFIG_FORMAT: "table", } diff --git a/tests/system/data_sources/test_sql_server.py b/tests/system/data_sources/test_sql_server.py index 6382cc65d..3a2cc2ad2 100644 --- a/tests/system/data_sources/test_sql_server.py +++ b/tests/system/data_sources/test_sql_server.py @@ -74,6 +74,7 @@ def test_sql_server_count(): consts.CONFIG_FIELD_ALIAS: "count", }, ], + consts.CONFIG_FORMAT: "table", } data_validator = data_validation.DataValidation(config_count_valid, verbose=False,) diff --git a/tests/system/data_sources/test_teradata.py 
b/tests/system/data_sources/test_teradata.py index cf289073a..b427d0499 100644 --- a/tests/system/data_sources/test_teradata.py +++ b/tests/system/data_sources/test_teradata.py @@ -32,6 +32,7 @@ "schema_name": "Sys_Calendar", "table_name": "CALENDAR", "partition_column": "year_of_calendar", + "format": "table", }