From 77ba13dffcc2948c2349436defd3062309b038ce Mon Sep 17 00:00:00 2001 From: Yogesh Tewari Date: Tue, 29 Jun 2021 00:34:51 -0400 Subject: [PATCH] feat: Allow user to specify a format for stdout (#242) --- data_validation/__main__.py | 7 ++++--- data_validation/cli_tools.py | 6 ++++++ data_validation/data_validation.py | 4 ++-- data_validation/result_handlers/text.py | 24 ++++++++++++++++++++++-- setup.py | 1 + 5 files changed, 35 insertions(+), 7 deletions(-) diff --git a/data_validation/__main__.py b/data_validation/__main__.py index f2426f941..3b2aaa73d 100644 --- a/data_validation/__main__.py +++ b/data_validation/__main__.py @@ -272,11 +272,12 @@ def convert_config_to_yaml(args, config_managers): return yaml_config -def run_validation(config_manager, verbose=False): +def run_validation(config_manager, result_format="table", verbose=False): """Run a single validation. Args: config_manager (ConfigManager): Validation config manager instance. + result_format: pretty printing results format verbose (bool): Validation setting to log queries run. """ validator = DataValidation( @@ -285,7 +286,7 @@ def run_validation(config_manager, verbose=False): result_handler=None, verbose=verbose, ) - validator.execute() + validator.execute(result_format) def run_validations(args, config_managers): @@ -296,7 +297,7 @@ def run_validations(args, config_managers): """ # TODO(issue/31): Add parallel execution logic for config_manager in config_managers: - run_validation(config_manager, verbose=args.verbose) + run_validation(config_manager, result_format=args.format, verbose=args.verbose) def store_yaml_config_file(args, config_managers): diff --git a/data_validation/cli_tools.py b/data_validation/cli_tools.py index 9b12f8d1d..8a71df3d3 100644 --- a/data_validation/cli_tools.py +++ b/data_validation/cli_tools.py @@ -285,6 +285,12 @@ def _configure_run_parser(subparsers): "-filters", help="Filters in the format source_filter:target_filter", ) + run_parser.add_argument( + "--format", + "-format", + default="table", + help="Set the format for printing command output", + ) def _configure_connection_parser(subparsers): diff --git a/data_validation/data_validation.py b/data_validation/data_validation.py index 2675dfdc0..e68ba44e2 100644 --- a/data_validation/data_validation.py +++ b/data_validation/data_validation.py @@ -86,7 +86,7 @@ def __init__( # TODO(dhercher) we planned on shifting this to use an Execution Handler. # Leaving to to swast on the design of how this should look. - def execute(self): + def execute(self, result_format="table"): """ Execute Queries and Store Results """ if self.config_manager.validation_type == consts.ROW_VALIDATION: grouped_fields = self.validation_builder.pop_grouped_fields() @@ -102,7 +102,7 @@ def execute(self): ) # Call Result Handler to Manage Results - return self.result_handler.execute(self.config, result_df) + return self.result_handler.execute(self.config, result_df, result_format) def query_too_large(self, rows_df, grouped_fields): """ Return bool to dictate if another level of recursion diff --git a/data_validation/result_handlers/text.py b/data_validation/result_handlers/text.py index 58cbb8c76..d8f8d522d 100644 --- a/data_validation/result_handlers/text.py +++ b/data_validation/result_handlers/text.py @@ -23,8 +23,28 @@ """ -class TextResultHandler(object): - def execute(self, config, result_df): +def print_formatted_(result_df, result_format): + """ + Utility for printing formatted results + :param result_df + :param result_format + """ + if result_format == "text": print(result_df.to_string(index=False)) + elif result_format == "csv": + print(result_df.to_csv(index=False)) + elif result_format == "json": + print(result_df.to_json(orient="index")) + elif result_format == "table": + print(result_df.to_markdown(tablefmt="fancy_grid")) + else: + error_msg = f"format [{result_format}] not supported, printed in default(table grid) mode" + print(result_df.to_markdown(tablefmt="fancy_grid")) + raise Exception(error_msg) + + +class TextResultHandler(object): + def execute(self, config, result_df, result_format="table"): + print_formatted_(result_df, result_format) return result_df diff --git a/setup.py b/setup.py index 828bf7109..aa559db69 100644 --- a/setup.py +++ b/setup.py @@ -47,6 +47,7 @@ "google-cloud-spanner==3.1.0", "setuptools>=34.0.0", "jellyfish==0.8.2", + "tabulate==0.8.9", ] extras_require = {