GoogleCloudPlatform · yogeshtewari · Aug 25, 2021 · Aug 5, 2021 · Aug 5, 2021 · Aug 5, 2021
diff --git a/README.md b/README.md
@@ -110,6 +110,8 @@ data-validation run
   --labels or -l KEY1=VALUE1,KEY2=VALUE2
                         (Optional) Comma-separated key value pair labels for the run.
   --verbose or -v       Verbose logging will print queries executed
+  --format or -fmt      Format for stdout output. Supported formats are (text, csv, json, table).
+                        Defaults to table.
 ```
 
 The default aggregation type is a 'COUNT *'. If no aggregation flag (i.e count,

@@ -127,6 +127,7 @@ def build_config_managers_from_args(args):
     target_client = clients.get_data_client(target_conn)
 
     threshold = args.threshold if args.threshold else 0.0
+    format = args.format if args.format else "table"
 
     is_filesystem = True if source_conn["source_type"] == "FileSystem" else False
     tables_list = cli_tools.get_tables_list(
@@ -143,6 +144,7 @@ def build_config_managers_from_args(args):
             table_obj,
             labels,
             threshold,
+            format,
             result_handler_config=result_handler_config,
             filter_config=filter_config,
             verbose=args.verbose,

@@ -291,6 +291,13 @@ def _configure_run_parser(subparsers):
         "-filters",
         help="Filters in the format source_filter:target_filter",
     )
+    run_parser.add_argument(
+        "--format",
+        "-fmt",
+        default="table",
+        help="Set the format for printing command output, Supported formats are (text, csv, json, table). It defaults "
+        "to table",
+    )
 
 
 def _configure_connection_parser(subparsers):

@@ -24,7 +24,6 @@
 
 
 class ConfigManager(object):
-
     _config: dict = None
     source_client = None
     target_client = None
@@ -238,7 +237,7 @@ def get_yaml_validation_block(self):
     def get_result_handler(self):
         """Return ResultHandler instance from supplied config."""
         if not self.result_handler_config:
-            return TextResultHandler()
+            return TextResultHandler(self._config[consts.CONFIG_FORMAT])
 
         result_type = self.result_handler_config[consts.CONFIG_TYPE]
         if result_type == "BigQuery":
@@ -269,6 +268,7 @@ def build_config_manager(
         table_obj,
         labels,
         threshold,
+        format,
         result_handler_config=None,
         filter_config=None,
         verbose=False,
@@ -287,6 +287,7 @@ def build_config_manager(
             ),
             consts.CONFIG_LABELS: labels,
             consts.CONFIG_THRESHOLD: threshold,
+            consts.CONFIG_FORMAT: format,
             consts.CONFIG_RESULT_HANDLER: result_handler_config,
             consts.CONFIG_FILTERS: filter_config,
         }

@@ -35,6 +35,7 @@
 CONFIG_TARGET_COLUMN = "target_column"
 CONFIG_THRESHOLD = "threshold"
 CONFIG_CAST = "cast"
+CONFIG_FORMAT = "format"
 CONFIG_LIMIT = "limit"
 CONFIG_FILTERS = "filters"
 CONFIG_FILTER_SOURCE = "source"
@@ -99,3 +100,18 @@
 
 # Ibis Object Info
 NUMERIC_DATA_TYPES = ["float64", "int32", "int64", "decimal"]
+
+FORMAT_TYPES = ["csv", "json", "table", "text"]
+
+# Text Result Handler column filter list
+COLUMN_FILTER_LIST = [
+    "aggregation_type",
+    "difference",
+    "end_time",
+    "labels",
+    "pct_threshold",
+    "run_id",
+    "source_agg_value",
+    "start_time",
+    "target_agg_value",
+]
@@ -22,9 +22,49 @@
 Output validation report to text-based log
 """
 
+from data_validation import consts
+
 
 class TextResultHandler(object):
-    def execute(self, config, result_df):
-        print(result_df.to_string(index=False))
+    """Print validation results to stdout in a configurable format."""
+
+    def __init__(self, format, cols_filter_list=consts.COLUMN_FILTER_LIST):
+        # Output format name; validated against consts.FORMAT_TYPES on print.
+        self.format = format
+        # Columns dropped from the DataFrame before the "table" rendering.
+        self.cols_filter_list = cols_filter_list
+
+    def print_formatted_(self, result_df):
+        """
+        Utility for printing formatted results
+        :param result_df: DataFrame of validation results to print
+        :return: result_df, unchanged
+        :raises ValueError: if self.format is not in consts.FORMAT_TYPES
+        """
+        # Reject an unknown format before anything is written to stdout, so a
+        # failing run does not also emit a report nobody asked for.
+        if self.format not in consts.FORMAT_TYPES:
+            error_msg = (
+                f"format [{self.format}] not supported. "
+                f"Supported formats are [{', '.join(consts.FORMAT_TYPES)}]"
+            )
+            raise ValueError(error_msg)
+
+        if self.format == "text":
+            print(result_df.to_string(index=False))
+        elif self.format == "csv":
+            print(result_df.to_csv(index=False))
+        elif self.format == "json":
+            print(result_df.to_json(orient="index"))
+        else:
+            # "table": hide the filtered columns. errors="ignore" keeps a
+            # filter entry that is absent from result_df from raising KeyError.
+            table_df = result_df.drop(self.cols_filter_list, axis=1, errors="ignore")
+            print(table_df.to_markdown(tablefmt="fancy_grid"))
 
         return result_df
+
+    def execute(self, config, result_df):
+        """Print result_df in the configured format and return it unchanged."""
+        self.print_formatted_(result_df)
+        return result_df
@@ -48,6 +48,7 @@
     "google-cloud-spanner==3.1.0",
     "setuptools>=34.0.0",
     "jellyfish==0.8.2",
+    "tabulate==0.8.9",
 ]
 
 extras_require = {

@@ -61,6 +61,7 @@
             consts.CONFIG_FIELD_ALIAS: "min_birth_year",
         },
     ],
+    consts.CONFIG_FORMAT: "table",
 }
 
 CONFIG_GROUPED_COUNT_VALID = {
@@ -94,6 +95,7 @@
             consts.CONFIG_CAST: "date",
         },
     ],
+    consts.CONFIG_FORMAT: "table",
 }
 
 # TODO: The definition for this table is stored in: ./tests/resources/
@@ -127,6 +129,7 @@
         },
     ],
     consts.CONFIG_GROUPED_COLUMNS: [],
+    consts.CONFIG_FORMAT: "table",
 }
 
 BQ_CONN_NAME = "bq-integration-test"
@@ -246,7 +249,7 @@ def test_cli_store_yaml_then_run():
         # The number of lines is not significant, except that it represents
         # the exact file expected to be created.  Any change to this value
         # is likely to be a breaking change and must be assessed.
-        assert len(yaml_file.readlines()) == 32
+        assert len(yaml_file.readlines()) == 33
 
     # Run generated config
     run_config_args = parser.parse_args(CLI_RUN_CONFIG_ARGS)

@@ -46,6 +46,7 @@
             consts.CONFIG_FIELD_ALIAS: "count",
         },
     ],
+    consts.CONFIG_FORMAT: "table",
 }
 
 

@@ -74,6 +74,7 @@ def test_postgres_count():
                 consts.CONFIG_FIELD_ALIAS: "count",
             },
         ],
+        consts.CONFIG_FORMAT: "table",
     }
 
     data_validator = data_validation.DataValidation(config_count_valid, verbose=False,)

@@ -112,6 +112,7 @@ def count_config(spanner_connection_config, database_id):
                 consts.CONFIG_FIELD_ALIAS: "min_int_col",
             },
         ],
+        consts.CONFIG_FORMAT: "table",
     }
 
 
@@ -148,6 +149,7 @@ def grouped_config(spanner_connection_config, database_id):
                 consts.CONFIG_CAST: "date",
             },
         ],
+        consts.CONFIG_FORMAT: "table",
     }
 
 

@@ -74,6 +74,7 @@ def test_sql_server_count():
                 consts.CONFIG_FIELD_ALIAS: "count",
             },
         ],
+        consts.CONFIG_FORMAT: "table",
     }
 
     data_validator = data_validation.DataValidation(config_count_valid, verbose=False,)

@@ -32,6 +32,7 @@
     "schema_name": "Sys_Calendar",
     "table_name": "CALENDAR",
     "partition_column": "year_of_calendar",
+    "format": "table",
 }
 
 

@@ -16,12 +16,10 @@
 
 from pandas import DataFrame
 
-
 SAMPLE_CONFIG = {}
-SAMPLE_RESULT_DATA = [
-    {"table_name": "my_table", "count": 10},
-    {"table_name": "my_table", "count": 10},
-]
+SAMPLE_RESULT_DATA = [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]]
+SAMPLE_RESULT_COLUMNS = ["A", "B", "C", "D"]
+SAMPLE_RESULT_COLUMNS_FILTER_LIST = ["B", "D"]
 
 
 @pytest.fixture
@@ -38,8 +36,47 @@ def test_import(module_under_test):
 
 def test_basic_result_handler(module_under_test):
     """Test basic handler executes """
-    result_df = DataFrame(SAMPLE_RESULT_DATA)
-    result_handler = module_under_test.TextResultHandler()
+    result_df = DataFrame(SAMPLE_RESULT_DATA, columns=SAMPLE_RESULT_COLUMNS)
+    result_handler = module_under_test.TextResultHandler(
+        "csv", SAMPLE_RESULT_COLUMNS_FILTER_LIST
+    )
 
     handler_output = result_handler.execute(SAMPLE_CONFIG, result_df)
-    assert handler_output["count"].sum() == result_df["count"].sum()
+    assert handler_output["A"].sum() == result_df["A"].sum()
+
+
+def test_unsupported_result_format(module_under_test):
+    """Check that an unsupported format raises ValueError"""
+    result_df = DataFrame(SAMPLE_RESULT_DATA, columns=SAMPLE_RESULT_COLUMNS)
+    result_handler = module_under_test.TextResultHandler(
+        "foobar", SAMPLE_RESULT_COLUMNS_FILTER_LIST
+    )
+
+    # Keep only the failing call inside the context manager so a ValueError
+    # raised during setup cannot make this test pass by accident.
+    with pytest.raises(ValueError):
+        result_handler.execute(SAMPLE_CONFIG, result_df)
+
+
+def test_columns_to_print(module_under_test, capsys):
+    """Check for trimmed columns in grid print"""
+    result_df = DataFrame(SAMPLE_RESULT_DATA, columns=SAMPLE_RESULT_COLUMNS)
+    result_handler = module_under_test.TextResultHandler(
+        "table", SAMPLE_RESULT_COLUMNS_FILTER_LIST
+    )
+    result_handler.execute(SAMPLE_CONFIG, result_df)
+
+    # Expected cells once whitespace and the grid's border rows are removed;
+    # filtered columns B and D must not appear.
+    grid_text = "││A│C││0│0│2││1│4│6││2│8│10│"
+    printed_text = capsys.readouterr().out
+    printed_text = (
+        printed_text.replace("\n", "")
+        .replace("'", "")
+        .replace(" ", "")
+        .replace("╒════╤═════╤═════╕", "")
+        .replace("╞════╪═════╪═════╡", "")
+        .replace("├────┼─────┼─────┤", "")
+        .replace("╘════╧═════╧═════╛", "")
+    )
+    assert printed_text == grid_text
@@ -67,6 +67,7 @@
     ],
     consts.CONFIG_THRESHOLD: 0.0,
     consts.CONFIG_RESULT_HANDLER: None,
+    consts.CONFIG_FORMAT: "table",
 }
 
 SAMPLE_THRESHOLD_CONFIG = {
@@ -97,6 +98,7 @@
     ],
     consts.CONFIG_THRESHOLD: 150.0,
     consts.CONFIG_RESULT_HANDLER: None,
+    consts.CONFIG_FORMAT: "table",
 }
 
 SAMPLE_ROW_CONFIG = {
@@ -136,6 +138,7 @@
         },
     ],
     consts.CONFIG_RESULT_HANDLER: None,
+    consts.CONFIG_FORMAT: "table",
 }
 
 SAMPLE_ROW_CALC_CONFIG = {
@@ -223,6 +226,7 @@
         },
     ],
     consts.CONFIG_RESULT_HANDLER: None,
+    consts.CONFIG_FORMAT: "table",
 }
 
 

@@ -51,6 +51,7 @@
     consts.CONFIG_AGGREGATES: [],
     consts.CONFIG_THRESHOLD: 0.0,
     consts.CONFIG_RESULT_HANDLER: None,
+    consts.CONFIG_FORMAT: "table",
 }
 
 STRING_CONSTANT = "constant"