Revert "feat: Allow user to specify a format for stdout (#242) (#293)" (#295)

This reverts commit f0a9fa1.

Co-authored-by: Neha Nene <[email protected]>
GoogleCloudPlatform · Aug 4, 2021 · 5fe6a8d · 5fe6a8d
1 parent f0a9fa1
commit 5fe6a8d
Show file tree

Hide file tree

Showing 10 changed files with 10 additions and 73 deletions.
diff --git a/README.md b/README.md
@@ -110,8 +110,6 @@ data-validation run
   --labels or -l KEY1=VALUE1,KEY2=VALUE2
                         (Optional) Comma-separated key value pair labels for the run.
   --verbose or -v       Verbose logging will print queries executed
-  --format or -fmt      Format for stdout output, Supported formats are (text, csv, json, table)
-                        It defaults to table.
 ```
 
 The default aggregation type is a 'COUNT *'. If no aggregation flag (i.e count,

diff --git a/data_validation/__main__.py b/data_validation/__main__.py
@@ -146,7 +146,6 @@ def build_config_managers_from_args(args):
             result_handler_config=result_handler_config,
             filter_config=filter_config,
             verbose=args.verbose,
-            format=args.format,
         )
         configs.append(build_config_from_args(args, config_manager))
 
@@ -282,7 +281,6 @@ def run_validation(config_manager, verbose=False):
     """
     validator = DataValidation(
         config_manager.config,
-        format=config_manager.format,
         validation_builder=None,
         result_handler=None,
         verbose=verbose,

diff --git a/data_validation/cli_tools.py b/data_validation/cli_tools.py
@@ -291,12 +291,6 @@ def _configure_run_parser(subparsers):
         "-filters",
         help="Filters in the format source_filter:target_filter",
     )
-    run_parser.add_argument(
-        "--format",
-        "-fmt",
-        default="table",
-        help="Set the format for printing command output, Supported formats are (text, csv, json, table)",
-    )
 
 
 def _configure_connection_parser(subparsers):

diff --git a/data_validation/config_manager.py b/data_validation/config_manager.py
@@ -188,11 +188,6 @@ def threshold(self):
         """Return threshold from Config """
         return self._config.get(consts.CONFIG_THRESHOLD, 0.0)
 
-    @property
-    def format(self):
-        """Return threshold from Config """
-        return self._config.get(consts.CONFIG_FORMAT, "table")
-
     def get_source_ibis_table(self):
         """Return IbisTable from source."""
         if not hasattr(self, "_source_ibis_table"):
@@ -274,7 +269,6 @@ def build_config_manager(
         table_obj,
         labels,
         threshold,
-        format,
         result_handler_config=None,
         filter_config=None,
         verbose=False,
@@ -295,7 +289,6 @@ def build_config_manager(
             consts.CONFIG_THRESHOLD: threshold,
             consts.CONFIG_RESULT_HANDLER: result_handler_config,
             consts.CONFIG_FILTERS: filter_config,
-            consts.CONFIG_FORMAT: format,
         }
 
         # Only FileSystem connections do not require schemas

diff --git a/data_validation/consts.py b/data_validation/consts.py
@@ -34,7 +34,6 @@
 CONFIG_SOURCE_COLUMN = "source_column"
 CONFIG_TARGET_COLUMN = "target_column"
 CONFIG_THRESHOLD = "threshold"
-CONFIG_FORMAT = "format"
 CONFIG_CAST = "cast"
 CONFIG_LIMIT = "limit"
 CONFIG_FILTERS = "filters"

diff --git a/data_validation/data_validation.py b/data_validation/data_validation.py
@@ -39,7 +39,6 @@ class DataValidation(object):
     def __init__(
         self,
         config,
-        format="table",
         validation_builder=None,
         schema_validator=None,
         result_handler=None,
@@ -59,8 +58,6 @@ def __init__(
         # Data Client Management
         self.config = config
 
-        self.format = format
-
         self.source_client = clients.get_data_client(
             self.config[consts.CONFIG_SOURCE_CONN]
         )
@@ -105,7 +102,7 @@ def execute(self):
             )
 
         # Call Result Handler to Manage Results
-        return self.result_handler.execute(self.config, self.format, result_df)
+        return self.result_handler.execute(self.config, result_df)
 
     def query_too_large(self, rows_df, grouped_fields):
         """ Return bool to dictate if another level of recursion

diff --git a/data_validation/result_handlers/text.py b/data_validation/result_handlers/text.py
@@ -23,31 +23,8 @@
 """
 
 
-def print_formatted_(format, result_df):
-    """
-    Utility for printing formatted results
-    :param result_df
-    :param format
-    """
-    if format == "text":
-        print(result_df.to_string(index=False))
-    elif format == "csv":
-        print(result_df.to_csv(index=False))
-    elif format == "json":
-        print(result_df.to_json(orient="index"))
-    elif format == "table":
-        print(result_df.to_markdown(tablefmt="fancy_grid"))
-    else:
-        error_msg = (
-            f"format [{format}] not supported, results printed in default(table) mode. "
-            f"Supported formats are [text, csv, json, table]"
-        )
-        print(result_df.to_markdown(tablefmt="fancy_grid"))
-        raise ValueError(error_msg)
-
-
 class TextResultHandler(object):
-    def execute(self, config, format, result_df):
-        print_formatted_(format, result_df)
+    def execute(self, config, result_df):
+        print(result_df.to_string(index=False))
 
         return result_df
diff --git a/setup.py b/setup.py
@@ -48,7 +48,6 @@
     "google-cloud-spanner==3.1.0",
     "setuptools>=34.0.0",
     "jellyfish==0.8.2",
-    "tabulate==0.8.9",
 ]
 
 extras_require = {

diff --git a/tests/system/data_sources/test_bigquery.py b/tests/system/data_sources/test_bigquery.py
@@ -14,10 +14,9 @@
 
 import os
 
-import pytest
-
-from data_validation import __main__ as main
 from data_validation import cli_tools, consts, data_validation
+from data_validation import __main__ as main
+
 
 BQ_CONN = {"source_type": "BigQuery", "project_id": os.environ["PROJECT_ID"]}
 CONFIG_COUNT_VALID = {
@@ -179,9 +178,7 @@
 
 
 def test_count_validator():
-    validator = data_validation.DataValidation(
-        CONFIG_COUNT_VALID, format="text", verbose=True
-    )
+    validator = data_validation.DataValidation(CONFIG_COUNT_VALID, verbose=True)
     df = validator.execute()
 
     count_value = df[df["validation_name"] == "count"]["source_agg_value"].values[0]
@@ -210,9 +207,7 @@ def test_count_validator():
 
 
 def test_grouped_count_validator():
-    validator = data_validation.DataValidation(
-        CONFIG_GROUPED_COUNT_VALID, format="csv", verbose=True
-    )
+    validator = data_validation.DataValidation(CONFIG_GROUPED_COUNT_VALID, verbose=True)
     df = validator.execute()
     rows = list(df[df["validation_name"] == "count"].iterrows())
 
@@ -228,9 +223,7 @@ def test_grouped_count_validator():
 
 
 def test_numeric_types():
-    validator = data_validation.DataValidation(
-        CONFIG_NUMERIC_AGG_VALID, format="json", verbose=True
-    )
+    validator = data_validation.DataValidation(CONFIG_NUMERIC_AGG_VALID, verbose=True)
     df = validator.execute()
 
     for validation in df.to_dict(orient="records"):
@@ -253,7 +246,7 @@ def test_cli_store_yaml_then_run():
         # The number of lines is not significant, except that it represents
         # the exact file expected to be created.  Any change to this value
         # is likely to be a breaking change and must be assessed.
-        assert len(yaml_file.readlines()) == 33
+        assert len(yaml_file.readlines()) == 32
 
     # Run generated config
     run_config_args = parser.parse_args(CLI_RUN_CONFIG_ARGS)
@@ -285,13 +278,3 @@ def _store_bq_conn():
 def _remove_bq_conn():
     file_path = cli_tools._get_connection_file(BQ_CONN_NAME)
     os.remove(file_path)
-
-
-def test_unsupported_result_format():
-    with pytest.raises(ValueError):
-        validator = data_validation.DataValidation(
-            CONFIG_GROUPED_COUNT_VALID, format="foobar", verbose=True
-        )
-        df = validator.execute()
-        rows = list(df[df["validation_name"] == "count"].iterrows())
-        assert len(rows) > 1
diff --git a/tests/unit/result_handlers/test_text.py b/tests/unit/result_handlers/test_text.py
@@ -38,9 +38,8 @@ def test_import(module_under_test):
 
 def test_basic_result_handler(module_under_test):
     """Test basic handler executes """
-    format = "json"
     result_df = DataFrame(SAMPLE_RESULT_DATA)
     result_handler = module_under_test.TextResultHandler()
 
-    handler_output = result_handler.execute(SAMPLE_CONFIG, format, result_df)
+    handler_output = result_handler.execute(SAMPLE_CONFIG, result_df)
     assert handler_output["count"].sum() == result_df["count"].sum()