Skip to content

Commit

Permalink
Revert "feat: Allow user to specify a format for stdout (#242) (#293)" (#295)
Browse files Browse the repository at this point in the history

This reverts commit f0a9fa1.

Co-authored-by: Neha Nene <[email protected]>
  • Loading branch information
yogeshtewari and nehanene15 committed Aug 4, 2021
1 parent f0a9fa1 commit 5fe6a8d
Show file tree
Hide file tree
Showing 10 changed files with 10 additions and 73 deletions.
2 changes: 0 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,6 @@ data-validation run
--labels or -l KEY1=VALUE1,KEY2=VALUE2
(Optional) Comma-separated key value pair labels for the run.
--verbose or -v Verbose logging will print queries executed
--format or -fmt Format for stdout output, Supported formats are (text, csv, json, table)
It defaults to table.
```

The default aggregation type is a 'COUNT *'. If no aggregation flag (i.e count,
Expand Down
2 changes: 0 additions & 2 deletions data_validation/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,6 @@ def build_config_managers_from_args(args):
result_handler_config=result_handler_config,
filter_config=filter_config,
verbose=args.verbose,
format=args.format,
)
configs.append(build_config_from_args(args, config_manager))

Expand Down Expand Up @@ -282,7 +281,6 @@ def run_validation(config_manager, verbose=False):
"""
validator = DataValidation(
config_manager.config,
format=config_manager.format,
validation_builder=None,
result_handler=None,
verbose=verbose,
Expand Down
6 changes: 0 additions & 6 deletions data_validation/cli_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,12 +291,6 @@ def _configure_run_parser(subparsers):
"-filters",
help="Filters in the format source_filter:target_filter",
)
run_parser.add_argument(
"--format",
"-fmt",
default="table",
help="Set the format for printing command output, Supported formats are (text, csv, json, table)",
)


def _configure_connection_parser(subparsers):
Expand Down
7 changes: 0 additions & 7 deletions data_validation/config_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,11 +188,6 @@ def threshold(self):
"""Return threshold from Config """
return self._config.get(consts.CONFIG_THRESHOLD, 0.0)

@property
def format(self):
"""Return threshold from Config """
return self._config.get(consts.CONFIG_FORMAT, "table")

def get_source_ibis_table(self):
"""Return IbisTable from source."""
if not hasattr(self, "_source_ibis_table"):
Expand Down Expand Up @@ -274,7 +269,6 @@ def build_config_manager(
table_obj,
labels,
threshold,
format,
result_handler_config=None,
filter_config=None,
verbose=False,
Expand All @@ -295,7 +289,6 @@ def build_config_manager(
consts.CONFIG_THRESHOLD: threshold,
consts.CONFIG_RESULT_HANDLER: result_handler_config,
consts.CONFIG_FILTERS: filter_config,
consts.CONFIG_FORMAT: format,
}

# Only FileSystem connections do not require schemas
Expand Down
1 change: 0 additions & 1 deletion data_validation/consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@
CONFIG_SOURCE_COLUMN = "source_column"
CONFIG_TARGET_COLUMN = "target_column"
CONFIG_THRESHOLD = "threshold"
CONFIG_FORMAT = "format"
CONFIG_CAST = "cast"
CONFIG_LIMIT = "limit"
CONFIG_FILTERS = "filters"
Expand Down
5 changes: 1 addition & 4 deletions data_validation/data_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ class DataValidation(object):
def __init__(
self,
config,
format="table",
validation_builder=None,
schema_validator=None,
result_handler=None,
Expand All @@ -59,8 +58,6 @@ def __init__(
# Data Client Management
self.config = config

self.format = format

self.source_client = clients.get_data_client(
self.config[consts.CONFIG_SOURCE_CONN]
)
Expand Down Expand Up @@ -105,7 +102,7 @@ def execute(self):
)

# Call Result Handler to Manage Results
return self.result_handler.execute(self.config, self.format, result_df)
return self.result_handler.execute(self.config, result_df)

def query_too_large(self, rows_df, grouped_fields):
""" Return bool to dictate if another level of recursion
Expand Down
27 changes: 2 additions & 25 deletions data_validation/result_handlers/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,31 +23,8 @@
"""


def print_formatted_(format, result_df):
"""
Utility for printing formatted results
:param result_df
:param format
"""
if format == "text":
print(result_df.to_string(index=False))
elif format == "csv":
print(result_df.to_csv(index=False))
elif format == "json":
print(result_df.to_json(orient="index"))
elif format == "table":
print(result_df.to_markdown(tablefmt="fancy_grid"))
else:
error_msg = (
f"format [{format}] not supported, results printed in default(table) mode. "
f"Supported formats are [text, csv, json, table]"
)
print(result_df.to_markdown(tablefmt="fancy_grid"))
raise ValueError(error_msg)


class TextResultHandler(object):
def execute(self, config, format, result_df):
print_formatted_(format, result_df)
def execute(self, config, result_df):
print(result_df.to_string(index=False))

return result_df
1 change: 0 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@
"google-cloud-spanner==3.1.0",
"setuptools>=34.0.0",
"jellyfish==0.8.2",
"tabulate==0.8.9",
]

extras_require = {
Expand Down
29 changes: 6 additions & 23 deletions tests/system/data_sources/test_bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,9 @@

import os

import pytest

from data_validation import __main__ as main
from data_validation import cli_tools, consts, data_validation
from data_validation import __main__ as main


BQ_CONN = {"source_type": "BigQuery", "project_id": os.environ["PROJECT_ID"]}
CONFIG_COUNT_VALID = {
Expand Down Expand Up @@ -179,9 +178,7 @@


def test_count_validator():
validator = data_validation.DataValidation(
CONFIG_COUNT_VALID, format="text", verbose=True
)
validator = data_validation.DataValidation(CONFIG_COUNT_VALID, verbose=True)
df = validator.execute()

count_value = df[df["validation_name"] == "count"]["source_agg_value"].values[0]
Expand Down Expand Up @@ -210,9 +207,7 @@ def test_count_validator():


def test_grouped_count_validator():
validator = data_validation.DataValidation(
CONFIG_GROUPED_COUNT_VALID, format="csv", verbose=True
)
validator = data_validation.DataValidation(CONFIG_GROUPED_COUNT_VALID, verbose=True)
df = validator.execute()
rows = list(df[df["validation_name"] == "count"].iterrows())

Expand All @@ -228,9 +223,7 @@ def test_grouped_count_validator():


def test_numeric_types():
validator = data_validation.DataValidation(
CONFIG_NUMERIC_AGG_VALID, format="json", verbose=True
)
validator = data_validation.DataValidation(CONFIG_NUMERIC_AGG_VALID, verbose=True)
df = validator.execute()

for validation in df.to_dict(orient="records"):
Expand All @@ -253,7 +246,7 @@ def test_cli_store_yaml_then_run():
# The number of lines is not significant, except that it represents
# the exact file expected to be created. Any change to this value
# is likely to be a breaking change and must be assessed.
assert len(yaml_file.readlines()) == 33
assert len(yaml_file.readlines()) == 32

# Run generated config
run_config_args = parser.parse_args(CLI_RUN_CONFIG_ARGS)
Expand Down Expand Up @@ -285,13 +278,3 @@ def _store_bq_conn():
def _remove_bq_conn():
file_path = cli_tools._get_connection_file(BQ_CONN_NAME)
os.remove(file_path)


def test_unsupported_result_format():
with pytest.raises(ValueError):
validator = data_validation.DataValidation(
CONFIG_GROUPED_COUNT_VALID, format="foobar", verbose=True
)
df = validator.execute()
rows = list(df[df["validation_name"] == "count"].iterrows())
assert len(rows) > 1
3 changes: 1 addition & 2 deletions tests/unit/result_handlers/test_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,8 @@ def test_import(module_under_test):

def test_basic_result_handler(module_under_test):
"""Test basic handler executes """
format = "json"
result_df = DataFrame(SAMPLE_RESULT_DATA)
result_handler = module_under_test.TextResultHandler()

handler_output = result_handler.execute(SAMPLE_CONFIG, format, result_df)
handler_output = result_handler.execute(SAMPLE_CONFIG, result_df)
assert handler_output["count"].sum() == result_df["count"].sum()

0 comments on commit 5fe6a8d

Please sign in to comment.