From dbd9bc387778580a106b39f4dfb9c10fd63d2f6f Mon Sep 17 00:00:00 2001
From: Prayas Purusottam
Date: Sat, 3 Sep 2022 00:58:59 +0530
Subject: [PATCH] =?UTF-8?q?feat:=20Addition=20of=20log=20level=20as=20an?=
 =?UTF-8?q?=20argument=20for=20DVT=20logging=20and=20replac=E2=80=A6=20(#5?=
 =?UTF-8?q?77)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* feat: Addition of log level as an argument for DVT logging and replacing print with logging.info in result_handler
---
 README.md                                   | 12 ++++++------
 data_validation/__main__.py                 | 17 ++++++++++++++---
 data_validation/cli_tools.py                |  7 +++++++
 data_validation/combiner.py                 |  4 ++--
 data_validation/result_handlers/bigquery.py | 10 +++++++---
 5 files changed, 36 insertions(+), 14 deletions(-)

diff --git a/README.md b/README.md
index c8ff69ea3..ca2aeec6b 100644
--- a/README.md
+++ b/README.md
@@ -83,7 +83,7 @@ over all columns ('*') will only run over numeric columns, unless the
 `--wildcard-include-string-len` flag is present.
 
 ```
-data-validation (--verbose or -v) validate column
+data-validation (--verbose or -v) (--log-level or -ll) validate column
   --source-conn or -sc SOURCE_CONN
                         Source connection details
                         See: *Data Source Configurations* section for each data source
@@ -155,7 +155,7 @@ Under the hood, row validation uses apply functions such as IFNULL() or
 RTRIM(). These can be edited in the YAML config to customize your row validation.
 
 ```
-data-validation (--verbose or -v) validate row
+data-validation (--verbose or -v) (--log-level or -ll) validate row
   --source-conn or -sc SOURCE_CONN
                         Source connection details
                         See: *Data Source Configurations* section for each data source
@@ -199,7 +199,7 @@ Below is the syntax for schema validations. These can be used to compare case
 in types between source and target.
 
 ```
-data-validation (--verbose or -v) validate schema
+data-validation (--verbose or -v) (--log-level or -ll) validate schema
   --source-conn or -sc SOURCE_CONN
                         Source connection details
                         See: *Data Source Configurations* section for each data source
@@ -228,7 +228,7 @@ data-validation (--verbose or -v) validate schema
 Below is the command syntax for custom query column validations.
 
 ```
-data-validation (--verbose or -v) validate custom-query
+data-validation (--verbose or -v) (--log-level or -ll) validate custom-query
   --source-conn or -sc SOURCE_CONN
                         Source connection details
                         See: *Data Source Configurations* section for each data source
@@ -282,7 +282,7 @@ in the SELECT statement of both source_query.sql and target_query.sql
 Below is the command syntax for custom query row validations.
 
 ```
-data-validation (--verbose or -v) validate custom-query
+data-validation (--verbose or -v) (--log-level or -ll) validate custom-query
   --source-conn or -sc SOURCE_CONN
                         Source connection details
                         See: *Data Source Configurations* section for each data source
@@ -336,7 +336,7 @@ Once the file is updated and saved, the following command runs the
 validation:
 
 ```
-data-validation configs run -c citibike.yaml
+data-validation (--verbose or -v) (--log-level or -ll) configs run -c citibike.yaml
 ```
 
 View the complete YAML file for a Grouped Column validation on the
diff --git a/data_validation/__main__.py b/data_validation/__main__.py
index cee4036b3..219886dac 100644
--- a/data_validation/__main__.py
+++ b/data_validation/__main__.py
@@ -31,6 +31,15 @@
 # by default yaml dumps lists as pointers. This disables that feature
 Dumper.ignore_aliases = lambda *args: True
 
+# Log level mappings for the input argument of log level string
+LOG_LEVEL_MAP = {
+    "DEBUG": logging.DEBUG,
+    "INFO": logging.INFO,
+    "WARNING": logging.WARNING,
+    "ERROR": logging.ERROR,
+    "CRITICAL": logging.CRITICAL,
+}
+
 
 def _get_arg_config_file(args):
     """Return String yaml config file path."""
@@ -499,13 +508,15 @@ def validate(args):
 
 
 def main():
+
+    # Create Parser and Get Deployment Info
+    args = cli_tools.get_parsed_args()
     logging.basicConfig(
-        level=logging.INFO,
+        level=LOG_LEVEL_MAP[args.log_level],
         format="%(asctime)s-%(levelname)s: %(message)s",
         datefmt="%m/%d/%Y %I:%M:%S %p",
     )
-    # Create Parser and Get Deployment Info
-    args = cli_tools.get_parsed_args()
+
     if args.command == "connections":
         run_connections(args)
     elif args.command == "run-config":
diff --git a/data_validation/cli_tools.py b/data_validation/cli_tools.py
index 01bb70ebc..72508e470 100644
--- a/data_validation/cli_tools.py
+++ b/data_validation/cli_tools.py
@@ -151,6 +151,13 @@ def configure_arg_parser():
     )
 
     parser.add_argument("--verbose", "-v", action="store_true", help="Verbose logging")
+    parser.add_argument(
+        "--log-level",
+        "-ll",
+        default="INFO",
+        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
+        help="Log Level to be assigned. This will print logs with level same or above",
+    )
 
     subparsers = parser.add_subparsers(dest="command")
     _configure_validate_parser(subparsers)
diff --git a/data_validation/combiner.py b/data_validation/combiner.py
index 1b9564b6f..d45b19de1 100644
--- a/data_validation/combiner.py
+++ b/data_validation/combiner.py
@@ -87,8 +87,8 @@ def generate_report(
     documented = _add_metadata(joined, run_metadata)
 
     if verbose:
-        logging.info("-- ** Combiner Query ** --")
-        logging.info(documented.compile())
+        logging.debug("-- ** Combiner Query ** --")
+        logging.debug(documented.compile())
 
     result_df = client.execute(documented)
     result_df.validation_status.fillna(consts.VALIDATION_STATUS_FAIL, inplace=True)
diff --git a/data_validation/result_handlers/bigquery.py b/data_validation/result_handlers/bigquery.py
index dbaf8aa26..435f0468b 100644
--- a/data_validation/result_handlers/bigquery.py
+++ b/data_validation/result_handlers/bigquery.py
@@ -17,7 +17,8 @@
 from google.cloud import bigquery
 
 from data_validation import client_info
-from data_validation.result_handlers.text import TextResultHandler
+import logging
+from data_validation import consts
 
 
 class BigQueryResultHandler(object):
@@ -55,8 +56,11 @@ def get_handler_for_project(
         return BigQueryResultHandler(client, table_id=table_id)
 
     def execute(self, config, result_df):
-        text_handler = TextResultHandler("table")
-        text_handler.print_formatted_(result_df)
+        logging.info(
+            result_df.drop(consts.COLUMN_FILTER_LIST, axis=1).to_markdown(
+                tablefmt="fancy_grid", index=False
+            )
+        )
 
         table = self._bigquery_client.get_table(self._table_id)
         chunk_errors = self._bigquery_client.insert_rows_from_dataframe(
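For reviewers, a minimal sketch (not part of the patch) of how the new `--log-level` argument is expected to drive log filtering. It mirrors the `LOG_LEVEL_MAP` and `logging.basicConfig` call added in `data_validation/__main__.py` above; the `log_level` variable is a stand-in for the parsed `args.log_level` value, whose argparse default in `cli_tools.py` is `"INFO"`:

```
import logging

# Mirrors LOG_LEVEL_MAP from data_validation/__main__.py in this patch.
LOG_LEVEL_MAP = {
    "DEBUG": logging.DEBUG,
    "INFO": logging.INFO,
    "WARNING": logging.WARNING,
    "ERROR": logging.ERROR,
    "CRITICAL": logging.CRITICAL,
}

# Stand-in for args.log_level; the CLI default added in cli_tools.py is "INFO".
log_level = "WARNING"

logging.basicConfig(
    level=LOG_LEVEL_MAP[log_level],
    format="%(asctime)s-%(levelname)s: %(message)s",
    datefmt="%m/%d/%Y %I:%M:%S %p",
)

logging.info("suppressed: INFO is below the configured WARNING threshold")
logging.warning("emitted: WARNING meets the configured threshold")
```

With both `-v` and `--log-level DEBUG` passed on the command line, the combiner query that `generate_report` now emits via `logging.debug` becomes visible, while the default `INFO` level keeps it out of normal runs.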