From dbd9bc387778580a106b39f4dfb9c10fd63d2f6f Mon Sep 17 00:00:00 2001
From: Prayas Purusottam
Date: Sat, 3 Sep 2022 00:58:59 +0530
Subject: [PATCH] =?UTF-8?q?feat:=20Addition=20of=20log=20level=20as=20an?=
 =?UTF-8?q?=20argument=20for=20DVT=20logging=20and=20replac=E2=80=A6=20(#5?=
 =?UTF-8?q?77)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* feat: Addition of log level as an argument for DVT logging and replacing print with logging.info in result_handler
---
 README.md                                   | 12 ++++++------
 data_validation/__main__.py                 | 17 ++++++++++++++---
 data_validation/cli_tools.py                |  7 +++++++
 data_validation/combiner.py                 |  4 ++--
 data_validation/result_handlers/bigquery.py | 10 +++++++---
 5 files changed, 36 insertions(+), 14 deletions(-)

diff --git a/README.md b/README.md
index c8ff69ea3..ca2aeec6b 100644
--- a/README.md
+++ b/README.md
@@ -83,7 +83,7 @@ over all columns ('*') will only run over numeric columns, unless the
 `--wildcard-include-string-len` flag is present.
 
 ```
-data-validation (--verbose or -v) validate column
+data-validation (--verbose or -v) (--log-level or -ll) validate column
   --source-conn or -sc SOURCE_CONN
                         Source connection details
                         See: *Data Source Configurations* section for each data source
@@ -155,7 +155,7 @@ Under the hood, row validation uses apply functions such as IFNULL() or
 RTRIM(). These can be edited in the YAML config to customize your row validation.
 
 ```
-data-validation (--verbose or -v) validate row
+data-validation (--verbose or -v) (--log-level or -ll) validate row
   --source-conn or -sc SOURCE_CONN
                         Source connection details
                         See: *Data Source Configurations* section for each data source
@@ -199,7 +199,7 @@ Below is the syntax for schema validations. These can be used to compare case
 in types between source and target.
 
 ```
-data-validation (--verbose or -v) validate schema
+data-validation (--verbose or -v) (--log-level or -ll) validate schema
   --source-conn or -sc SOURCE_CONN
                         Source connection details
                         See: *Data Source Configurations* section for each data source
@@ -228,7 +228,7 @@ data-validation (--verbose or -v) validate schema
 Below is the command syntax for custom query column validations.
 
 ```
-data-validation (--verbose or -v) validate custom-query
+data-validation (--verbose or -v) (--log-level or -ll) validate custom-query
   --source-conn or -sc SOURCE_CONN
                         Source connection details
                         See: *Data Source Configurations* section for each data source
@@ -282,7 +282,7 @@ in the SELECT statement of both source_query.sql and target_query.sql
 Below is the command syntax for custom query row validations.
 
 ```
-data-validation (--verbose or -v) validate custom-query
+data-validation (--verbose or -v) (--log-level or -ll) validate custom-query
   --source-conn or -sc SOURCE_CONN
                         Source connection details
                         See: *Data Source Configurations* section for each data source
@@ -336,7 +336,7 @@ Once the file is updated and saved, the following command runs the
 validation:
 
 ```
-data-validation configs run -c citibike.yaml
+data-validation (--verbose or -v) (--log-level or -ll) configs run -c citibike.yaml
 ```
 
 View the complete YAML file for a Grouped Column validation on the
diff --git a/data_validation/__main__.py b/data_validation/__main__.py
index cee4036b3..219886dac 100644
--- a/data_validation/__main__.py
+++ b/data_validation/__main__.py
@@ -31,6 +31,15 @@
 # by default yaml dumps lists as pointers. This disables that feature
 Dumper.ignore_aliases = lambda *args: True
 
+# Log level mappings for the input argument of log level string
+LOG_LEVEL_MAP = {
+    "DEBUG": logging.DEBUG,
+    "INFO": logging.INFO,
+    "WARNING": logging.WARNING,
+    "ERROR": logging.ERROR,
+    "CRITICAL": logging.CRITICAL,
+}
+
 
 def _get_arg_config_file(args):
     """Return String yaml config file path."""
@@ -499,13 +508,15 @@ def validate(args):
 
 
 def main():
+
+    # Create Parser and Get Deployment Info
+    args = cli_tools.get_parsed_args()
     logging.basicConfig(
-        level=logging.INFO,
+        level=LOG_LEVEL_MAP[args.log_level],
         format="%(asctime)s-%(levelname)s: %(message)s",
         datefmt="%m/%d/%Y %I:%M:%S %p",
     )
-    # Create Parser and Get Deployment Info
-    args = cli_tools.get_parsed_args()
+
     if args.command == "connections":
         run_connections(args)
     elif args.command == "run-config":
diff --git a/data_validation/cli_tools.py b/data_validation/cli_tools.py
index 01bb70ebc..72508e470 100644
--- a/data_validation/cli_tools.py
+++ b/data_validation/cli_tools.py
@@ -151,6 +151,13 @@ def configure_arg_parser():
     )
 
     parser.add_argument("--verbose", "-v", action="store_true", help="Verbose logging")
+    parser.add_argument(
+        "--log-level",
+        "-ll",
+        default="INFO",
+        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
+        help="Log Level to be assigned. This will print logs with level same or above",
+    )
 
     subparsers = parser.add_subparsers(dest="command")
     _configure_validate_parser(subparsers)
diff --git a/data_validation/combiner.py b/data_validation/combiner.py
index 1b9564b6f..d45b19de1 100644
--- a/data_validation/combiner.py
+++ b/data_validation/combiner.py
@@ -87,8 +87,8 @@ def generate_report(
     documented = _add_metadata(joined, run_metadata)
 
     if verbose:
-        logging.info("-- ** Combiner Query ** --")
-        logging.info(documented.compile())
+        logging.debug("-- ** Combiner Query ** --")
+        logging.debug(documented.compile())
 
     result_df = client.execute(documented)
     result_df.validation_status.fillna(consts.VALIDATION_STATUS_FAIL, inplace=True)
diff --git a/data_validation/result_handlers/bigquery.py b/data_validation/result_handlers/bigquery.py
index dbaf8aa26..435f0468b 100644
--- a/data_validation/result_handlers/bigquery.py
+++ b/data_validation/result_handlers/bigquery.py
@@ -17,7 +17,8 @@
 from google.cloud import bigquery
 
 from data_validation import client_info
-from data_validation.result_handlers.text import TextResultHandler
+import logging
+from data_validation import consts
 
 
 class BigQueryResultHandler(object):
@@ -55,8 +56,11 @@ def get_handler_for_project(
         return BigQueryResultHandler(client, table_id=table_id)
 
     def execute(self, config, result_df):
-        text_handler = TextResultHandler("table")
-        text_handler.print_formatted_(result_df)
+        logging.info(
+            result_df.drop(consts.COLUMN_FILTER_LIST, axis=1).to_markdown(
+                tablefmt="fancy_grid", index=False
+            )
+        )
 
         table = self._bigquery_client.get_table(self._table_id)
         chunk_errors = self._bigquery_client.insert_rows_from_dataframe(
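For reviewers, a minimal sketch (not part of the patch) of how the new `--log-level` argument is expected to drive log filtering. It mirrors the `LOG_LEVEL_MAP` and `logging.basicConfig` call added in `data_validation/__main__.py` above; the `log_level` variable is a stand-in for the parsed `args.log_level` value, whose argparse default in `cli_tools.py` is `"INFO"`:

```
import logging

# Mirrors LOG_LEVEL_MAP from data_validation/__main__.py in this patch.
LOG_LEVEL_MAP = {
    "DEBUG": logging.DEBUG,
    "INFO": logging.INFO,
    "WARNING": logging.WARNING,
    "ERROR": logging.ERROR,
    "CRITICAL": logging.CRITICAL,
}

# Stand-in for args.log_level; the CLI default added in cli_tools.py is "INFO".
log_level = "WARNING"

logging.basicConfig(
    level=LOG_LEVEL_MAP[log_level],
    format="%(asctime)s-%(levelname)s: %(message)s",
    datefmt="%m/%d/%Y %I:%M:%S %p",
)

logging.info("suppressed: INFO is below the configured WARNING threshold")
logging.warning("emitted: WARNING meets the configured threshold")
```

With both `-v` and `--log-level DEBUG` passed on the command line, the combiner query that `generate_report` now emits via `logging.debug` becomes visible, while the default `INFO` level keeps it out of normal runs.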