diff --git a/data_validation/__main__.py b/data_validation/__main__.py index 0c781247f..b0a53c234 100644 --- a/data_validation/__main__.py +++ b/data_validation/__main__.py @@ -15,7 +15,7 @@ import json import os import sys - +import logging from yaml import Dumper, dump from data_validation import ( @@ -491,9 +491,13 @@ def validate(args): def main(): + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s-%(levelname)s: %(message)s", + datefmt="%m/%d/%Y %I:%M:%S %p", + ) # Create Parser and Get Deployment Info args = cli_tools.get_parsed_args() - if args.command == "connections": run_connections(args) elif args.command == "run-config": diff --git a/data_validation/app.py b/data_validation/app.py index aaba5bafe..0ca1bfa10 100644 --- a/data_validation/app.py +++ b/data_validation/app.py @@ -17,6 +17,7 @@ from data_validation import data_validation import flask import pandas +import logging app = flask.Flask(__name__) @@ -62,7 +63,7 @@ def run(): result = validate(config) return str(result) except Exception as e: - print(e) + logging.exception(e) return "Found Error: {}".format(e) diff --git a/data_validation/cli_tools.py b/data_validation/cli_tools.py index 6c38b9926..01bb70ebc 100644 --- a/data_validation/cli_tools.py +++ b/data_validation/cli_tools.py @@ -47,6 +47,7 @@ import json import sys import uuid +import logging from data_validation import consts from data_validation import state_manager @@ -684,7 +685,7 @@ def list_connections(): connections = mgr.list_connections() for conn_name in connections: - print(f"Connection Name: {conn_name}") + logging.info(f"Connection Name: {conn_name}") def get_connection(connection_name): @@ -715,9 +716,9 @@ def list_validations(): mgr = state_manager.StateManager() validations = mgr.list_validations() - print("Validation YAMLs found:") + logging.info("Validation YAMLs found:") for validation_name in validations: - print(f"{validation_name}.yaml") + logging.info(f"{validation_name}.yaml") def 
get_labels(arg_labels): diff --git a/data_validation/clients.py b/data_validation/clients.py index 4c1c59f7c..5101dfb71 100644 --- a/data_validation/clients.py +++ b/data_validation/clients.py @@ -15,7 +15,7 @@ import copy import warnings - +import logging import google.oauth2.service_account import ibis import ibis.backends.pandas @@ -204,7 +204,7 @@ def get_all_tables(client, allowed_schemas=None): try: tables = list_tables(client, schema_name) except Exception as e: - print(f"List Tables Error: {schema_name} -> {e}") + logging.warning(f"List Tables Error: {schema_name} -> {e}") continue for table_name in tables: diff --git a/data_validation/combiner.py b/data_validation/combiner.py index 50efee6bf..1b9564b6f 100644 --- a/data_validation/combiner.py +++ b/data_validation/combiner.py @@ -21,7 +21,7 @@ import datetime import functools import json - +import logging import ibis import ibis.expr.datatypes @@ -87,8 +87,8 @@ def generate_report( documented = _add_metadata(joined, run_metadata) if verbose: - print("-- ** Combiner Query ** --") - print(documented.compile()) + logging.info("-- ** Combiner Query ** --") + logging.info(documented.compile()) result_df = client.execute(documented) result_df.validation_status.fillna(consts.VALIDATION_STATUS_FAIL, inplace=True) diff --git a/data_validation/config_manager.py b/data_validation/config_manager.py index 6364e9ee8..91ab47fe6 100644 --- a/data_validation/config_manager.py +++ b/data_validation/config_manager.py @@ -565,13 +565,13 @@ def build_config_column_aggregates( if column not in allowlist_columns: continue elif column not in casefold_target_columns: - print( + logging.warning( f"Skipping {agg_type} on {column} as column is not present in target table" ) continue elif supported_types and column_type not in supported_types: if self.verbose: - print( + logging.info( f"Skipping {agg_type} on {column} due to data type: {column_type}" ) continue @@ -629,7 +629,7 @@ def build_config_calculated_fields( elif 
supported_types and column_type not in supported_types: if self.verbose: msg = f"Skipping {calc_type} on {column} due to data type: {column_type}" - print(msg) + logging.info(msg) continue calculated_config = { diff --git a/data_validation/data_validation.py b/data_validation/data_validation.py index 562a802e2..723e11778 100644 --- a/data_validation/data_validation.py +++ b/data_validation/data_validation.py @@ -13,12 +13,12 @@ # limitations under the License. import json -import logging import warnings import ibis.backends.pandas import numpy import pandas +import logging from data_validation import combiner, consts, metadata from data_validation.config_manager import ConfigManager @@ -274,8 +274,8 @@ def _get_pandas_schema(self, source_df, target_df, join_on_fields, verbose=False schema_index.append(key) pd_schema = pandas.Series(schema_data, index=schema_index) if verbose: - print("-- ** Pandas Schema ** --") - print(pd_schema) + logging.info("-- ** Pandas Schema ** --") + logging.info(pd_schema) return pd_schema @@ -346,12 +346,12 @@ def _execute_validation(self, validation_builder, process_in_memory=True): ) except Exception as e: if self.verbose: - print("-- ** Logging Source DF ** --") - print(source_df.dtypes) - print(source_df) - print("-- ** Logging Target DF ** --") - print(target_df.dtypes) - print(target_df) + logging.error("-- ** Logging Source DF ** --") + logging.error(source_df.dtypes) + logging.error(source_df) + logging.error("-- ** Logging Target DF ** --") + logging.error(target_df.dtypes) + logging.error(target_df) raise e else: result_df = combiner.generate_report( diff --git a/data_validation/query_builder/query_builder.py b/data_validation/query_builder/query_builder.py index e7a5a99fe..2233ce902 100644 --- a/data_validation/query_builder/query_builder.py +++ b/data_validation/query_builder/query_builder.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import logging import ibis from data_validation import clients, consts from ibis.expr.types import StringScalar @@ -240,7 +241,7 @@ def compile(self, ibis_table): else: # TODO: need to build Truncation Int support # TODO: should be using a logger - print("WARNING: Unknown cast types can cause memory errors") + logging.warning("Unknown cast types can cause memory errors") # The Casts require we also supply a name. alias = self.alias or self.field_name diff --git a/data_validation/state_manager.py b/data_validation/state_manager.py index d5a799467..f30c40944 100644 --- a/data_validation/state_manager.py +++ b/data_validation/state_manager.py @@ -20,6 +20,7 @@ import enum import json import os +import logging from google.cloud import storage from typing import Dict, List from yaml import dump, load, Dumper, Loader @@ -160,7 +161,7 @@ def _write_file(self, file_path: str, data: str): with open(file_path, "w") as file: file.write(data) - print("Success! Config output written to {}".format(file_path)) + logging.info("Success! Config output written to {}".format(file_path)) def _list_directory(self, directory_path: str) -> List[str]: if self.file_system == FileSystem.GCS: diff --git a/data_validation/validation_builder.py b/data_validation/validation_builder.py index 4db2384b7..bbfa5c638 100644 --- a/data_validation/validation_builder.py +++ b/data_validation/validation_builder.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- +import logging from copy import deepcopy from data_validation import consts, metadata @@ -385,9 +385,9 @@ def get_source_query(self): else: query = self.source_builder.compile(**source_config) if self.verbose: - print(source_config) - print("-- ** Source Query ** --") - print(query.compile()) + logging.info(source_config) + logging.info("-- ** Source Query ** --") + logging.info(query.compile()) return query @@ -429,9 +429,9 @@ def get_target_query(self): else: query = self.target_builder.compile(**target_config) if self.verbose: - print(target_config) - print("-- ** Target Query ** --") - print(query.compile()) + logging.info(target_config) + logging.info("-- ** Target Query ** --") + logging.info(query.compile()) return query @@ -451,7 +451,7 @@ def get_query_from_file(self, filename): file = open(filename, "r") query = file.read() except IOError: - print("Cannot read query file: ", filename) + logging.warning("Cannot read query file: %s", filename) if not query or query.isspace(): raise ValueError( diff --git a/tests/unit/test_cli_tools.py b/tests/unit/test_cli_tools.py index 1ece38c2c..2601bbe01 100644 --- a/tests/unit/test_cli_tools.py +++ b/tests/unit/test_cli_tools.py @@ -15,7 +15,7 @@ import argparse import pytest from unittest import mock - +import logging from data_validation import cli_tools TEST_CONN = '{"source_type":"Example"}' @@ -114,21 +114,20 @@ def test_get_connection_config_from_args(): assert conn["project_id"] == "example-project" -def test_create_and_list_connections(capsys, fs): +def test_create_and_list_connections(caplog, fs): + + caplog.set_level(logging.INFO) # Create Connection parser = cli_tools.configure_arg_parser() args = parser.parse_args(CLI_ADD_CONNECTION_ARGS) - conn = cli_tools.get_connection_config_from_args(args) cli_tools.store_connection(args.connection_name, conn) - captured = capsys.readouterr() - assert WRITE_SUCCESS_STRING in captured.out + + assert WRITE_SUCCESS_STRING in caplog.records[0].msg # List Connection 
cli_tools.list_connections() - captured = capsys.readouterr() - - assert captured.out == "Connection Name: test\n" + assert "Connection Name: test" in caplog.records[1].msg def test_configure_arg_parser_list_and_run_validation_configs(): @@ -144,17 +143,16 @@ def test_configure_arg_parser_list_and_run_validation_configs(): assert args.validation_config_cmd == "run" -def test_create_and_list_and_get_validations(capsys, fs): +def test_create_and_list_and_get_validations(caplog, fs): + + caplog.set_level(logging.INFO) # Create validation config file cli_tools.store_validation("example_validation.yaml", TEST_VALIDATION_CONFIG) - captured = capsys.readouterr() - assert WRITE_SUCCESS_STRING in captured.out + assert WRITE_SUCCESS_STRING in caplog.records[0].msg # List validation configs cli_tools.list_validations() - captured = capsys.readouterr() - - assert captured.out == "Validation YAMLs found:\nexample_validation.yaml\n" + assert "Validation YAMLs found:" in caplog.records[1].msg # Retrive the stored vaildation config yaml_config = cli_tools.get_validation("example_validation.yaml")