Skip to content

Commit

Permalink
fix: Issue422 replace print with logging (#543)
Browse files Browse the repository at this point in the history
* Replace print statements with Python logging

* Replace print statements with Python logging

* Deleting a test log statement from main

* fix: Replace print statements with Python logging

* fix: Adding corrections as recommended
  • Loading branch information
latika-wadhwa committed Jul 27, 2022
1 parent 9ea776a commit 78222b4
Show file tree
Hide file tree
Showing 11 changed files with 53 additions and 47 deletions.
8 changes: 6 additions & 2 deletions data_validation/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import json
import os
import sys

import logging
from yaml import Dumper, dump

from data_validation import (
Expand Down Expand Up @@ -491,9 +491,13 @@ def validate(args):


def main():
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s-%(levelname)s: %(message)s",
datefmt="%m/%d/%Y %I:%M:%S %p",
)
# Create Parser and Get Deployment Info
args = cli_tools.get_parsed_args()

if args.command == "connections":
run_connections(args)
elif args.command == "run-config":
Expand Down
3 changes: 2 additions & 1 deletion data_validation/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from data_validation import data_validation
import flask
import pandas
import logging

app = flask.Flask(__name__)

Expand Down Expand Up @@ -62,7 +63,7 @@ def run():
result = validate(config)
return str(result)
except Exception as e:
print(e)
logging.exception(e)
return "Found Error: {}".format(e)


Expand Down
7 changes: 4 additions & 3 deletions data_validation/cli_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
import json
import sys
import uuid
import logging

from data_validation import consts
from data_validation import state_manager
Expand Down Expand Up @@ -684,7 +685,7 @@ def list_connections():
connections = mgr.list_connections()

for conn_name in connections:
print(f"Connection Name: {conn_name}")
logging.info(f"Connection Name: {conn_name}")


def get_connection(connection_name):
Expand Down Expand Up @@ -715,9 +716,9 @@ def list_validations():
mgr = state_manager.StateManager()
validations = mgr.list_validations()

print("Validation YAMLs found:")
logging.info("Validation YAMLs found:")
for validation_name in validations:
print(f"{validation_name}.yaml")
logging.info(f"{validation_name}.yaml")


def get_labels(arg_labels):
Expand Down
4 changes: 2 additions & 2 deletions data_validation/clients.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

import copy
import warnings

import logging
import google.oauth2.service_account
import ibis
import ibis.backends.pandas
Expand Down Expand Up @@ -204,7 +204,7 @@ def get_all_tables(client, allowed_schemas=None):
try:
tables = list_tables(client, schema_name)
except Exception as e:
print(f"List Tables Error: {schema_name} -> {e}")
logging.warning(f"List Tables Error: {schema_name} -> {e}")
continue

for table_name in tables:
Expand Down
6 changes: 3 additions & 3 deletions data_validation/combiner.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import datetime
import functools
import json

import logging
import ibis
import ibis.expr.datatypes

Expand Down Expand Up @@ -87,8 +87,8 @@ def generate_report(
documented = _add_metadata(joined, run_metadata)

if verbose:
print("-- ** Combiner Query ** --")
print(documented.compile())
logging.info("-- ** Combiner Query ** --")
logging.info(documented.compile())

result_df = client.execute(documented)
result_df.validation_status.fillna(consts.VALIDATION_STATUS_FAIL, inplace=True)
Expand Down
6 changes: 3 additions & 3 deletions data_validation/config_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -565,13 +565,13 @@ def build_config_column_aggregates(
if column not in allowlist_columns:
continue
elif column not in casefold_target_columns:
print(
logging.warning(
f"Skipping {agg_type} on {column} as column is not present in target table"
)
continue
elif supported_types and column_type not in supported_types:
if self.verbose:
print(
logging.info(
f"Skipping {agg_type} on {column} due to data type: {column_type}"
)
continue
Expand Down Expand Up @@ -629,7 +629,7 @@ def build_config_calculated_fields(
elif supported_types and column_type not in supported_types:
if self.verbose:
msg = f"Skipping {calc_type} on {column} due to data type: {column_type}"
print(msg)
logging.info(msg)
continue

calculated_config = {
Expand Down
18 changes: 9 additions & 9 deletions data_validation/data_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@
# limitations under the License.

import json
import logging
import warnings

import ibis.backends.pandas
import numpy
import pandas
import logging

from data_validation import combiner, consts, metadata
from data_validation.config_manager import ConfigManager
Expand Down Expand Up @@ -274,8 +274,8 @@ def _get_pandas_schema(self, source_df, target_df, join_on_fields, verbose=False
schema_index.append(key)
pd_schema = pandas.Series(schema_data, index=schema_index)
if verbose:
print("-- ** Pandas Schema ** --")
print(pd_schema)
logging.info("-- ** Pandas Schema ** --")
logging.info(pd_schema)

return pd_schema

Expand Down Expand Up @@ -346,12 +346,12 @@ def _execute_validation(self, validation_builder, process_in_memory=True):
)
except Exception as e:
if self.verbose:
print("-- ** Logging Source DF ** --")
print(source_df.dtypes)
print(source_df)
print("-- ** Logging Target DF ** --")
print(target_df.dtypes)
print(target_df)
logging.error("-- ** Logging Source DF ** --")
logging.error(source_df.dtypes)
logging.error(source_df)
logging.error("-- ** Logging Target DF ** --")
logging.error(target_df.dtypes)
logging.error(target_df)
raise e
else:
result_df = combiner.generate_report(
Expand Down
3 changes: 2 additions & 1 deletion data_validation/query_builder/query_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import ibis
from data_validation import clients, consts
from ibis.expr.types import StringScalar
Expand Down Expand Up @@ -240,7 +241,7 @@ def compile(self, ibis_table):
else:
# TODO: need to build Truncation Int support
# TODO: should be using a logger
print("WARNING: Unknown cast types can cause memory errors")
logging.warning("Unknown cast types can cause memory errors")

# The Casts require we also supply a name.
alias = self.alias or self.field_name
Expand Down
3 changes: 2 additions & 1 deletion data_validation/state_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import enum
import json
import os
import logging
from google.cloud import storage
from typing import Dict, List
from yaml import dump, load, Dumper, Loader
Expand Down Expand Up @@ -160,7 +161,7 @@ def _write_file(self, file_path: str, data: str):
with open(file_path, "w") as file:
file.write(data)

print("Success! Config output written to {}".format(file_path))
logging.info("Success! Config output written to {}".format(file_path))

def _list_directory(self, directory_path: str) -> List[str]:
if self.file_system == FileSystem.GCS:
Expand Down
16 changes: 8 additions & 8 deletions data_validation/validation_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
from copy import deepcopy

from data_validation import consts, metadata
Expand Down Expand Up @@ -385,9 +385,9 @@ def get_source_query(self):
else:
query = self.source_builder.compile(**source_config)
if self.verbose:
print(source_config)
print("-- ** Source Query ** --")
print(query.compile())
logging.info(source_config)
logging.info("-- ** Source Query ** --")
logging.info(query.compile())

return query

Expand Down Expand Up @@ -429,9 +429,9 @@ def get_target_query(self):
else:
query = self.target_builder.compile(**target_config)
if self.verbose:
print(target_config)
print("-- ** Target Query ** --")
print(query.compile())
logging.info(target_config)
logging.info("-- ** Target Query ** --")
logging.info(query.compile())

return query

Expand All @@ -451,7 +451,7 @@ def get_query_from_file(self, filename):
file = open(filename, "r")
query = file.read()
except IOError:
print("Cannot read query file: ", filename)
logging.warning("Cannot read query file: ", filename)

if not query or query.isspace():
raise ValueError(
Expand Down
26 changes: 12 additions & 14 deletions tests/unit/test_cli_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import argparse
import pytest
from unittest import mock

import logging
from data_validation import cli_tools

TEST_CONN = '{"source_type":"Example"}'
Expand Down Expand Up @@ -114,21 +114,20 @@ def test_get_connection_config_from_args():
assert conn["project_id"] == "example-project"


def test_create_and_list_connections(capsys, fs):
def test_create_and_list_connections(caplog, fs):

caplog.set_level(logging.INFO)
# Create Connection
parser = cli_tools.configure_arg_parser()
args = parser.parse_args(CLI_ADD_CONNECTION_ARGS)

conn = cli_tools.get_connection_config_from_args(args)
cli_tools.store_connection(args.connection_name, conn)
captured = capsys.readouterr()
assert WRITE_SUCCESS_STRING in captured.out

assert WRITE_SUCCESS_STRING in caplog.records[0].msg

# List Connection
cli_tools.list_connections()
captured = capsys.readouterr()

assert captured.out == "Connection Name: test\n"
assert "Connection Name: test" in caplog.records[1].msg


def test_configure_arg_parser_list_and_run_validation_configs():
Expand All @@ -144,17 +143,16 @@ def test_configure_arg_parser_list_and_run_validation_configs():
assert args.validation_config_cmd == "run"


def test_create_and_list_and_get_validations(capsys, fs):
def test_create_and_list_and_get_validations(caplog, fs):

caplog.set_level(logging.INFO)
# Create validation config file
cli_tools.store_validation("example_validation.yaml", TEST_VALIDATION_CONFIG)
captured = capsys.readouterr()
assert WRITE_SUCCESS_STRING in captured.out
assert WRITE_SUCCESS_STRING in caplog.records[0].msg

# List validation configs
cli_tools.list_validations()
captured = capsys.readouterr()

assert captured.out == "Validation YAMLs found:\nexample_validation.yaml\n"
assert "Validation YAMLs found:" in caplog.records[1].msg

    # Retrieve the stored validation config
yaml_config = cli_tools.get_validation("example_validation.yaml")
Expand Down

0 comments on commit 78222b4

Please sign in to comment.