Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Issue422 replace print with logging #543

Merged
merged 5 commits into from
Jul 27, 2022
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions data_validation/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import json
import os
import sys

import logging
from yaml import Dumper, dump

from data_validation import (
Expand Down Expand Up @@ -491,9 +491,13 @@ def validate(args):


def main():
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s-%(levelname)s: %(message)s",
datefmt="%m/%d/%Y %I:%M:%S %p",
)
# Create Parser and Get Deployment Info
args = cli_tools.get_parsed_args()

if args.command == "connections":
run_connections(args)
elif args.command == "run-config":
Expand Down
3 changes: 2 additions & 1 deletion data_validation/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from data_validation import data_validation
import flask
import pandas
import logging

app = flask.Flask(__name__)

Expand Down Expand Up @@ -62,7 +63,7 @@ def run():
result = validate(config)
return str(result)
except Exception as e:
print(e)
logging.exception(e)
return "Found Error: {}".format(e)


Expand Down
7 changes: 4 additions & 3 deletions data_validation/cli_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
import json
import sys
import uuid
import logging

from data_validation import consts
from data_validation import state_manager
Expand Down Expand Up @@ -684,7 +685,7 @@ def list_connections():
connections = mgr.list_connections()

for conn_name in connections:
print(f"Connection Name: {conn_name}")
logging.info(f"Connection Name: {conn_name}")


def get_connection(connection_name):
Expand Down Expand Up @@ -715,9 +716,9 @@ def list_validations():
mgr = state_manager.StateManager()
validations = mgr.list_validations()

print("Validation YAMLs found:")
logging.info("Validation YAMLs found:")
for validation_name in validations:
print(f"{validation_name}.yaml")
logging.info(f"{validation_name}.yaml")


def get_labels(arg_labels):
Expand Down
4 changes: 2 additions & 2 deletions data_validation/clients.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

import copy
import warnings

import logging
import google.oauth2.service_account
import ibis
import ibis.backends.pandas
Expand Down Expand Up @@ -204,7 +204,7 @@ def get_all_tables(client, allowed_schemas=None):
try:
tables = list_tables(client, schema_name)
except Exception as e:
print(f"List Tables Error: {schema_name} -> {e}")
logging.warning(f"List Tables Error: {schema_name} -> {e}")
continue

for table_name in tables:
Expand Down
6 changes: 3 additions & 3 deletions data_validation/combiner.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import datetime
import functools
import json

import logging
import ibis
import ibis.expr.datatypes

Expand Down Expand Up @@ -87,8 +87,8 @@ def generate_report(
documented = _add_metadata(joined, run_metadata)

if verbose:
print("-- ** Combiner Query ** --")
print(documented.compile())
logging.info("-- ** Combiner Query ** --")
logging.info(documented.compile())

result_df = client.execute(documented)
result_df.validation_status.fillna(consts.VALIDATION_STATUS_FAIL, inplace=True)
Expand Down
6 changes: 3 additions & 3 deletions data_validation/config_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -563,13 +563,13 @@ def build_config_column_aggregates(
if column not in allowlist_columns:
continue
elif column not in casefold_target_columns:
print(
logging.warning(
f"Skipping {agg_type} on {column} as column is not present in target table"
)
continue
elif supported_types and column_type not in supported_types:
if self.verbose:
print(
logging.info(
f"Skipping {agg_type} on {column} due to data type: {column_type}"
)
continue
Expand Down Expand Up @@ -627,7 +627,7 @@ def build_config_calculated_fields(
elif supported_types and column_type not in supported_types:
if self.verbose:
msg = f"Skipping {calc_type} on {column} due to data type: {column_type}"
print(msg)
logging.info(msg)
continue

calculated_config = {
Expand Down
18 changes: 9 additions & 9 deletions data_validation/data_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@
# limitations under the License.

import json
import logging
import warnings

import ibis.backends.pandas
import numpy
import pandas
import logging

from data_validation import combiner, consts, metadata
from data_validation.config_manager import ConfigManager
Expand Down Expand Up @@ -274,8 +274,8 @@ def _get_pandas_schema(self, source_df, target_df, join_on_fields, verbose=False
schema_index.append(key)
pd_schema = pandas.Series(schema_data, index=schema_index)
if verbose:
print("-- ** Pandas Schema ** --")
print(pd_schema)
logging.info("-- ** Pandas Schema ** --")
logging.info(pd_schema)

return pd_schema

Expand Down Expand Up @@ -346,12 +346,12 @@ def _execute_validation(self, validation_builder, process_in_memory=True):
)
except Exception as e:
if self.verbose:
print("-- ** Logging Source DF ** --")
print(source_df.dtypes)
print(source_df)
print("-- ** Logging Target DF ** --")
print(target_df.dtypes)
print(target_df)
logging.info("-- ** Logging Source DF ** --")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

these seem like error logs

logging.info(source_df.dtypes)
logging.info(source_df)
logging.info("-- ** Logging Target DF ** --")
logging.info(target_df.dtypes)
logging.info(target_df)
raise e
else:
result_df = combiner.generate_report(
Expand Down
3 changes: 2 additions & 1 deletion data_validation/query_builder/query_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import ibis
from data_validation import clients, consts
from ibis.expr.types import StringScalar
Expand Down Expand Up @@ -240,7 +241,7 @@ def compile(self, ibis_table):
else:
# TODO: need to build Truncation Int support
# TODO: should be using a logger
print("WARNING: Unknown cast types can cause memory errors")
logging.warning("Unknown cast types can cause memory errors")

# The Casts require we also supply a name.
alias = self.alias or self.field_name
Expand Down
7 changes: 4 additions & 3 deletions data_validation/result_handlers/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
"""

from data_validation import consts
import logging


class TextResultHandler(object):
Expand All @@ -36,11 +37,11 @@ def print_formatted_(self, result_df):
:param result_df
"""
if self.format == "text":
print(result_df.to_string(index=False))
logging.info(result_df.to_string(index=False))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does the logging change the nature of this printing (newlines might either split into new logs or be less exportable)

I think printing might be more logical here, but we need to maintain the ability to export into a file

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think logging would split into new logs if it has \n in it.
https://stackoverflow.com/questions/22934616/multi-line-logging-in-python.
Do let me know if this makes sense to you

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's something I've seen in Stackdriver specifically.

In any case, I'm more worried that you are changing the actual output here since this option is requesting a specific type to be output. CSV and json data would no longer be valid, so anyone piping these out to files would start seeing differences.

Copy link
Contributor Author

@latika-wadhwa latika-wadhwa Jul 20, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That makes sense! Will update it

elif self.format == "csv":
print(result_df.to_csv(index=False))
logging.info(result_df.to_csv(index=False))
elif self.format == "json":
print(result_df.to_json(orient="index"))
logging.info(result_df.to_json(orient="index"))
else:
print(
result_df.drop(self.cols_filter_list, axis=1).to_markdown(
Expand Down
3 changes: 2 additions & 1 deletion data_validation/state_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import enum
import json
import os
import logging
from google.cloud import storage
from typing import Dict, List
from yaml import dump, load, Dumper, Loader
Expand Down Expand Up @@ -160,7 +161,7 @@ def _write_file(self, file_path: str, data: str):
with open(file_path, "w") as file:
file.write(data)

print("Success! Config output written to {}".format(file_path))
logging.info("Success! Config output written to {}".format(file_path))

def _list_directory(self, directory_path: str) -> List[str]:
if self.file_system == FileSystem.GCS:
Expand Down
16 changes: 8 additions & 8 deletions data_validation/validation_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
from copy import deepcopy

from data_validation import consts, metadata
Expand Down Expand Up @@ -385,9 +385,9 @@ def get_source_query(self):
else:
query = self.source_builder.compile(**source_config)
if self.verbose:
print(source_config)
print("-- ** Source Query ** --")
print(query.compile())
logging.info(source_config)
logging.info("-- ** Source Query ** --")
logging.info(query.compile())

return query

Expand Down Expand Up @@ -429,9 +429,9 @@ def get_target_query(self):
else:
query = self.target_builder.compile(**target_config)
if self.verbose:
print(target_config)
print("-- ** Target Query ** --")
print(query.compile())
logging.info(target_config)
logging.info("-- ** Target Query ** --")
logging.info(query.compile())

return query

Expand All @@ -451,7 +451,7 @@ def get_query_from_file(self, filename):
file = open(filename, "r")
query = file.read()
except IOError:
print("Cannot read query file: ", filename)
logging.warning("Cannot read query file: ", filename)

if not query or query.isspace():
raise ValueError(
Expand Down
26 changes: 12 additions & 14 deletions tests/unit/test_cli_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import argparse
import pytest
from unittest import mock

import logging
from data_validation import cli_tools

TEST_CONN = '{"source_type":"Example"}'
Expand Down Expand Up @@ -114,21 +114,20 @@ def test_get_connection_config_from_args():
assert conn["project_id"] == "example-project"


def test_create_and_list_connections(caplog, fs):
    """Store a connection from CLI args, then list it, asserting on captured log records.

    Uses the pytest ``caplog`` fixture (the code under test now emits via
    ``logging`` instead of ``print``) and ``pyfakefs``'s ``fs`` fixture so the
    connection file is written to a fake filesystem.
    """
    # logging.info is below the default WARNING threshold, so capture must be
    # lowered explicitly or caplog.records stays empty.
    caplog.set_level(logging.INFO)

    # Create the connection from the canned CLI arguments.
    parser = cli_tools.configure_arg_parser()
    args = parser.parse_args(CLI_ADD_CONNECTION_ARGS)

    conn = cli_tools.get_connection_config_from_args(args)
    cli_tools.store_connection(args.connection_name, conn)
    # First record: the "Success! Config output written to ..." message.
    assert WRITE_SUCCESS_STRING in caplog.records[0].msg

    # List the stored connection; second record names it.
    cli_tools.list_connections()
    assert "Connection Name: test" in caplog.records[1].msg


def test_configure_arg_parser_list_and_run_validation_configs():
Expand All @@ -144,17 +143,16 @@ def test_configure_arg_parser_list_and_run_validation_configs():
assert args.validation_config_cmd == "run"


def test_create_and_list_and_get_validations(capsys, fs):
def test_create_and_list_and_get_validations(caplog, fs):

caplog.set_level(logging.INFO)
# Create validation config file
cli_tools.store_validation("example_validation.yaml", TEST_VALIDATION_CONFIG)
captured = capsys.readouterr()
assert WRITE_SUCCESS_STRING in captured.out
assert WRITE_SUCCESS_STRING in caplog.records[0].msg

# List validation configs
cli_tools.list_validations()
captured = capsys.readouterr()

assert captured.out == "Validation YAMLs found:\nexample_validation.yaml\n"
assert "Validation YAMLs found:" in caplog.records[1].msg

# Retrieve the stored validation config
yaml_config = cli_tools.get_validation("example_validation.yaml")
Expand Down