From 76fcfc691f07ee86d582305f52c5e83fc65664f5 Mon Sep 17 00:00:00 2001 From: nj1973 Date: Wed, 13 Sep 2023 09:07:37 +0100 Subject: [PATCH] fix: Add not-null string to accepted date types in append_pre_agg_calc_field() (#980) * fix: Add not-null string to accepted date types in append_pre_agg_calc_field() * tests: Fix issue with SAMPLE_CONFIG constant mutating and causing unexpected side effects --- data_validation/config_manager.py | 13 +++++------- tests/unit/test_config_manager.py | 33 ++++++++++++++++++++++++------- 2 files changed, 31 insertions(+), 15 deletions(-) diff --git a/data_validation/config_manager.py b/data_validation/config_manager.py index c0de93a00..97408c4f3 100644 --- a/data_validation/config_manager.py +++ b/data_validation/config_manager.py @@ -595,10 +595,10 @@ def append_pre_agg_calc_field( ) -> dict: """Append calculated field for length(string) or epoch_seconds(timestamp) for preprocessing before column validation aggregation.""" depth, cast_type = 0, None - if column_type == "string": + if column_type in ["string", "!string"]: calc_func = "length" - elif column_type == "timestamp" or column_type == "!timestamp": + elif column_type in ["timestamp", "!timestamp"]: if ( self.source_client.name == "bigquery" or self.target_client.name == "bigquery" @@ -708,13 +708,10 @@ def decimal_too_big_for_64bit( ].type() if ( - (column_type == "string" or column_type == "!string") + column_type in ["string", "!string"] + or (cast_to_bigint and column_type in ["int32", "!int32"]) or ( - cast_to_bigint - and (column_type == "int32" or column_type == "!int32") - ) - or ( - (column_type == "timestamp" or column_type == "!timestamp") + column_type in ["timestamp", "!timestamp"] and agg_type in ( "sum", diff --git a/tests/unit/test_config_manager.py b/tests/unit/test_config_manager.py index 6e37caf02..888d5252f 100644 --- a/tests/unit/test_config_manager.py +++ b/tests/unit/test_config_manager.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import copy import pytest from data_validation import consts @@ -72,6 +73,13 @@ consts.CONFIG_CAST: None, } +AGGREGATE_CONFIG_C = { + consts.CONFIG_SOURCE_COLUMN: "length__c", + consts.CONFIG_TARGET_COLUMN: "length__c", + consts.CONFIG_FIELD_ALIAS: "min__length__c", + consts.CONFIG_TYPE: "min", +} + CUSTOM_QUERY_VALIDATION_CONFIG = { # BigQuery Specific Connection Config "source_conn": None, @@ -118,16 +126,24 @@ def __init__(self): self.columns = ["a", "b", "c"] def __getitem__(self, key): - return self - - def type(self): - return "int64" + return MockIbisColumn(key) def mutate(self, fields): self.columns = self.columns + fields return self +class MockIbisColumn(object): + def __init__(self, column): + self.column = column + + def type(self): + if self.column == "c": + return "!string" + else: + return "int64" + + @pytest.fixture def module_under_test(): from data_validation import config_manager @@ -270,16 +286,19 @@ def test_build_column_configs(module_under_test): def test_build_config_aggregates(module_under_test): config_manager = module_under_test.ConfigManager( - SAMPLE_CONFIG, MockIbisClient(), MockIbisClient(), verbose=False + copy.copy(SAMPLE_CONFIG), MockIbisClient(), MockIbisClient(), verbose=False ) aggregate_configs = config_manager.build_config_column_aggregates("sum", ["a"], []) assert aggregate_configs[0] == AGGREGATE_CONFIG_A + aggregate_configs = config_manager.build_config_column_aggregates("min", ["c"], []) + assert aggregate_configs[0] == AGGREGATE_CONFIG_C + def test_build_config_aggregates_no_match(module_under_test): config_manager = module_under_test.ConfigManager( - SAMPLE_CONFIG, MockIbisClient(), MockIbisClient(), verbose=False + copy.copy(SAMPLE_CONFIG), MockIbisClient(), MockIbisClient(), verbose=False ) aggregate_configs = config_manager.build_config_column_aggregates( @@ -290,7 +309,7 @@ def test_build_config_aggregates_no_match(module_under_test): def test_build_config_count_aggregate(module_under_test): config_manager = module_under_test.ConfigManager( - SAMPLE_CONFIG, MockIbisClient(), MockIbisClient(), verbose=False + copy.copy(SAMPLE_CONFIG), MockIbisClient(), MockIbisClient(), verbose=False ) agg = config_manager.build_config_count_aggregate()