From 1a157aed71bc9ba9470be49a892f096e7dfd02f5 Mon Sep 17 00:00:00 2001 From: Neha Nene Date: Thu, 21 Sep 2023 02:23:03 -0400 Subject: [PATCH] fix: support for case insensitive PKs and Snowflake random row (#998) --- data_validation/config_manager.py | 4 ++-- data_validation/data_validation.py | 4 ++-- data_validation/query_builder/random_row_builder.py | 1 + third_party/ibis/ibis_addon/operations.py | 3 +++ 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/data_validation/config_manager.py b/data_validation/config_manager.py index c569a156d..259f59d31 100644 --- a/data_validation/config_manager.py +++ b/data_validation/config_manager.py @@ -522,9 +522,9 @@ def build_column_configs(self, columns): for column in columns: if column.casefold() not in casefold_source_columns: - raise ValueError(f"Grouped Column DNE in source: {column}") + raise ValueError(f"Column DNE in source: {column}") if column.casefold() not in casefold_target_columns: - raise ValueError(f"Grouped Column DNE in target: {column}") + raise ValueError(f"Column DNE in target: {column}") column_config = { consts.CONFIG_SOURCE_COLUMN: casefold_source_columns[column.casefold()], consts.CONFIG_TARGET_COLUMN: casefold_target_columns[column.casefold()], diff --git a/data_validation/data_validation.py b/data_validation/data_validation.py index b1ee18b72..e04b01735 100644 --- a/data_validation/data_validation.py +++ b/data_validation/data_validation.py @@ -134,13 +134,13 @@ def _add_random_row_filter(self): filter_field = { consts.CONFIG_TYPE: consts.FILTER_TYPE_ISIN, consts.CONFIG_FILTER_SOURCE_COLUMN: primary_key_info[ - consts.CONFIG_SOURCE_COLUMN + consts.CONFIG_FIELD_ALIAS ], consts.CONFIG_FILTER_SOURCE_VALUE: random_rows[ primary_key_info[consts.CONFIG_SOURCE_COLUMN] ], consts.CONFIG_FILTER_TARGET_COLUMN: primary_key_info[ - consts.CONFIG_TARGET_COLUMN + consts.CONFIG_FIELD_ALIAS ], consts.CONFIG_FILTER_TARGET_VALUE: random_rows[ primary_key_info[consts.CONFIG_SOURCE_COLUMN] diff --git a/data_validation/query_builder/random_row_builder.py b/data_validation/query_builder/random_row_builder.py index 91ce64646..fac70a82e 100644 --- a/data_validation/query_builder/random_row_builder.py +++ b/data_validation/query_builder/random_row_builder.py @@ -32,6 +32,7 @@ "mysql", "spanner", "redshift", + "snowflake", ] diff --git a/third_party/ibis/ibis_addon/operations.py b/third_party/ibis/ibis_addon/operations.py index 6f14f0d5e..90403a03a 100644 --- a/third_party/ibis/ibis_addon/operations.py +++ b/third_party/ibis/ibis_addon/operations.py @@ -346,6 +346,8 @@ def _sa_string_join(t, op): def sa_format_new_id(t, op): return sa.func.NEWID() +def sa_format_random(t, op): + return sa.func.RANDOM() _BQ_DTYPE_TO_IBIS_TYPE["TIMESTAMP"] = dt.Timestamp(timezone="UTC") @@ -442,3 +444,4 @@ def _bigquery_field_to_ibis_dtype(field): SnowflakeExprTranslator._registry[RawSQL] = sa_format_raw_sql SnowflakeExprTranslator._registry[IfNull] = sa_fixed_arity(sa.func.ifnull, 2) SnowflakeExprTranslator._registry[ExtractEpochSeconds] = sa_epoch_time_snowflake + SnowflakeExprTranslator._registry[RandomScalar] = sa_format_random