diff --git a/data_validation/config_manager.py b/data_validation/config_manager.py index 0aeeadbf3..5bcb6b95b 100644 --- a/data_validation/config_manager.py +++ b/data_validation/config_manager.py @@ -418,13 +418,9 @@ def build_config_grouped_columns(self, grouped_columns): for column in grouped_columns: if column.casefold() not in casefold_source_columns: - raise ValueError( - f"GroupedColumn DNE in source: {source_table.op().name}.{column}" - ) + raise ValueError(f"Grouped Column DNE in source: {column}") if column.casefold() not in casefold_target_columns: - raise ValueError( - f"GroupedColumn DNE in target: {target_table.op().name}.{column}" - ) + raise ValueError(f"Grouped Column DNE in target: {column}") column_config = { consts.CONFIG_SOURCE_COLUMN: casefold_source_columns[column.casefold()], consts.CONFIG_TARGET_COLUMN: casefold_target_columns[column.casefold()], diff --git a/third_party/ibis/ibis_impala/api.py b/third_party/ibis/ibis_impala/api.py index 4e89c3345..80a747dc0 100644 --- a/third_party/ibis/ibis_impala/api.py +++ b/third_party/ibis/ibis_impala/api.py @@ -61,7 +61,20 @@ def parse_type(t): else: return ValueError(t) elif "struct" in t or "array" in t or "map" in t: - return t.replace("int", "int32") + if "bigint" in t: + t = t.replace("bigint", "int64") + elif "tinyint" in t: + t = t.replace("tinyint", "int8") + elif "smallint" in t: + t = t.replace("smallint", "int16") + else: + t = t.replace("int", "int32") + + if "varchar" in t: + t = t.replace("varchar", "string") + else: + t = t.replace("char","string") + return t else: raise Exception(t) @@ -149,7 +162,7 @@ def fill(target, chunks, na_rep): if have_nulls: if numpy_type in ('bool', 'datetime64[ns]'): target = np.empty(total_length, dtype='O') - na_rep = np.nan + na_rep = None elif numpy_type.startswith('int'): target = np.empty(total_length, dtype='f8') na_rep = np.nan