Skip to content

Commit

Permalink
fix: Hive boolean nan to None, Unsupported ibis data types in structs and arrays (#444)
Browse files: browse the repository at this point in the history

* fix: boolean NaN issue for Hive; fix column aggregation for tables with unsupported ibis data types

* blacken updates after merge

* add varchar and char support
  • Loading branch information
nehanene15 committed Apr 14, 2022
1 parent 6a54527 commit e94a1da
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 8 deletions.
8 changes: 2 additions & 6 deletions data_validation/config_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -418,13 +418,9 @@ def build_config_grouped_columns(self, grouped_columns):
for column in grouped_columns:

if column.casefold() not in casefold_source_columns:
raise ValueError(
f"GroupedColumn DNE in source: {source_table.op().name}.{column}"
)
raise ValueError(f"Grouped Column DNE in source: {column}")
if column.casefold() not in casefold_target_columns:
raise ValueError(
f"GroupedColumn DNE in target: {target_table.op().name}.{column}"
)
raise ValueError(f"Grouped Column DNE in target: {column}")
column_config = {
consts.CONFIG_SOURCE_COLUMN: casefold_source_columns[column.casefold()],
consts.CONFIG_TARGET_COLUMN: casefold_target_columns[column.casefold()],
Expand Down
17 changes: 15 additions & 2 deletions third_party/ibis/ibis_impala/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,20 @@ def parse_type(t):
else:
return ValueError(t)
elif "struct" in t or "array" in t or "map" in t:
return t.replace("int", "int32")
if "bigint" in t:
t = t.replace("bigint", "int64")
elif "tinyint" in t:
t = t.replace("tinyint", "int8")
elif "smallint" in t:
t = t.replace("smallint", "int16")
else:
t = t.replace("int", "int32")

if "varchar" in t:
t = t.replace("varchar", "string")
else:
t = t.replace("char","string")
return t
else:
raise Exception(t)

Expand Down Expand Up @@ -149,7 +162,7 @@ def fill(target, chunks, na_rep):
if have_nulls:
if numpy_type in ('bool', 'datetime64[ns]'):
target = np.empty(total_length, dtype='O')
na_rep = np.nan
na_rep = None
elif numpy_type.startswith('int'):
target = np.empty(total_length, dtype='f8')
na_rep = np.nan
Expand Down

0 comments on commit e94a1da

Please sign in to comment.