Skip to content

Commit

Permalink
feat: Support BQ decimal precision and scale for schema validation (#960)
Browse files Browse the repository at this point in the history

* feat: support BQ decimal precision and scale for schema validation, limit Teradata query to one row for custom query schema

* feat: limit Hive function to one row for get_schema_using_query function
  • Loading branch information
nehanene15 committed Aug 29, 2023
1 parent 9ad529a commit b1d4942
Show file tree
Hide file tree
Showing 9 changed files with 19 additions and 27 deletions.
4 changes: 0 additions & 4 deletions tests/system/data_sources/test_hive.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,10 +166,6 @@ def test_schema_validation_core_types_to_bigquery():
(
# All Hive integrals go to BigQuery INT64.
"--allow-list=int8:int64,int16:int64,int32:int64,"
# Hive decimals that map to BigQuery NUMERIC.
"decimal(20,0):decimal(38,9),decimal(10,2):decimal(38,9),"
# Hive decimals that map to BigQuery BIGNUMERIC.
"decimal(38,0):decimal(76,38),"
# Hive does not have a time zoned timestamp type
"timestamp:timestamp('UTC'),"
# BigQuery does not have a float32 type.
Expand Down
4 changes: 0 additions & 4 deletions tests/system/data_sources/test_oracle.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,10 +165,6 @@ def test_schema_validation_core_types_to_bigquery():
(
# Integral Oracle NUMBERS go to BigQuery INT64.
"--allow-list=decimal(2,0):int64,decimal(4,0):int64,decimal(9,0):int64,decimal(18,0):int64,"
# Oracle NUMBERS that map to BigQuery NUMERIC.
"decimal(20,0):decimal(38,9),decimal(10,2):decimal(38,9),"
# Oracle NUMBERS that map to BigQuery BIGNUMERIC.
"decimal(38,0):decimal(76,38),"
# BigQuery does not have a float32 type.
"float32:float64"
),
Expand Down
4 changes: 0 additions & 4 deletions tests/system/data_sources/test_postgres.py
Original file line number Diff line number Diff line change
Expand Up @@ -566,10 +566,6 @@ def test_schema_validation_core_types_to_bigquery():
(
# PostgreSQL integrals go to BigQuery INT64.
"--allow-list=int16:int64,int32:int64,"
# Oracle NUMBERS that map to BigQuery NUMERIC.
"decimal(20,0):decimal(38,9),decimal(10,2):decimal(38,9),"
# Oracle NUMBERS that map to BigQuery BIGNUMERIC.
"decimal(38,0):decimal(76,38),"
# BigQuery does not have a float32 type.
"float32:float64"
),
Expand Down
4 changes: 0 additions & 4 deletions tests/system/data_sources/test_snowflake.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,10 +190,6 @@ def test_schema_validation_core_types_to_bigquery():
(
# Integral Snowflake NUMBERs go to BigQuery INT64.
"--allow-list=decimal(38,0):int64,"
# Snowflake NUMBERS that map to BigQuery NUMERIC.
"decimal(20,0):decimal(38,9),decimal(10,2):decimal(38,9),"
# Snowflake NUMBERS that map to BigQuery BIGNUMERIC
"decimal(38,0):decimal(76,38),"
# TODO When issue-706 is complete remove the timestamp line below
"timestamp('UTC'):timestamp"
),
Expand Down
4 changes: 0 additions & 4 deletions tests/system/data_sources/test_sql_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,10 +292,6 @@ def test_schema_validation_core_types_to_bigquery():
(
# All SQL Server integrals go to BigQuery INT64.
"--allow-list=int8:int64,int16:int64,int32:int64,"
# SQL Server decimals that map to BigQuery NUMERIC.
"decimal(20,0):decimal(38,9),decimal(10,2):decimal(38,9),"
# SQL Server decimals that map to BigQuery BIGNUMERIC.
"decimal(38,0):decimal(76,38),"
# BigQuery does not have a float32 type.
"float32:float64"
),
Expand Down
6 changes: 1 addition & 5 deletions tests/system/data_sources/test_teradata.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,11 +244,7 @@ def test_schema_validation_core_types_to_bigquery():
"--exclusion-columns=id",
(
# Teradata integrals go to BigQuery INT64.
"--allow-list=int8:int64,int16:int64,int32:int64,"
# Teradata NUMBERS that map to BigQuery NUMERIC.
"decimal(20,0):decimal(38,9),decimal(10,2):decimal(38,9),"
# Teradata NUMBERS that map to BigQuery BIGNUMERIC.
"decimal(38,0):decimal(76,38)"
"--allow-list=int8:int64,int16:int64,int32:int64"
),
]
)
Expand Down
16 changes: 16 additions & 0 deletions third_party/ibis/ibis_addon/operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,22 @@ def _bigquery_field_to_ibis_dtype(field):
names = [el.name for el in fields]
ibis_types = list(map(dt.dtype, fields))
ibis_type = dt.Struct(dict(zip(names, ibis_types)))
elif typ == "NUMERIC":
if not field.precision and not field.scale:
return dt.Decimal(precision=38, scale=9, nullable=field.is_nullable)
return dt.Decimal(
precision=field.precision,
scale=field.scale or 0,
nullable=field.is_nullable,
)
elif typ == "BIGNUMERIC":
if not field.precision and not field.scale:
return dt.Decimal(precision=76, scale=38, nullable=field.is_nullable)
return dt.Decimal(
precision=field.precision,
scale=field.scale or 0,
nullable=field.is_nullable,
)
else:
ibis_type = _BQ_LEGACY_TO_STANDARD.get(typ, typ)
if ibis_type in _BQ_DTYPE_TO_IBIS_TYPE:
Expand Down
2 changes: 1 addition & 1 deletion third_party/ibis/ibis_impala/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ def _if_null(op):

def _get_schema_using_query(self, query):
# Removing LIMIT 0 around query since it returns no results in Hive
cur = self.raw_sql(query)
cur = self.raw_sql(f"SELECT * FROM ({query}) t0 LIMIT 1")
cur.fetchall()
ibis_fields = self._adapt_types(cur.description)
cur.release()
Expand Down
2 changes: 1 addition & 1 deletion third_party/ibis/ibis_teradata/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ def get_schema(self, table_name: str, database: str = None) -> sch.Schema:
return sch.Schema(schema)

def _get_schema_using_query(self, query):
cur = self.raw_sql(query)
cur = self.raw_sql(f"SELECT TOP 1 * FROM ({query}) AS t0")
# resets the state of the cursor and closes operation
cur.fetchall()
ibis_fields = self._adapt_types(cur.description)
Expand Down

0 comments on commit b1d4942

Please sign in to comment.