Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/develop' into issue-926-schema-validations-ignore-notnull
Browse files Browse the repository at this point in the history
  • Loading branch information
nj1973 committed Aug 9, 2023
2 parents 2ddfde3 + ee7ae9a commit 2f21510
Show file tree
Hide file tree
Showing 6 changed files with 92 additions and 24 deletions.
82 changes: 74 additions & 8 deletions tests/system/data_sources/test_mysql.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,13 @@ def test_mysql_row():
pass


def mock_get_connection_config(*args):
    """Test double for StateManager.get_connection_config.

    Patched in with ``new=``, so it replaces the bound method: args[0] is
    the StateManager instance and args[1] is the connection name taken
    from the CLI options (-sc/-tc).
    """
    if args[1] in ("mysql-conn", "mock-conn"):
        return CONN
    elif args[1] == "bq-conn":
        return BQ_CONN
    # Fail fast instead of silently returning None: an unknown connection
    # name here means the test's CLI arguments and this mock disagree.
    raise ValueError(f"Unexpected connection name: {args[1]}")


# Expected result from partitioning table on 3 keys
EXPECTED_PARTITION_FILTER = [
"course_id < 'ALG001' OR course_id = 'ALG001' AND (quarter_id < 3 OR quarter_id = 3 AND (student_id < 1234))",
Expand All @@ -449,9 +456,9 @@ def test_mysql_row():

@mock.patch(
"data_validation.state_manager.StateManager.get_connection_config",
return_value=CONN,
new=mock_get_connection_config,
)
def test_mysql_generate_table_partitions(mock_conn):
def test_mysql_generate_table_partitions():
"""Test generate table partitions on mysql
The unit tests, specifically test_add_partition_filters_to_config and test_store_yaml_partitions_local
check that yaml configurations are created and saved in local storage. Partitions can only be created with
Expand Down Expand Up @@ -535,9 +542,9 @@ def test_schema_validation_not_null_vs_nullable():

@mock.patch(
"data_validation.state_manager.StateManager.get_connection_config",
return_value=CONN,
new=mock_get_connection_config,
)
def test_column_validation_core_types(mock_conn):
def test_column_validation_core_types():
parser = cli_tools.configure_arg_parser()
args = parser.parse_args(
[
Expand All @@ -563,9 +570,40 @@ def test_column_validation_core_types(mock_conn):

@mock.patch(
"data_validation.state_manager.StateManager.get_connection_config",
return_value=CONN,
new=mock_get_connection_config,
)
def test_column_validation_core_types_to_bigquery():
    """MySQL to BigQuery dvt_core_types column validation (sum/min/max)."""
    # TODO Change --sum string below to include col_datetime and col_tstz when issue-762 is complete.
    # TODO Change --sum, --min and --max options to include col_char_2 when issue-842 is complete.
    # We've excluded col_float32 because BigQuery does not have an exact same type and float32/64 are lossy and cannot be compared.
    cli_args = [
        "validate",
        "column",
        "-sc=mysql-conn",
        "-tc=bq-conn",
        "-tbls=pso_data_validator.dvt_core_types",
        "--filter-status=fail",
        "--sum=col_int8,col_int16,col_int32,col_int64,col_dec_20,col_dec_38,col_dec_10_2,col_float64,col_varchar_30,col_string,col_date",
        "--min=col_int8,col_int16,col_int32,col_int64,col_dec_20,col_dec_38,col_dec_10_2,col_float64,col_varchar_30,col_string,col_date,col_datetime,col_tstz",
        "--max=col_int8,col_int16,col_int32,col_int64,col_dec_20,col_dec_38,col_dec_10_2,col_float64,col_varchar_30,col_string,col_date,col_datetime,col_tstz",
    ]
    parsed_args = cli_tools.configure_arg_parser().parse_args(cli_args)
    managers = main.build_config_managers_from_args(parsed_args)
    assert len(managers) == 1
    validator = data_validation.DataValidation(managers[0].config, verbose=False)
    result_df = validator.execute()
    # "--filter-status=fail" keeps only failures, so a clean run is an empty frame.
    assert len(result_df) == 0


@mock.patch(
"data_validation.state_manager.StateManager.get_connection_config",
new=mock_get_connection_config,
)
def test_row_validation_core_types(mock_conn):
def test_row_validation_core_types():
parser = cli_tools.configure_arg_parser()
args = parser.parse_args(
[
Expand All @@ -590,9 +628,37 @@ def test_row_validation_core_types(mock_conn):

@mock.patch(
"data_validation.state_manager.StateManager.get_connection_config",
return_value=CONN,
new=mock_get_connection_config,
)
def test_row_validation_core_types_to_bigquery():
    """MySQL to BigQuery dvt_core_types row validation (hash comparison)."""
    # TODO Change --hash string below to include col_float32,col_float64 when issue-841 is complete.
    cli_args = [
        "validate",
        "row",
        "-sc=mysql-conn",
        "-tc=bq-conn",
        "-tbls=pso_data_validator.dvt_core_types",
        "--primary-keys=id",
        "--filter-status=fail",
        "--hash=col_int8,col_int16,col_int32,col_int64,col_dec_20,col_dec_38,col_dec_10_2,col_varchar_30,col_char_2,col_string,col_date,col_datetime,col_tstz",
    ]
    parsed_args = cli_tools.configure_arg_parser().parse_args(cli_args)
    managers = main.build_config_managers_from_args(parsed_args)
    assert len(managers) == 1
    validator = data_validation.DataValidation(managers[0].config, verbose=False)
    result_df = validator.execute()
    # "--filter-status=fail" keeps only failures, so a clean run is an empty frame.
    assert len(result_df) == 0


@mock.patch(
"data_validation.state_manager.StateManager.get_connection_config",
new=mock_get_connection_config,
)
def test_custom_query_validation_core_types(mock_conn):
def test_custom_query_validation_core_types():
"""MySQL to MySQL dvt_core_types custom-query validation"""
parser = cli_tools.configure_arg_parser()
args = parser.parse_args(
Expand Down
8 changes: 3 additions & 5 deletions tests/system/data_sources/test_oracle.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,6 @@ def test_column_validation_core_types():
def test_column_validation_core_types_to_bigquery():
parser = cli_tools.configure_arg_parser()
# TODO Change --sum string below to include col_datetime and col_tstz when issue-762 is complete.
# TODO Change --min/max strings below to include col_tstz when issue-917 is complete.
# We've excluded col_float32 because BigQuery does not have an exact same type and float32/64 are lossy and cannot be compared.
args = parser.parse_args(
[
Expand All @@ -254,8 +253,8 @@ def test_column_validation_core_types_to_bigquery():
"-tbls=pso_data_validator.dvt_core_types",
"--filter-status=fail",
"--sum=col_int8,col_int16,col_int32,col_int64,col_dec_20,col_dec_38,col_dec_10_2,col_float64,col_varchar_30,col_char_2,col_string,col_date",
"--min=col_int8,col_int16,col_int32,col_int64,col_dec_20,col_dec_38,col_dec_10_2,col_float64,col_varchar_30,col_char_2,col_string,col_date,col_datetime",
"--max=col_int8,col_int16,col_int32,col_int64,col_dec_20,col_dec_38,col_dec_10_2,col_float64,col_varchar_30,col_char_2,col_string,col_date,col_datetime",
"--min=col_int8,col_int16,col_int32,col_int64,col_dec_20,col_dec_38,col_dec_10_2,col_float64,col_varchar_30,col_char_2,col_string,col_date,col_datetime,col_tstz",
"--max=col_int8,col_int16,col_int32,col_int64,col_dec_20,col_dec_38,col_dec_10_2,col_float64,col_varchar_30,col_char_2,col_string,col_date,col_datetime,col_tstz",
]
)
config_managers = main.build_config_managers_from_args(args)
Expand Down Expand Up @@ -300,7 +299,6 @@ def test_row_validation_core_types():
new=mock_get_connection_config,
)
def test_row_validation_core_types_to_bigquery():
# TODO Change --hash string below to include col_tstz when issue-917 is complete.
# TODO Change --hash string below to include col_float32,col_float64 when issue-841 is complete.
parser = cli_tools.configure_arg_parser()
args = parser.parse_args(
Expand All @@ -312,7 +310,7 @@ def test_row_validation_core_types_to_bigquery():
"-tbls=pso_data_validator.dvt_core_types",
"--primary-keys=id",
"--filter-status=fail",
"--hash=col_int8,col_int16,col_int32,col_int64,col_dec_20,col_dec_38,col_dec_10_2,col_varchar_30,col_char_2,col_string,col_date,col_datetime",
"--hash=col_int8,col_int16,col_int32,col_int64,col_dec_20,col_dec_38,col_dec_10_2,col_varchar_30,col_char_2,col_string,col_date,col_datetime,col_tstz",
]
)
config_managers = main.build_config_managers_from_args(args)
Expand Down
7 changes: 3 additions & 4 deletions tests/system/data_sources/test_postgres.py
Original file line number Diff line number Diff line change
Expand Up @@ -641,7 +641,6 @@ def test_column_validation_core_types():
)
def test_column_validation_core_types_to_bigquery():
parser = cli_tools.configure_arg_parser()
# TODO Change --min/max strings below to include col_tstz when issue-917 is complete.
# We've excluded col_float32 because BigQuery does not have an exact same type and float32/64 are lossy and cannot be compared.
# TODO Change --sum and --max options to include col_char_2 when issue-842 is complete.
args = parser.parse_args(
Expand All @@ -652,9 +651,9 @@ def test_column_validation_core_types_to_bigquery():
"-tc=bq-conn",
"-tbls=pso_data_validator.dvt_core_types",
"--filter-status=fail",
"--sum=col_int8,col_int16,col_int32,col_int64,col_dec_20,col_dec_38,col_dec_10_2,col_float64,col_varchar_30,col_string,col_date,col_datetime",
"--min=col_int8,col_int16,col_int32,col_int64,col_dec_20,col_dec_38,col_dec_10_2,col_float64,col_varchar_30,col_string,col_date,col_datetime",
"--max=col_int8,col_int16,col_int32,col_int64,col_dec_20,col_dec_38,col_dec_10_2,col_float64,col_varchar_30,col_string,col_date,col_datetime",
"--sum=col_int8,col_int16,col_int32,col_int64,col_dec_20,col_dec_38,col_dec_10_2,col_float64,col_varchar_30,col_string,col_date,col_datetime,col_tstz",
"--min=col_int8,col_int16,col_int32,col_int64,col_dec_20,col_dec_38,col_dec_10_2,col_float64,col_varchar_30,col_string,col_date,col_datetime,col_tstz",
"--max=col_int8,col_int16,col_int32,col_int64,col_dec_20,col_dec_38,col_dec_10_2,col_float64,col_varchar_30,col_string,col_date,col_datetime,col_tstz",
]
)
config_managers = main.build_config_managers_from_args(args)
Expand Down
7 changes: 3 additions & 4 deletions tests/system/data_sources/test_sql_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,6 @@ def test_column_validation_core_types():
)
def test_column_validation_core_types_to_bigquery():
parser = cli_tools.configure_arg_parser()
# TODO Change --sum/min/max strings below to include col_tstz when issue-917 is complete.
# We've excluded col_float32 because BigQuery does not have an exact same type and float32/64 are lossy and cannot be compared.
# We've excluded col_char_2 since the data stored in MSSQL has a trailing space which is counted in the LEN()
args = parser.parse_args(
Expand All @@ -381,8 +380,8 @@ def test_column_validation_core_types_to_bigquery():
"-tbls=pso_data_validator.dvt_core_types",
"--filter-status=fail",
"--sum=col_int8,col_int16,col_int32,col_int64,col_float64,col_date,col_datetime,col_dec_10_2,col_dec_20,col_dec_38,col_varchar_30,col_char_2,col_string",
"--min=col_int8,col_int16,col_int32,col_int64,col_float64,col_date,col_datetime,col_dec_10_2,col_dec_20,col_dec_38,col_varchar_30,col_char_2,col_string",
"--max=col_int8,col_int16,col_int32,col_int64,col_float64,col_date,col_datetime,col_dec_10_2,col_dec_20,col_dec_38,col_varchar_30,col_char_2,col_string",
"--min=col_int8,col_int16,col_int32,col_int64,col_float64,col_date,col_datetime,col_tstz,col_dec_10_2,col_dec_20,col_dec_38,col_varchar_30,col_char_2,col_string",
"--max=col_int8,col_int16,col_int32,col_int64,col_float64,col_date,col_datetime,col_tstz,col_dec_10_2,col_dec_20,col_dec_38,col_varchar_30,col_char_2,col_string",
]
)
config_managers = main.build_config_managers_from_args(args)
Expand Down Expand Up @@ -429,7 +428,7 @@ def test_row_validation_core_types():
def test_row_validation_core_types_to_bigquery():
parser = cli_tools.configure_arg_parser()
# TODO When issue-834 is complete add col_string to --hash string below.
# TODO Change --hash string below to include col_tstz when issue-917 is complete.
# TODO Change --hash string below to include col_tstz when issue-929 is complete.
# TODO Change --hash string below to include col_float32,col_float64 when issue-841 is complete.
args = parser.parse_args(
[
Expand Down
6 changes: 3 additions & 3 deletions tests/system/data_sources/test_teradata.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,8 +329,8 @@ def test_column_validation_core_types_to_bigquery():
"-tbls=udf.dvt_core_types=pso_data_validator.dvt_core_types",
"--filter-status=fail",
"--sum=col_int8,col_int16,col_int32,col_int64,col_float32,col_float64,col_varchar_30,col_char_2,col_string,col_date,col_dec_20,col_dec_38,col_dec_10_2",
"--min=col_int8,col_int16,col_int32,col_int64,col_float32,col_float64,col_varchar_30,col_char_2,col_string,col_date,col_dec_20,col_dec_38,col_dec_10_2",
"--max=col_int8,col_int16,col_int32,col_int64,col_float32,col_float64,col_varchar_30,col_char_2,col_string,col_date,col_dec_20,col_dec_38,col_dec_10_2",
"--min=col_int8,col_int16,col_int32,col_int64,col_float32,col_float64,col_varchar_30,col_char_2,col_string,col_date,col_tstz,col_dec_20,col_dec_38,col_dec_10_2",
"--max=col_int8,col_int16,col_int32,col_int64,col_float32,col_float64,col_varchar_30,col_char_2,col_string,col_date,col_tstz,col_dec_20,col_dec_38,col_dec_10_2",
]
)
config_managers = main.build_config_managers_from_args(args)
Expand Down Expand Up @@ -427,7 +427,7 @@ def test_row_validation_core_types_to_bigquery():
parser = cli_tools.configure_arg_parser()
# Excluded col_string because LONG VARCHAR column causes exception regardless of column contents:
# [Error 3798] A column or character expression is larger than the max size.
# TODO Change --hash option to include col_tstz when issue-917 is complete.
# TODO Change --hash option to include col_tstz when issue-929 is complete.
# TODO Change --hash option to include col_float32,col_float64 when issue-841 is complete.
args = parser.parse_args(
[
Expand Down
6 changes: 6 additions & 0 deletions third_party/ibis/ibis_oracle/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,12 @@ def _table_column(t, op):
out_expr = get_col(sa_table, op)
out_expr.quote = t._quote_column_names

if op.output_dtype.is_timestamp():
timezone = op.output_dtype.timezone
if timezone is not None:
# Using literal_column on Oracle because the time zone string cannot be a bind.
out_expr = sa.literal_column(f"{out_expr.name} AT TIME ZONE '{timezone}'").label(op.name)

# If the column does not originate from the table set in the current SELECT
# context, we should format as a subquery
if t.permit_subquery and ctx.is_foreign_expr(table):
Expand Down

0 comments on commit 2f21510

Please sign in to comment.