fix: custom query row validation failing when SQL contains upper cased columns (#994)

* fix: Lowercase column names from custom query cursor description to match the table equivalents, which come back from Ibis in lower case

* Revert "fix: Lowercase column names from custom query cursor description to match the table equivalents, which come back from Ibis in lower case"

This reverts commit 963b370.

* fix: Lowercase column names from custom query schema to match the table equivalent, which comes back from Ibis in lower case

* tests: Add tests of custom-queries with mixed case in query projections
nj1973 committed Sep 20, 2023
1 parent a4cf773 commit a9fed41
Showing 4 changed files with 198 additions and 14 deletions.
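For context, a rough sketch of the failure mode in plain Python (column names are borrowed from the tests below; the validator's actual comparison logic differs): a custom query's cursor description preserves whatever case the SQL projection used, while table metadata comes back from Ibis lower-cased, so a case-sensitive column match fails.

source_columns = ["id", "col_int64", "COL_VARCHAR_30", "col_date"]  # as written in the query
target_columns = ["id", "col_int64", "col_varchar_30", "col_date"]  # as returned by Ibis

# A case-sensitive comparison of the two projections fails...
assert set(source_columns) != set(target_columns)

# ...while normalising both sides to lower case makes them match again.
assert {c.lower() for c in source_columns} == {c.lower() for c in target_columns}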
6 changes: 5 additions & 1 deletion data_validation/clients.py
@@ -140,7 +140,11 @@ def get_ibis_table(client, schema_name, table_name, database_name=None):

 def get_ibis_query(client, query):
     """Return Ibis Table from query expression for Supplied Client."""
-    return client.sql(query)
+    iq = client.sql(query)
+    # Normalise all columns in the query to lower case.
+    # https://github.com/GoogleCloudPlatform/professional-services-data-validator/issues/992
+    iq = iq.relabel(dict(zip(iq.columns, [_.lower() for _ in iq.columns])))
+    return iq


 def get_ibis_table_schema(client, schema_name, table_name):
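A minimal sketch of the same normalisation in isolation, assuming an Ibis version contemporary with this commit where ibis.memtable and Table.relabel are available; the mixed-case table here is a hypothetical stand-in for the result of client.sql(query):

import ibis

# Hypothetical mixed-case projection standing in for client.sql(query).
iq = ibis.memtable({"ID": [1, 2], "COL_VARCHAR_30": ["a", "b"]})

# Map every column name to its lower-case spelling, as in get_ibis_query above.
iq = iq.relabel(dict(zip(iq.columns, [c.lower() for c in iq.columns])))

print(iq.columns)  # ['id', 'col_varchar_30']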
68 changes: 64 additions & 4 deletions tests/system/data_sources/test_oracle.py
@@ -321,16 +321,16 @@ def test_row_validation_core_types_to_bigquery():
     "data_validation.state_manager.StateManager.get_connection_config",
     new=mock_get_connection_config,
 )
-def test_custom_query_validation_core_types():
-    """Oracle to Oracle dvt_core_types custom-query validation"""
+def test_custom_query_column_validation_core_types_to_bigquery():
+    """Oracle to BigQuery dvt_core_types custom-query column validation"""
     parser = cli_tools.configure_arg_parser()
     args = parser.parse_args(
         [
             "validate",
             "custom-query",
             "column",
-            "-sc=mock-conn",
-            "-tc=mock-conn",
+            "-sc=ora-conn",
+            "-tc=bq-conn",
             "--source-query=select * from pso_data_validator.dvt_core_types",
             "--target-query=select * from pso_data_validator.dvt_core_types",
             "--filter-status=fail",
@@ -346,6 +346,66 @@ def test_custom_query_validation_core_types():
     assert len(df) == 0


+@mock.patch(
+    "data_validation.state_manager.StateManager.get_connection_config",
+    new=mock_get_connection_config,
+)
+def test_custom_query_row_validation_core_types_to_bigquery():
+    """Oracle to BigQuery dvt_core_types custom-query row comparison-fields validation"""
+    parser = cli_tools.configure_arg_parser()
+    args = parser.parse_args(
+        [
+            "validate",
+            "custom-query",
+            "row",
+            "-sc=ora-conn",
+            "-tc=bq-conn",
+            "--source-query=select id,col_int64,COL_VARCHAR_30,col_date from pso_data_validator.dvt_core_types",
+            "--target-query=select id,col_int64,col_varchar_30,COL_DATE from pso_data_validator.dvt_core_types",
+            "--primary-keys=id",
+            "--filter-status=fail",
+            "--comparison-fields=col_int64,col_varchar_30,col_date",
+        ]
+    )
+    config_managers = main.build_config_managers_from_args(args)
+    assert len(config_managers) == 1
+    config_manager = config_managers[0]
+    validator = data_validation.DataValidation(config_manager.config, verbose=False)
+    df = validator.execute()
+    # With filter on failures the data frame should be empty
+    assert len(df) == 0
+
+
+@mock.patch(
+    "data_validation.state_manager.StateManager.get_connection_config",
+    new=mock_get_connection_config,
+)
+def test_custom_query_row_hash_validation_core_types_to_bigquery():
+    """Oracle to BigQuery dvt_core_types custom-query row hash validation"""
+    parser = cli_tools.configure_arg_parser()
+    args = parser.parse_args(
+        [
+            "validate",
+            "custom-query",
+            "row",
+            "-sc=ora-conn",
+            "-tc=bq-conn",
+            "--source-query=select id,col_int64,COL_VARCHAR_30,col_date from pso_data_validator.dvt_core_types",
+            "--target-query=select id,col_int64,col_varchar_30,COL_DATE from pso_data_validator.dvt_core_types",
+            "--primary-keys=id",
+            "--filter-status=fail",
+            "--hash=col_int64,col_varchar_30,col_date",
+        ]
+    )
+    config_managers = main.build_config_managers_from_args(args)
+    assert len(config_managers) == 1
+    config_manager = config_managers[0]
+    validator = data_validation.DataValidation(config_manager.config, verbose=False)
+    df = validator.execute()
+    # With filter on failures the data frame should be empty
+    assert len(df) == 0
+
+
 @mock.patch(
     "data_validation.state_manager.StateManager.get_connection_config",
     new=mock_get_connection_config,
68 changes: 64 additions & 4 deletions tests/system/data_sources/test_sql_server.py
@@ -450,16 +450,16 @@ def test_row_validation_core_types_to_bigquery():
     "data_validation.state_manager.StateManager.get_connection_config",
     new=mock_get_connection_config,
 )
-def test_custom_query_validation_core_types():
-    """SQL Server to SQL Server dvt_core_types custom-query validation"""
+def test_custom_query_column_validation_core_types_to_bigquery():
+    """SQL Server to BigQuery dvt_core_types custom-query column validation"""
     parser = cli_tools.configure_arg_parser()
     args = parser.parse_args(
         [
             "validate",
             "custom-query",
             "column",
-            "-sc=mock-conn",
-            "-tc=mock-conn",
+            "-sc=sql-conn",
+            "-tc=bq-conn",
             "--source-query=select * from pso_data_validator.dvt_core_types",
             "--target-query=select * from pso_data_validator.dvt_core_types",
             "--filter-status=fail",
@@ -473,3 +473,63 @@ def test_custom_query_validation_core_types():
     df = validator.execute()
     # With filter on failures the data frame should be empty
     assert len(df) == 0
+
+
+@mock.patch(
+    "data_validation.state_manager.StateManager.get_connection_config",
+    new=mock_get_connection_config,
+)
+def test_custom_query_row_validation_core_types_to_bigquery():
+    """SQL Server to BigQuery dvt_core_types custom-query row comparison-fields validation"""
+    parser = cli_tools.configure_arg_parser()
+    args = parser.parse_args(
+        [
+            "validate",
+            "custom-query",
+            "row",
+            "-sc=sql-conn",
+            "-tc=bq-conn",
+            "--source-query=select id,col_int64,COL_VARCHAR_30,col_date from pso_data_validator.dvt_core_types",
+            "--target-query=select id,col_int64,col_varchar_30,COL_DATE from pso_data_validator.dvt_core_types",
+            "--primary-keys=id",
+            "--filter-status=fail",
+            "--comparison-fields=col_int64,col_varchar_30,col_date",
+        ]
+    )
+    config_managers = main.build_config_managers_from_args(args)
+    assert len(config_managers) == 1
+    config_manager = config_managers[0]
+    validator = data_validation.DataValidation(config_manager.config, verbose=False)
+    df = validator.execute()
+    # With filter on failures the data frame should be empty
+    assert len(df) == 0
+
+
+@mock.patch(
+    "data_validation.state_manager.StateManager.get_connection_config",
+    new=mock_get_connection_config,
+)
+def test_custom_query_row_hash_validation_core_types_to_bigquery():
+    """SQL Server to BigQuery dvt_core_types custom-query row hash validation"""
+    parser = cli_tools.configure_arg_parser()
+    args = parser.parse_args(
+        [
+            "validate",
+            "custom-query",
+            "row",
+            "-sc=sql-conn",
+            "-tc=bq-conn",
+            "--source-query=select id,col_int64,COL_VARCHAR_30,col_date from pso_data_validator.dvt_core_types",
+            "--target-query=select id,col_int64,col_varchar_30,COL_DATE from pso_data_validator.dvt_core_types",
+            "--primary-keys=id",
+            "--filter-status=fail",
+            "--hash=col_int64,col_varchar_30,col_date",
+        ]
+    )
+    config_managers = main.build_config_managers_from_args(args)
+    assert len(config_managers) == 1
+    config_manager = config_managers[0]
+    validator = data_validation.DataValidation(config_manager.config, verbose=False)
+    df = validator.execute()
+    # With filter on failures the data frame should be empty
+    assert len(df) == 0
70 changes: 65 additions & 5 deletions tests/system/data_sources/test_teradata.py
@@ -449,18 +449,18 @@ def test_row_validation_core_types_to_bigquery():
     "data_validation.state_manager.StateManager.get_connection_config",
     new=mock_get_connection_config,
 )
-def test_custom_query_validation_core_types():
-    """Teradata to Teradata dvt_core_types custom-query validation"""
+def test_custom_query_column_validation_core_types_to_bigquery():
+    """Teradata to BigQuery dvt_core_types custom-query column validation"""
     parser = cli_tools.configure_arg_parser()
     args = parser.parse_args(
         [
             "validate",
             "custom-query",
             "column",
-            "-sc=mock-conn",
-            "-tc=mock-conn",
+            "-sc=td-conn",
+            "-tc=bq-conn",
             "--source-query=select * from udf.dvt_core_types",
-            "--target-query=select * from udf.dvt_core_types",
+            "--target-query=select * from pso_data_validator.dvt_core_types",
             "--filter-status=fail",
             "--count=*",
         ]
@@ -472,3 +472,63 @@ def test_custom_query_validation_core_types():
     df = validator.execute()
     # With filter on failures the data frame should be empty
     assert len(df) == 0
+
+
+@mock.patch(
+    "data_validation.state_manager.StateManager.get_connection_config",
+    new=mock_get_connection_config,
+)
+def test_custom_query_row_validation_core_types_to_bigquery():
+    """Teradata to BigQuery dvt_core_types custom-query row comparison-fields validation"""
+    parser = cli_tools.configure_arg_parser()
+    args = parser.parse_args(
+        [
+            "validate",
+            "custom-query",
+            "row",
+            "-sc=td-conn",
+            "-tc=bq-conn",
+            "--source-query=select id,col_int64,COL_VARCHAR_30,col_date from udf.dvt_core_types",
+            "--target-query=select id,col_int64,col_varchar_30,COL_DATE from pso_data_validator.dvt_core_types",
+            "--primary-keys=id",
+            "--filter-status=fail",
+            "--comparison-fields=col_int64,col_varchar_30,col_date",
+        ]
+    )
+    config_managers = main.build_config_managers_from_args(args)
+    assert len(config_managers) == 1
+    config_manager = config_managers[0]
+    validator = data_validation.DataValidation(config_manager.config, verbose=False)
+    df = validator.execute()
+    # With filter on failures the data frame should be empty
+    assert len(df) == 0
+
+
+@mock.patch(
+    "data_validation.state_manager.StateManager.get_connection_config",
+    new=mock_get_connection_config,
+)
+def test_custom_query_row_hash_validation_core_types_to_bigquery():
+    """Teradata to BigQuery dvt_core_types custom-query row hash validation"""
+    parser = cli_tools.configure_arg_parser()
+    args = parser.parse_args(
+        [
+            "validate",
+            "custom-query",
+            "row",
+            "-sc=td-conn",
+            "-tc=bq-conn",
+            "--source-query=select id,col_int64,COL_VARCHAR_30,col_date from udf.dvt_core_types",
+            "--target-query=select id,col_int64,col_varchar_30,COL_DATE from pso_data_validator.dvt_core_types",
+            "--primary-keys=id",
+            "--filter-status=fail",
+            "--hash=col_int64,col_varchar_30,col_date",
+        ]
+    )
+    config_managers = main.build_config_managers_from_args(args)
+    assert len(config_managers) == 1
+    config_manager = config_managers[0]
+    validator = data_validation.DataValidation(config_manager.config, verbose=False)
+    df = validator.execute()
+    # With filter on failures the data frame should be empty
+    assert len(df) == 0
