diff --git a/data_validation/clients.py b/data_validation/clients.py index 0240cb0b0..3980b1832 100644 --- a/data_validation/clients.py +++ b/data_validation/clients.py @@ -140,7 +140,11 @@ def get_ibis_table(client, schema_name, table_name, database_name=None): def get_ibis_query(client, query): """Return Ibis Table from query expression for Supplied Client.""" - return client.sql(query) + iq = client.sql(query) + # Normalise all columns in the query to lower case. + # https://github.com/GoogleCloudPlatform/professional-services-data-validator/issues/992 + iq = iq.relabel(dict(zip(iq.columns, [_.lower() for _ in iq.columns]))) + return iq def get_ibis_table_schema(client, schema_name, table_name): diff --git a/tests/system/data_sources/test_oracle.py b/tests/system/data_sources/test_oracle.py index 66e4067c5..24b58bbc3 100644 --- a/tests/system/data_sources/test_oracle.py +++ b/tests/system/data_sources/test_oracle.py @@ -321,16 +321,16 @@ def test_row_validation_core_types_to_bigquery(): "data_validation.state_manager.StateManager.get_connection_config", new=mock_get_connection_config, ) -def test_custom_query_validation_core_types(): - """Oracle to Oracle dvt_core_types custom-query validation""" +def test_custom_query_column_validation_core_types_to_bigquery(): + """Oracle to BigQuery dvt_core_types custom-query column validation""" parser = cli_tools.configure_arg_parser() args = parser.parse_args( [ "validate", "custom-query", "column", - "-sc=mock-conn", - "-tc=mock-conn", + "-sc=ora-conn", + "-tc=bq-conn", "--source-query=select * from pso_data_validator.dvt_core_types", "--target-query=select * from pso_data_validator.dvt_core_types", "--filter-status=fail", @@ -346,6 +346,66 @@ def test_custom_query_validation_core_types(): assert len(df) == 0 +@mock.patch( + "data_validation.state_manager.StateManager.get_connection_config", + new=mock_get_connection_config, +) +def test_custom_query_row_validation_core_types_to_bigquery(): + """Oracle to BigQuery 
dvt_core_types custom-query row comparison-fields validation""" + parser = cli_tools.configure_arg_parser() + args = parser.parse_args( + [ + "validate", + "custom-query", + "row", + "-sc=ora-conn", + "-tc=bq-conn", + "--source-query=select id,col_int64,COL_VARCHAR_30,col_date from pso_data_validator.dvt_core_types", + "--target-query=select id,col_int64,col_varchar_30,COL_DATE from pso_data_validator.dvt_core_types", + "--primary-keys=id", + "--filter-status=fail", + "--comparison-fields=col_int64,col_varchar_30,col_date", + ] + ) + config_managers = main.build_config_managers_from_args(args) + assert len(config_managers) == 1 + config_manager = config_managers[0] + validator = data_validation.DataValidation(config_manager.config, verbose=False) + df = validator.execute() + # With filter on failures the data frame should be empty + assert len(df) == 0 + + +@mock.patch( + "data_validation.state_manager.StateManager.get_connection_config", + new=mock_get_connection_config, +) +def test_custom_query_row_hash_validation_core_types_to_bigquery(): + """Oracle to BigQuery dvt_core_types custom-query row hash validation""" + parser = cli_tools.configure_arg_parser() + args = parser.parse_args( + [ + "validate", + "custom-query", + "row", + "-sc=ora-conn", + "-tc=bq-conn", + "--source-query=select id,col_int64,COL_VARCHAR_30,col_date from pso_data_validator.dvt_core_types", + "--target-query=select id,col_int64,col_varchar_30,COL_DATE from pso_data_validator.dvt_core_types", + "--primary-keys=id", + "--filter-status=fail", + "--hash=col_int64,col_varchar_30,col_date", + ] + ) + config_managers = main.build_config_managers_from_args(args) + assert len(config_managers) == 1 + config_manager = config_managers[0] + validator = data_validation.DataValidation(config_manager.config, verbose=False) + df = validator.execute() + # With filter on failures the data frame should be empty + assert len(df) == 0 + + @mock.patch( 
"data_validation.state_manager.StateManager.get_connection_config", new=mock_get_connection_config, diff --git a/tests/system/data_sources/test_sql_server.py b/tests/system/data_sources/test_sql_server.py index e5f52914d..970c2d51b 100644 --- a/tests/system/data_sources/test_sql_server.py +++ b/tests/system/data_sources/test_sql_server.py @@ -450,16 +450,16 @@ def test_row_validation_core_types_to_bigquery(): "data_validation.state_manager.StateManager.get_connection_config", new=mock_get_connection_config, ) -def test_custom_query_validation_core_types(): - """SQL Server to SQL Server dvt_core_types custom-query validation""" +def test_custom_query_column_validation_core_types_to_bigquery(): + """Oracle to BigQuery dvt_core_types custom-query column validation""" parser = cli_tools.configure_arg_parser() args = parser.parse_args( [ "validate", "custom-query", "column", - "-sc=mock-conn", - "-tc=mock-conn", + "-sc=sql-conn", + "-tc=bq-conn", "--source-query=select * from pso_data_validator.dvt_core_types", "--target-query=select * from pso_data_validator.dvt_core_types", "--filter-status=fail", @@ -473,3 +473,63 @@ def test_custom_query_validation_core_types(): df = validator.execute() # With filter on failures the data frame should be empty assert len(df) == 0 + + +@mock.patch( + "data_validation.state_manager.StateManager.get_connection_config", + new=mock_get_connection_config, +) +def test_custom_query_row_validation_core_types_to_bigquery(): + """SQL Server to BigQuery dvt_core_types custom-query row comparison-fields validation""" + parser = cli_tools.configure_arg_parser() + args = parser.parse_args( + [ + "validate", + "custom-query", + "row", + "-sc=sql-conn", + "-tc=bq-conn", + "--source-query=select id,col_int64,COL_VARCHAR_30,col_date from pso_data_validator.dvt_core_types", + "--target-query=select id,col_int64,col_varchar_30,COL_DATE from pso_data_validator.dvt_core_types", + "--primary-keys=id", + "--filter-status=fail", + 
"--comparison-fields=col_int64,col_varchar_30,col_date", + ] + ) + config_managers = main.build_config_managers_from_args(args) + assert len(config_managers) == 1 + config_manager = config_managers[0] + validator = data_validation.DataValidation(config_manager.config, verbose=False) + df = validator.execute() + # With filter on failures the data frame should be empty + assert len(df) == 0 + + +@mock.patch( + "data_validation.state_manager.StateManager.get_connection_config", + new=mock_get_connection_config, +) +def test_custom_query_row_hash_validation_core_types_to_bigquery(): + """SQL Server to BigQuery dvt_core_types custom-query row hash validation""" + parser = cli_tools.configure_arg_parser() + args = parser.parse_args( + [ + "validate", + "custom-query", + "row", + "-sc=sql-conn", + "-tc=bq-conn", + "--source-query=select id,col_int64,COL_VARCHAR_30,col_date from pso_data_validator.dvt_core_types", + "--target-query=select id,col_int64,col_varchar_30,COL_DATE from pso_data_validator.dvt_core_types", + "--primary-keys=id", + "--filter-status=fail", + "--hash=col_int64,col_varchar_30,col_date", + ] + ) + config_managers = main.build_config_managers_from_args(args) + assert len(config_managers) == 1 + config_manager = config_managers[0] + validator = data_validation.DataValidation(config_manager.config, verbose=False) + df = validator.execute() + # With filter on failures the data frame should be empty + assert len(df) == 0 diff --git a/tests/system/data_sources/test_teradata.py b/tests/system/data_sources/test_teradata.py index 1247e44c6..cf6964259 100644 --- a/tests/system/data_sources/test_teradata.py +++ b/tests/system/data_sources/test_teradata.py @@ -449,18 +449,18 @@ def test_row_validation_core_types_to_bigquery(): "data_validation.state_manager.StateManager.get_connection_config", new=mock_get_connection_config, ) -def test_custom_query_validation_core_types(): - """Teradata to Teradata dvt_core_types custom-query validation""" +def 
test_custom_query_column_validation_core_types_to_bigquery(): + """Teradata to BigQuery dvt_core_types custom-query column validation""" parser = cli_tools.configure_arg_parser() args = parser.parse_args( [ "validate", "custom-query", "column", - "-sc=mock-conn", - "-tc=mock-conn", + "-sc=td-conn", + "-tc=bq-conn", "--source-query=select * from udf.dvt_core_types", - "--target-query=select * from udf.dvt_core_types", + "--target-query=select * from pso_data_validator.dvt_core_types", "--filter-status=fail", "--count=*", ] @@ -472,3 +472,63 @@ def test_custom_query_validation_core_types(): df = validator.execute() # With filter on failures the data frame should be empty assert len(df) == 0 + + +@mock.patch( + "data_validation.state_manager.StateManager.get_connection_config", + new=mock_get_connection_config, +) +def test_custom_query_row_validation_core_types_to_bigquery(): + """Teradata to BigQuery dvt_core_types custom-query row comparison-fields validation""" + parser = cli_tools.configure_arg_parser() + args = parser.parse_args( + [ + "validate", + "custom-query", + "row", + "-sc=td-conn", + "-tc=bq-conn", + "--source-query=select id,col_int64,COL_VARCHAR_30,col_date from udf.dvt_core_types", + "--target-query=select id,col_int64,col_varchar_30,COL_DATE from pso_data_validator.dvt_core_types", + "--primary-keys=id", + "--filter-status=fail", + "--comparison-fields=col_int64,col_varchar_30,col_date", + ] + ) + config_managers = main.build_config_managers_from_args(args) + assert len(config_managers) == 1 + config_manager = config_managers[0] + validator = data_validation.DataValidation(config_manager.config, verbose=False) + df = validator.execute() + # With filter on failures the data frame should be empty + assert len(df) == 0 + + +@mock.patch( + "data_validation.state_manager.StateManager.get_connection_config", + new=mock_get_connection_config, +) +def test_custom_query_row_hash_validation_core_types_to_bigquery(): + """Teradata to BigQuery dvt_core_types custom-query row hash validation""" 
+ parser = cli_tools.configure_arg_parser() + args = parser.parse_args( + [ + "validate", + "custom-query", + "row", + "-sc=td-conn", + "-tc=bq-conn", + "--source-query=select id,col_int64,COL_VARCHAR_30,col_date from udf.dvt_core_types", + "--target-query=select id,col_int64,col_varchar_30,COL_DATE from pso_data_validator.dvt_core_types", + "--primary-keys=id", + "--filter-status=fail", + "--hash=col_int64,col_varchar_30,col_date", + ] + ) + config_managers = main.build_config_managers_from_args(args) + assert len(config_managers) == 1 + config_manager = config_managers[0] + validator = data_validation.DataValidation(config_manager.config, verbose=False) + df = validator.execute() + # With filter on failures the data frame should be empty + assert len(df) == 0