Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Redshift integration for Normal row and Custom-Query Validation. #817

Merged
merged 5 commits into from
May 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ The [Examples](https://github.com/GoogleCloudPlatform/professional-services-data

#### Row Validations

(Note: Row hash validation is currently supported for BigQuery, Teradata, Impala/Hive, Oracle, SQL Server, Postgres, Mysql, Db2 and Alloy DB. Struct and array data types are not currently supported.
(Note: Row hash validation is currently supported for BigQuery, Teradata, Impala/Hive, Oracle, SQL Server, Redshift, Postgres, MySQL, Db2 and Alloy DB. Struct and array data types are not currently supported.
In addition, please note that SHA256 is not a supported function on Teradata systems.
If you wish to perform this comparison on Teradata you will need to
[deploy a UDF to perform the conversion](https://github.com/akuroda/teradata-udf-sha2/blob/master/src/sha256.c).)
Expand Down Expand Up @@ -325,7 +325,7 @@ page provides few examples of how this tool can be used to run custom query vali

#### Custom Query Row Validations

(Note: Custom query row validation is currently only supported for BigQuery, Teradata, SQL Server, PostgreSQL, Oracle, AlloyDB, and Impala/Hive. Struct and array data types are not currently supported.)
(Note: Custom query row validation is currently only supported for BigQuery, Teradata, SQL Server, PostgreSQL, Oracle, Redshift, DB2, AlloyDB, and Impala/Hive. Struct and array data types are not currently supported.)

Below is the command syntax for row validations. In order to run row level
validations you need to pass `--hash` flag which will specify the fields
Expand Down
8 changes: 6 additions & 2 deletions data_validation/clients.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from third_party.ibis.ibis_impala.api import impala_connect
from data_validation import client_info, consts, exceptions
from data_validation.secret_manager import SecretManagerBuilder
from third_party.ibis.ibis_redshift.client import RedShiftClient

ibis.options.sql.default_limit = None

Expand Down Expand Up @@ -147,6 +148,7 @@ def get_ibis_table(client, schema_name, table_name, database_name=None):
PostgreSQLClient,
DB2Client,
MSSQLClient,
RedShiftClient,
]:
return client.table(table_name, database=database_name, schema=schema_name)
elif type(client) in [PandasClient]:
Expand All @@ -168,7 +170,7 @@ def get_ibis_table_schema(client, schema_name, table_name):
table_name (str): Table name of table object
database_name (str): Database name (generally default is used)
"""
if type(client) in [MySQLClient, PostgreSQLClient]:
if type(client) in [MySQLClient, PostgreSQLClient, RedShiftClient]:
return client.schema(schema_name).table(table_name).schema()
else:
return client.get_schema(table_name, schema_name)
Expand All @@ -181,6 +183,7 @@ def list_schemas(client):
PostgreSQLClient,
DB2Client,
MSSQLClient,
RedShiftClient,
]:
return client.list_schemas()
elif hasattr(client, "list_databases"):
Expand All @@ -196,6 +199,7 @@ def list_tables(client, schema_name):
PostgreSQLClient,
DB2Client,
MSSQLClient,
RedShiftClient,
]:
return client.list_tables(schema=schema_name)
elif schema_name:
Expand Down Expand Up @@ -299,7 +303,7 @@ def get_max_column_length(client):
"Oracle": OracleClient,
"FileSystem": get_pandas_client,
"Postgres": PostgreSQLClient,
"Redshift": PostgreSQLClient,
"Redshift": RedShiftClient,
"Teradata": TeradataClient,
"MSSQL": MSSQLClient,
"Snowflake": snowflake_connect,
Expand Down
7 changes: 7 additions & 0 deletions third_party/ibis/ibis_addon/operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
from third_party.ibis.ibis_mssql.compiler import MSSQLExprTranslator
from ibis.backends.postgres.compiler import PostgreSQLExprTranslator
from ibis.backends.mysql.compiler import MySQLExprTranslator
from third_party.ibis.ibis_redshift.compiler import RedShiftExprTranslator

# avoid errors if Db2 is not installed and not needed
try:
Expand Down Expand Up @@ -236,6 +237,11 @@ def sa_format_hashbytes_db2(translator, expr):
hex = sa.func.hex(hashfunc)
return sa.func.lower(hex)

def sa_format_hashbytes_redshift(translator, expr):
    """Compile an ibis HashBytes expression into Redshift SQL.

    Emits Redshift's SHA2 function with a 256-bit digest over the
    translated argument. The requested hash algorithm from the ibis op
    is ignored here; SHA-256 is always used — consistent with the other
    backend hashbytes compilers in this module.
    """
    argument, _requested_how = expr.op().args
    translated_arg = translator.translate(argument)
    sql_fragment = "sha2({}, 256)".format(translated_arg)
    return sa.sql.literal_column(sql_fragment)

def sa_format_hashbytes_postgres(translator, expr):
arg, how = expr.op().args
compiled_arg = translator.translate(arg)
Expand Down Expand Up @@ -323,6 +329,7 @@ def sa_cast_postgres(t, expr):
MySQLExprTranslator._registry[HashBytes] = sa_format_hashbytes_mysql
MySQLExprTranslator._registry[ops.IfNull] = fixed_arity(sa.func.ifnull, 2)
MySQLExprTranslator._registry[ops.StringJoin] = sa_format_to_stringjoin
RedShiftExprTranslator._registry[HashBytes] = sa_format_hashbytes_redshift

if DB2ExprTranslator: #check if Db2 driver is loaded
DB2ExprTranslator._registry[HashBytes] = sa_format_hashbytes_db2
Empty file.
Loading