Skip to content

Commit

Permalink
feat: Redshift integration for Normal row and Custom-Query Validation. (
Browse files Browse the repository at this point in the history
#817)

* feat: Redshift integration for Normal and Custom-Query Validation

* Updating README

* fix: Removing default datatypes in _parse_numeric

* refactor: refactor return value of get_schema_using_query

* Datatypes edits
  • Loading branch information
piyushsarraf committed May 4, 2023
1 parent 88cd281 commit 92ab215
Show file tree
Hide file tree
Showing 6 changed files with 381 additions and 4 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ The [Examples](https://github.com/GoogleCloudPlatform/professional-services-data

#### Row Validations

(Note: Row hash validation is currently supported for BigQuery, Teradata, Impala/Hive, Oracle, SQL Server, Postgres, Mysql, Db2 and Alloy DB. Struct and array data types are not currently supported.
(Note: Row hash validation is currently supported for BigQuery, Teradata, Impala/Hive, Oracle, SQL Server, Redshift, Postgres, MySQL, DB2 and AlloyDB. Struct and array data types are not currently supported.
In addition, please note that SHA256 is not a supported function on Teradata systems.
If you wish to perform this comparison on Teradata you will need to
[deploy a UDF to perform the conversion](https://github.com/akuroda/teradata-udf-sha2/blob/master/src/sha256.c).)
Expand Down Expand Up @@ -325,7 +325,7 @@ page provides few examples of how this tool can be used to run custom query vali

#### Custom Query Row Validations

(Note: Custom query row validation is currently only supported for BigQuery, Teradata, SQL Server, PostgreSQL, Oracle, AlloyDB, and Impala/Hive. Struct and array data types are not currently supported.)
(Note: Custom query row validation is currently only supported for BigQuery, Teradata, SQL Server, PostgreSQL, Oracle, Redshift, DB2, AlloyDB, and Impala/Hive. Struct and array data types are not currently supported.)

Below is the command syntax for row validations. In order to run row level
validations you need to pass `--hash` flag which will specify the fields
Expand Down
8 changes: 6 additions & 2 deletions data_validation/clients.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from third_party.ibis.ibis_impala.api import impala_connect
from data_validation import client_info, consts, exceptions
from data_validation.secret_manager import SecretManagerBuilder
from third_party.ibis.ibis_redshift.client import RedShiftClient

ibis.options.sql.default_limit = None

Expand Down Expand Up @@ -147,6 +148,7 @@ def get_ibis_table(client, schema_name, table_name, database_name=None):
PostgreSQLClient,
DB2Client,
MSSQLClient,
RedShiftClient,
]:
return client.table(table_name, database=database_name, schema=schema_name)
elif type(client) in [PandasClient]:
Expand All @@ -168,7 +170,7 @@ def get_ibis_table_schema(client, schema_name, table_name):
table_name (str): Table name of table object
database_name (str): Database name (generally default is used)
"""
if type(client) in [MySQLClient, PostgreSQLClient]:
if type(client) in [MySQLClient, PostgreSQLClient, RedShiftClient]:
return client.schema(schema_name).table(table_name).schema()
else:
return client.get_schema(table_name, schema_name)
Expand All @@ -181,6 +183,7 @@ def list_schemas(client):
PostgreSQLClient,
DB2Client,
MSSQLClient,
RedShiftClient,
]:
return client.list_schemas()
elif hasattr(client, "list_databases"):
Expand All @@ -196,6 +199,7 @@ def list_tables(client, schema_name):
PostgreSQLClient,
DB2Client,
MSSQLClient,
RedShiftClient,
]:
return client.list_tables(schema=schema_name)
elif schema_name:
Expand Down Expand Up @@ -299,7 +303,7 @@ def get_max_column_length(client):
"Oracle": OracleClient,
"FileSystem": get_pandas_client,
"Postgres": PostgreSQLClient,
"Redshift": PostgreSQLClient,
"Redshift": RedShiftClient,
"Teradata": TeradataClient,
"MSSQL": MSSQLClient,
"Snowflake": snowflake_connect,
Expand Down
7 changes: 7 additions & 0 deletions third_party/ibis/ibis_addon/operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
from third_party.ibis.ibis_mssql.compiler import MSSQLExprTranslator
from ibis.backends.postgres.compiler import PostgreSQLExprTranslator
from ibis.backends.mysql.compiler import MySQLExprTranslator
from third_party.ibis.ibis_redshift.compiler import RedShiftExprTranslator

# avoid errors if Db2 is not installed and not needed
try:
Expand Down Expand Up @@ -236,6 +237,11 @@ def sa_format_hashbytes_db2(translator, expr):
hex = sa.func.hex(hashfunc)
return sa.func.lower(hex)

def sa_format_hashbytes_redshift(translator, expr):
    """Compile an Ibis HashBytes expression into Redshift SQL.

    Emits Redshift's ``sha2(<expr>, 256)``; the requested algorithm
    (``how``) is intentionally ignored and SHA-256 is always produced,
    consistent with the other dialect handlers in this module.
    """
    operand, _how = expr.op().args
    compiled_operand = translator.translate(operand)
    sql_fragment = "sha2({}, 256)".format(compiled_operand)
    return sa.sql.literal_column(sql_fragment)

def sa_format_hashbytes_postgres(translator, expr):
arg, how = expr.op().args
compiled_arg = translator.translate(arg)
Expand Down Expand Up @@ -323,6 +329,7 @@ def sa_cast_postgres(t, expr):
MySQLExprTranslator._registry[HashBytes] = sa_format_hashbytes_mysql
MySQLExprTranslator._registry[ops.IfNull] = fixed_arity(sa.func.ifnull, 2)
MySQLExprTranslator._registry[ops.StringJoin] = sa_format_to_stringjoin
RedShiftExprTranslator._registry[HashBytes] = sa_format_hashbytes_redshift

if DB2ExprTranslator: #check if Db2 driver is loaded
DB2ExprTranslator._registry[HashBytes] = sa_format_hashbytes_db2
Empty file.
Loading

0 comments on commit 92ab215

Please sign in to comment.