Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Issue356 db2 test #383

Merged
merged 22 commits into from
May 4, 2022
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
442ecde
feat: add db2 connection
Jan 11, 2022
f5a3f58
feat: add connection
Jan 11, 2022
bf767a6
feat: DB2 connection fix
Jan 28, 2022
5e312a9
fix: do not require db2 client unless needed
ngdav Mar 15, 2022
934020f
fix: Db2 count validation/agg functions, DB2Client
ngdav Mar 16, 2022
f20ea50
style: linting
ngdav Mar 16, 2022
517b182
Fix: Multiple updates (#359)
bluPhy Jan 27, 2022
73c6741
test: Support local integration tests for Teradata, Postgres and SQL …
ajwelch4 Feb 8, 2022
31841cc
fix: supporting non default schemas for mssql (#365)
nehanene15 Feb 10, 2022
ea2bb0c
feat: GCS support for validation configs (#340)
dmedora Feb 17, 2022
796c618
fix: test for nan when calculating fail/success in combiner (#341) (#…
ajwelch4 Feb 18, 2022
35de28f
fix: ensure all statuses are success or fail, particularly after _joi…
ajwelch4 Feb 18, 2022
65e7188
feat: first class support for row level hashing (#345)
renzokuken Feb 23, 2022
368c99b
feat: Hive partitioned tables support (#375)
nehanene15 Mar 1, 2022
acaa0f5
fix: use an appropriate column filter list for schema validation (#35…
ajwelch4 Mar 1, 2022
8d61178
fix: make status values consistent across validation types (#377) (#378)
ajwelch4 Mar 2, 2022
c7a22ce
fix: revert change from #345 that causes filters, threshold and label…
ajwelch4 Mar 3, 2022
24d87bb
feat: Hive hash function support (#392)
nehanene15 Mar 15, 2022
f8b72a3
Merge branch 'develop' into issue356-db2-test
ngdav Mar 16, 2022
0ad01ac
Merge branch 'develop' into issue356-db2-test
dhercher Mar 27, 2022
c43352c
Merge branch 'develop' into issue356-db2-test
ngdav Apr 18, 2022
b35bd1e
docs: add Db2 link to README
ngdav Apr 19, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions data_validation/cli_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,15 @@
"Desired Kerberos service name ('impala' if not provided)",
],
],
"DB2": [
["host", "Desired DB2 host"],
["port", "Desired DB2 port (50000 if not provided)"],
["user", "Username to connect to"],
["password", "Password for authentication of user"],
["database", "Database in DB2 to connect to"],
["url", "URL link in DB2 to connect to"],
["driver", "Driver link in DB2 to connect to (default ibm_db_sa)"],
],
}


Expand Down
11 changes: 11 additions & 0 deletions data_validation/clients.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import third_party.ibis.ibis_addon.datatypes
from third_party.ibis.ibis_cloud_spanner.api import connect as spanner_connect
from third_party.ibis.ibis_impala.api import impala_connect

from data_validation import client_info
from data_validation import consts, exceptions

Expand Down Expand Up @@ -83,6 +84,12 @@ def get_client_call(*args, **kwargs):
"pip install snowflake-connector-python"
)

# If you have Db2 client installed
try:
from third_party.ibis.ibis_DB2.client import DB2Client
except Exception:
DB2Client = _raise_missing_client_error("pip install ibm_db_sa")


def get_bigquery_client(project_id, dataset_id=None, credentials=None):
info = client_info.get_http_client_info()
Expand Down Expand Up @@ -129,6 +136,7 @@ def get_ibis_table(client, schema_name, table_name, database_name=None):
if type(client) in [
OracleClient,
PostgreSQLClient,
DB2Client,
MSSQLClient,
]:
return client.table(table_name, database=database_name, schema=schema_name)
Expand All @@ -143,6 +151,7 @@ def list_schemas(client):
if type(client) in [
OracleClient,
PostgreSQLClient,
DB2Client,
MSSQLClient,
]:
return client.list_schemas()
Expand All @@ -157,6 +166,7 @@ def list_tables(client, schema_name):
if type(client) in [
OracleClient,
PostgreSQLClient,
DB2Client,
MSSQLClient,
]:
return client.list_tables(schema=schema_name)
Expand Down Expand Up @@ -235,4 +245,5 @@ def get_data_client(connection_config):
"MSSQL": MSSQLClient,
"Snowflake": snowflake_connect,
"Spanner": spanner_connect,
"DB2": DB2Client,
}
11 changes: 10 additions & 1 deletion data_validation/consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,16 @@
RESULT_TYPE_TARGET = "target"

# Ibis Object Info
NUMERIC_DATA_TYPES = ["float64", "int8", "int16", "int32", "int64", "decimal"]
# Ibis dtype names that are treated as numeric.
# Every base type is listed twice: once in its plain spelling and once with
# the "[non-nullable]" suffix, so both spellings match a membership test.
# int8 and int16 are listed alongside the wider integer types so that
# small-integer columns are still recognised as numeric.
NUMERIC_DATA_TYPES = [
    "float64",
    "int8",
    "int16",
    "int32",
    "int64",
    "decimal",
    "int8[non-nullable]",
    "int16[non-nullable]",
    "int32[non-nullable]",
    "int64[non-nullable]",
    "float64[non-nullable]",
    "decimal[non-nullable]",
]

FORMAT_TYPES = ["csv", "json", "table", "text"]

Expand Down
17 changes: 17 additions & 0 deletions docs/connections.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ The data validation tool supports the following connection types.
* [FileSystem](#filesystem)
* [Impala](#Impala)
* [Hive](#Hive)
* [DB2](#DB2)

As you see above, Teradata and BigQuery have different sets of custom arguments (for example project_id for BQ versus host for Teradata).

Expand Down Expand Up @@ -285,3 +286,19 @@ Please note that for Group By validations, the following property must be set in
"auth_mechanism":"PLAIN"
}
```
## DB2
```
{
# Configuration Required for All Data Sources
"source_type": "DB2",

# Connection Details
"host": "localhost",
"port": 50000,
"driver": "ibm_db_sa",
"user": "my-username",
"password": "my-password",
"database": "my-db",
"url": "my-url",
}
```
2 changes: 1 addition & 1 deletion third_party/ibis/ibis_DB2/alchemy.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

import ibis.expr.datatypes as dt11
import ibis.expr.datatypes as dts
import ibis.sql.alchemy as s_al
import ibis.backends.base_sqlalchemy.alchemy as s_al
import third_party.ibis.ibis_DB2.expr.datatypes as dt

_ibis_type_to_sqla = {
Expand Down
2 changes: 1 addition & 1 deletion third_party/ibis/ibis_DB2/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from ibis.sql.alchemy import to_sqlalchemy
from ibis.backends.base_sqlalchemy.alchemy import to_sqlalchemy
from third_party.ibis.ibis_DB2.client import DB2Client
from third_party.ibis.ibis_DB2.compiler import dialect, rewrites # noqa: F401

Expand Down
30 changes: 28 additions & 2 deletions third_party/ibis/ibis_DB2/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,11 @@
import ibis.expr.datatypes as dt
import ibis.expr.operations as ops
import ibis.expr.types as ir
import ibis.sql.alchemy as alch
import ibis.backends.base_sqlalchemy.alchemy as alch
import third_party.ibis.ibis_DB2.alchemy as db2_alch

# used for literal translate
from ibis.sql.alchemy import (
from ibis.backends.base_sqlalchemy.alchemy import (
_get_sqla_table,
_variance_reduction,
fixed_arity,
Expand Down Expand Up @@ -363,6 +363,29 @@ def reduction_compiler(t, expr):

return reduction_compiler

def _count_start(sa_func):
    """Return ``sa_func`` exactly as it was passed in.

    Args:
        sa_func: Any object; it is not inspected or called here.

    Returns:
        The same object that was supplied.
    """
    unchanged = sa_func
    return unchanged

def _reduction_count(sa_func):
    """Build a translation rule that delegates to ``_reduction_format``.

    Args:
        sa_func: Callable forwarded untouched to ``_reduction_format``.

    Returns:
        A ``formatter(t, expr)`` function suitable for use as an entry in
        an operation-to-rule mapping.
    """

    def formatter(t, expr):
        # The final operand of the operation is handed over as ``where``;
        # every operand before it is forwarded positionally, in order.
        *operands, predicate = expr.op().args
        return _reduction_format(t, sa_func, predicate, *operands)

    return formatter

def _reduction_format(t, sa_func, where, arg, *args):
if where is not None:
arg = t.translate(where.ifelse(arg, None))
else:
arg = t.translate(arg)

#Db2 doesn't allow '*' to be parameterized, probably better way to fix this...
if arg == '*':
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this likely to break anything it touches?

I believe you could pull the column list from arg, but failing that perhaps it makes more sense to raise an exception

Copy link
Collaborator

@ngdav ngdav Apr 6, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The issue is that in other DBMS, you can bind * to a parameter marker, which isn't allowed in aggregation functions in Db2. The ibis code currently doesn't take this into account. This should only run in the case of count(), count() should have only one parameter, so this should have a limited impact. Replacing * with None instead causes the sqlalchemy layer to correctly substitute count(*) explicitly instead of trying to bind * to a parameter which doesn't work in Db2.
Pulling a column list and counting each column separately is not exactly the same as count(*) and would not be an expected outcome.

arg = None

return sa_func(arg, *map(t.translate, args))

def _log(t, expr):
arg, base = expr.op().args
Expand Down Expand Up @@ -572,6 +595,9 @@ def _day_of_week_name(t, expr):
ops.CumulativeAny: unary(sa.func.bool_or),
ops.IdenticalTo: _identical_to,
ops.HLLCardinality: _hll_cardinality,
# aggregate methods
ops.Count: _reduction_count(sa.func.count),

}
)

Expand Down