Skip to content

Commit

Permalink
feat: Add --url to Oracle connections add options (#1083)
Browse files Browse the repository at this point in the history
* feat: Add --url to Oracle connections add options

* tests: Add unit tests for Oracle connections add options

* tests: Freeze pytest at version 7.4.4
  • Loading branch information
nj1973 committed Feb 2, 2024
1 parent 9e70e9e commit 2f078c2
Show file tree
Hide file tree
Showing 4 changed files with 101 additions and 31 deletions.
18 changes: 10 additions & 8 deletions data_validation/cli_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@
["user", "User used to connect"],
["password", "Password for supplied user"],
["database", "Database to connect to"],
["url", "Oracle SQLAlchemy connection URL"],
],
"MSSQL": [
["host", "Desired SQL Server host (default localhost)"],
Expand All @@ -99,18 +100,18 @@
["connect_args", "(Optional) Additional connection arg mapping"],
],
"Postgres": [
["host", "Desired Postgres host."],
["port", "Postgres port to connect on (ie. 5432)"],
["host", "Desired PostgreSQL host."],
["port", "PostgreSQL port to connect on (e.g. 5432)"],
["user", "Username to connect to"],
["password", "Password for authentication of user"],
["database", "Database in postgres to connect to (default postgres)"],
["database", "Database in PostgreSQL to connect to (default postgres)"],
],
"Redshift": [
["host", "Desired Postgres host."],
["port", "Postgres port to connect on (ie. 5439)"],
["host", "Desired Redshift host."],
["port", "Redshift port to connect on (e.g. 5439)"],
["user", "Username to connect to"],
["password", "Password for authentication of user"],
["database", "Database in postgres to connect to (default postgres)"],
["database", "Database in Redshift to connect to"],
],
"Spanner": [
["project_id", "GCP Project to use for Spanner"],
Expand All @@ -125,7 +126,7 @@
],
"Impala": [
["host", "Desired Impala host"],
["port", "Desired Imapala port (10000 if not provided)"],
["port", "Desired Impala port (10000 if not provided)"],
["database", "Desired Impala database (default if not provided)"],
["auth_mechanism", "Desired Impala auth mechanism (PLAIN if not provided)"],
[
Expand Down Expand Up @@ -401,8 +402,9 @@ def _configure_database_specific_parsers(parser):
raw_parser.add_argument("--json", "-j", help="Json string config")

for database in CONNECTION_SOURCE_FIELDS:
article = "an" if database[0].lower() in "aeiou" else "a"
db_parser = subparsers.add_parser(
database, help=f"Store a {database} connection"
database, help=f"Store {article} {database} connection"
)

for field_obj in CONNECTION_SOURCE_FIELDS[database]:
Expand Down
56 changes: 34 additions & 22 deletions docs/connections.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# Data Validation Connections
You will need to create connections before running any validations with the data validation tool. The tool allows users to
create these connections using the CLI.
You will need to create connections before running any validations with the data validation tool. The tool allows users to
create these connections using the CLI.

These connections will automatically be saved either to `~/.config/google-pso-data-validator/` or
These connections will automatically be saved either to `~/.config/google-pso-data-validator/` or
a directory specified by the env variable `PSO_DV_CONFIG_HOME`.

## GCS Connection Management (recommended)
Expand Down Expand Up @@ -65,7 +65,7 @@ Below are the connection parameters for each database.

## Raw
```
data-validation connections add
data-validation connections add
[--secret-manager-type <None|GCP>] Secret Manager type (None, GCP)
[--secret-manager-project-id SECRET_PROJECT_ID] Secret Manager project ID
--connection-name CONN_NAME Raw Connection name
Expand All @@ -76,7 +76,7 @@ The raw JSON can also be found in the connection config file. For example,

## Google BigQuery
```
data-validation connections add
data-validation connections add
[--secret-manager-type <None|GCP>] Secret Manager type (None, GCP)
[--secret-manager-project-id SECRET_PROJECT_ID] Secret Manager project ID
--connection-name CONN_NAME BigQuery Connection name
Expand All @@ -97,7 +97,7 @@ data-validation connections add

## Google Spanner
```
data-validation connections add
data-validation connections add
[--secret-manager-type <None|GCP>] Secret Manager type (None, GCP)
[--secret-manager-project-id SECRET_PROJECT_ID] Secret Manager project ID
--connection-name CONN_NAME Spanner Connection name
Expand All @@ -115,7 +115,7 @@ Please note that the Teradata driver is not bundled with this package and must be installed
via `pip install teradatasql` if you have a license.

```
data-validation connections add
data-validation connections add
[--secret-manager-type <None|GCP>] Secret Manager type (None, GCP)
[--secret-manager-project-id SECRET_PROJECT_ID] Secret Manager project ID
--connection-name CONN_NAME Teradata Connection name
Expand All @@ -132,7 +132,7 @@ data-validation connections add
Please note the Oracle package is not installed by default. You will need to follow [cx_Oracle](https://cx-oracle.readthedocs.io/en/latest/user_guide/installation.html) installation steps.
Then `pip install cx_Oracle`.
```
data-validation connections add
data-validation connections add
[--secret-manager-type <None|GCP>] Secret Manager type (None, GCP)
[--secret-manager-project-id SECRET_PROJECT_ID] Secret Manager project ID
--connection-name CONN_NAME Oracle Connection name
Expand All @@ -141,20 +141,32 @@ data-validation connections add
--user USER Oracle user
--password PASSWORD Oracle password
--database DATABASE Oracle database
[--url URL] SQLAlchemy connection URL
```


### Oracle User permissions to run DVT:
* CREATE SESSION
* READ or SELECT on any tables to be validated
* Optional - READ on SYS.V_$TRANSACTION (required to get the isolation level; if this privilege is not granted, the isolation level defaults to Read Committed - see [more details](https://docs.sqlalchemy.org/en/14/dialects/oracle.html#transaction-isolation-level-autocommit))

### Using an Oracle wallet:

After creating an Oracle wallet and its supporting configuration, you can add the connection using the `--url` option. Remember to set `TNS_ADMIN` correctly before doing so. For example:

```
export TNS_ADMIN=/opt/dvt/dvt_tns_admin
data-validation connections add \
--connection-name ora_secure Oracle \
--url="oracle+cx_oracle://@dvt_prod_db"
```

## MSSQL Server
MSSQL Server connections require [pyodbc](https://pypi.org/project/pyodbc/) as the driver: `pip install pyodbc`.
For connection query parameter options, see https://docs.sqlalchemy.org/en/20/dialects/mssql.html#hostname-connections.

```
data-validation connections add
data-validation connections add
[--secret-manager-type <None|GCP>] Secret Manager type (None, GCP)
[--secret-manager-project-id SECRET_PROJECT_ID] Secret Manager project ID
--connection-name CONN_NAME MSSQL Connection name
Expand All @@ -169,7 +181,7 @@ data-validation connections add

## Postgres
```
data-validation connections add
data-validation connections add
[--secret-manager-type <None|GCP>] Secret Manager type (None, GCP)
[--secret-manager-project-id SECRET_PROJECT_ID] Secret Manager project ID
--connection-name CONN_NAME Postgres Connection name
Expand All @@ -183,7 +195,7 @@ data-validation connections add
## AlloyDB
Please note that AlloyDB supports the same connection config as Postgres.
```
data-validation connections add
data-validation connections add
[--secret-manager-type <None|GCP>] Secret Manager type (None, GCP)
[--secret-manager-project-id SECRET_PROJECT_ID] Secret Manager project ID
--connection-name CONN_NAME Postgres Connection name
Expand All @@ -196,7 +208,7 @@ data-validation connections add

## MySQL
```
data-validation connections add
data-validation connections add
[--secret-manager-type <None|GCP>] Secret Manager type (None, GCP)
[--secret-manager-project-id SECRET_PROJECT_ID] Secret Manager project ID
--connection-name CONN_NAME MySQL Connection name
Expand All @@ -209,7 +221,7 @@ data-validation connections add

## Redshift
```
data-validation connections add
data-validation connections add
[--secret-manager-type <None|GCP>] Secret Manager type (None, GCP)
[--secret-manager-project-id SECRET_PROJECT_ID] Secret Manager project ID
--connection-name CONN_NAME Redshift Connection name
Expand All @@ -222,7 +234,7 @@ data-validation connections add

## FileSystem (CSV or JSON only)
```
data-validation connections add
data-validation connections add
[--secret-manager-type <None|GCP>] Secret Manager type (None, GCP)
[--secret-manager-project-id SECRET_PROJECT_ID] Secret Manager project ID
--connection-name CONN_NAME FileSystem Connection name
Expand All @@ -233,7 +245,7 @@ data-validation connections add

## Impala
```
data-validation connections add
data-validation connections add
[--secret-manager-type <None|GCP>] Secret Manager type (None, GCP)
[--secret-manager-project-id SECRET_PROJECT_ID] Secret Manager project ID
--connection-name CONN_NAME Impala Connection name
Expand All @@ -256,18 +268,18 @@ data-validation connections add
Please note that for Group By validations, the following property must be set in Hive:

`set hive:hive.groupby.orderby.position.alias=true`

If you are running Hive on Dataproc, you will also need to install the following:
```
pip install ibis-framework[impala]
```
Only Hive >=0.11 is supported due to [impyla](https://github.com/cloudera/impyla)'s dependency on HiveServer2.

Hive connections are based on the Ibis Impala connection which uses [impyla](https://github.com/cloudera/impyla).
Only Hive >=0.11 is supported due to impyla's dependency on HiveServer2.

```
data-validation connections add
data-validation connections add
[--secret-manager-type <None|GCP>] Secret Manager type (None, GCP)
[--secret-manager-project-id SECRET_PROJECT_ID] Secret Manager project ID
--connection-name CONN_NAME Impala Connection name
Expand All @@ -290,7 +302,7 @@ data-validation connections add
## DB2
DB2 requires the `ibm_db_sa` package. We currently support only IBM DB2 LUW - Universal Database for Linux/Unix/Windows versions 9.7 onwards.
```
data-validation connections add
data-validation connections add
[--secret-manager-type <None|GCP>] Secret Manager type (None, GCP)
[--secret-manager-project-id SECRET_PROJECT_ID] Secret Manager project ID
--connection-name CONN_NAME DB2 Connection name
Expand All @@ -307,13 +319,13 @@ data-validation connections add
Snowflake requires the `snowflake-sqlalchemy` and `snowflake-connector-python` packages.
For details on connection parameters, see the [Ibis Snowflake connection parameters](https://ibis-project.org/backends/snowflake/#connection-parameters).
```
data-validation connections add
data-validation connections add
[--secret-manager-type <None|GCP>] Secret Manager type (None, GCP)
[--secret-manager-project-id SECRET_PROJECT_ID] Secret Manager project ID
--connection-name CONN_NAME Snowflake Connection name
--user USER Snowflake user
--password PASSWORD Snowflake password
--account ACCOUNT Snowflake account
--account ACCOUNT Snowflake account
--database DATABASE/SCHEMA Snowflake database and schema, separated by a `/`
[--connect-args CONNECT_ARGS] Additional connection args, default {}
```
2 changes: 1 addition & 1 deletion noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
def _setup_session_requirements(session, extra_packages=[]):
"""Install requirements for nox tests."""

session.install("--upgrade", "pip", "pytest", "pytest-cov", "wheel")
session.install("--upgrade", "pip", "pytest==7.4.4", "pytest-cov", "wheel")
session.install("--no-cache-dir", "-e", ".")

if extra_packages:
Expand Down
56 changes: 56 additions & 0 deletions tests/unit/test_cli_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import logging
from data_validation import cli_tools


TEST_CONN = '{"source_type":"Example"}'
CLI_ARGS = {
"command": "validate",
Expand All @@ -43,6 +44,36 @@
"example-project",
]

# "connections add" arguments that pass "--bad-name"; test_bad_add_connection
# expects parsing this list to raise SystemExit.
CLI_ADD_CONNECTION_BAD_ARGS = [
"connections",
"add",
"--bad-name",
"test",
"BigQuery",
]

# "connections add ... Oracle" arguments that describe the connection with the
# discrete --host/--port/--user/--password/--database options (no --url).
CLI_ADD_ORACLE_STD_CONNECTION_ARGS = [
"connections",
"add",
"--connection-name",
"ora_std_test",
"Oracle",
"--password=p",
"--host=localhost",
"--port=1521",
"--user=u",
"--database=d",
]

# "connections add ... Oracle" arguments that supply only --url, i.e. a single
# SQLAlchemy connection URL in place of the discrete connection options.
CLI_ADD_ORACLE_WALLET_CONNECTION_ARGS = [
"connections",
"add",
"--connection-name",
"ora_wal_test",
"Oracle",
"--url=oracle+cx_oracle://@dvt_user_db",
]

TEST_VALIDATION_CONFIG = {
"source": "example",
"target": "example",
Expand Down Expand Up @@ -129,6 +160,31 @@ def test_create_and_list_connections(caplog, fs):
assert "Connection Name: test : BigQuery" in caplog.records[1].msg


def test_bad_add_connection():
    """Parsing CLI_ADD_CONNECTION_BAD_ARGS must terminate with SystemExit."""
    # Build the parser outside the ``raises`` block: only the parse_args call
    # is expected to exit, and a SystemExit raised while constructing the
    # parser must fail this test rather than satisfy it.
    parser = cli_tools.configure_arg_parser()
    with pytest.raises(SystemExit):
        parser.parse_args(CLI_ADD_CONNECTION_BAD_ARGS)


@mock.patch(
    "data_validation.state_manager.StateManager._write_file",
)
def test_create_connections_oracle(mock_write_file):
    """Parse and store Oracle connections defined with and without ``--url``.

    ``StateManager._write_file`` is patched for the duration of the test.
    """
    scenarios = (
        # (CLI arguments, whether "url" must be present in the parsed config)
        (CLI_ADD_ORACLE_STD_CONNECTION_ARGS, False),
        (CLI_ADD_ORACLE_WALLET_CONNECTION_ARGS, True),
    )
    for cli_args, expect_url in scenarios:
        # Use a freshly configured parser for every scenario.
        parsed = cli_tools.configure_arg_parser().parse_args(cli_args)
        config = cli_tools.get_connection_config_from_args(parsed)
        if expect_url:
            assert "url" in config
        else:
            assert "url" not in config
        cli_tools.store_connection(parsed.connection_name, config)


def test_configure_arg_parser_list_and_run_validation_configs():
"""Test configuring arg parse in different ways."""
parser = cli_tools.configure_arg_parser()
Expand Down

0 comments on commit 2f078c2

Please sign in to comment.