Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add --url to Oracle connections add options #1083

Merged
merged 5 commits into from
Feb 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 10 additions & 8 deletions data_validation/cli_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@
["user", "User used to connect"],
["password", "Password for supplied user"],
["database", "Database to connect to"],
["url", "Oracle SQLAlchemy connection URL"],
],
"MSSQL": [
["host", "Desired SQL Server host (default localhost)"],
Expand All @@ -99,18 +100,18 @@
["connect_args", "(Optional) Additional connection arg mapping"],
],
"Postgres": [
["host", "Desired Postgres host."],
["port", "Postgres port to connect on (ie. 5432)"],
["host", "Desired PostgreSQL host."],
["port", "PostgreSQL port to connect on (e.g. 5432)"],
["user", "Username to connect to"],
["password", "Password for authentication of user"],
["database", "Database in postgres to connect to (default postgres)"],
["database", "Database in PostgreSQL to connect to (default postgres)"],
],
"Redshift": [
["host", "Desired Postgres host."],
["port", "Postgres port to connect on (ie. 5439)"],
["host", "Desired Redshift host."],
["port", "Redshift port to connect on (e.g. 5439)"],
["user", "Username to connect to"],
["password", "Password for authentication of user"],
["database", "Database in postgres to connect to (default postgres)"],
["database", "Database in Redshift to connect to"],
],
"Spanner": [
["project_id", "GCP Project to use for Spanner"],
Expand All @@ -125,7 +126,7 @@
],
"Impala": [
["host", "Desired Impala host"],
["port", "Desired Imapala port (10000 if not provided)"],
["port", "Desired Impala port (10000 if not provided)"],
["database", "Desired Impala database (default if not provided)"],
["auth_mechanism", "Desired Impala auth mechanism (PLAIN if not provided)"],
[
Expand Down Expand Up @@ -401,8 +402,9 @@ def _configure_database_specific_parsers(parser):
raw_parser.add_argument("--json", "-j", help="Json string config")

for database in CONNECTION_SOURCE_FIELDS:
article = "an" if database[0].lower() in "aeiou" else "a"
db_parser = subparsers.add_parser(
database, help=f"Store a {database} connection"
database, help=f"Store {article} {database} connection"
)

for field_obj in CONNECTION_SOURCE_FIELDS[database]:
Expand Down
56 changes: 34 additions & 22 deletions docs/connections.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# Data Validation Connections
You will need to create connections before running any validations with the data validation tool. The tool allows users to
create these connections using the CLI.
You will need to create connections before running any validations with the data validation tool. The tool allows users to
create these connections using the CLI.

These connections will automatically be saved either to `~/.config/google-pso-data-validator/` or
These connections will automatically be saved either to `~/.config/google-pso-data-validator/` or
a directory specified by the env variable `PSO_DV_CONFIG_HOME`.

## GCS Connection Management (recommended)
Expand Down Expand Up @@ -65,7 +65,7 @@ Below are the connection parameters for each database.

## Raw
```
data-validation connections add
data-validation connections add
[--secret-manager-type <None|GCP>] Secret Manager type (None, GCP)
[--secret-manager-project-id SECRET_PROJECT_ID] Secret Manager project ID
--connection-name CONN_NAME Raw Connection name
Expand All @@ -76,7 +76,7 @@ The raw JSON can also be found in the connection config file. For example,

## Google BigQuery
```
data-validation connections add
data-validation connections add
[--secret-manager-type <None|GCP>] Secret Manager type (None, GCP)
[--secret-manager-project-id SECRET_PROJECT_ID] Secret Manager project ID
--connection-name CONN_NAME BigQuery Connection name
Expand All @@ -97,7 +97,7 @@ data-validation connections add

## Google Spanner
```
data-validation connections add
data-validation connections add
[--secret-manager-type <None|GCP>] Secret Manager type (None, GCP)
[--secret-manager-project-id SECRET_PROJECT_ID] Secret Manager project ID
--connection-name CONN_NAME Spanner Connection name
Expand All @@ -115,7 +115,7 @@ Please note that Teradata is not-native to this package and must be installed
via `pip install teradatasql` if you have a license.

```
data-validation connections add
data-validation connections add
[--secret-manager-type <None|GCP>] Secret Manager type (None, GCP)
[--secret-manager-project-id SECRET_PROJECT_ID] Secret Manager project ID
--connection-name CONN_NAME Teradata Connection name
Expand All @@ -132,7 +132,7 @@ data-validation connections add
Please note that the Oracle package is not installed by default. You will need to follow the [cx_Oracle](https://cx-oracle.readthedocs.io/en/latest/user_guide/installation.html) installation steps
and then run `pip install cx_Oracle`.
```
data-validation connections add
data-validation connections add
[--secret-manager-type <None|GCP>] Secret Manager type (None, GCP)
[--secret-manager-project-id SECRET_PROJECT_ID] Secret Manager project ID
--connection-name CONN_NAME Oracle Connection name
Expand All @@ -141,20 +141,32 @@ data-validation connections add
--user USER Oracle user
--password PASSWORD Oracle password
--database DATABASE Oracle database
[--url URL] SQLAlchemy connection URL
```


### Oracle User permissions to run DVT:
* CREATE SESSION
* READ or SELECT on any tables to be validated
* Optional - READ on SYS.V_$TRANSACTION (required to get the isolation level; if this privilege is not granted, the isolation level defaults to Read Committed, [more details](https://docs.sqlalchemy.org/en/14/dialects/oracle.html#transaction-isolation-level-autocommit))

### Using an Oracle wallet:

After creating an Oracle wallet and supporting configuration you can add the connection using the `--url` option, remembering to set `TNS_ADMIN` correctly before doing so. For example:

```
export TNS_ADMIN=/opt/dvt/dvt_tns_admin

data-validation connections add \
--connection-name ora_secure Oracle \
--url="oracle+cx_oracle://@dvt_prod_db"
```

## MSSQL Server
MSSQL Server connections require [pyodbc](https://pypi.org/project/pyodbc/) as the driver: `pip install pyodbc`.
For connection query parameter options, see https://docs.sqlalchemy.org/en/20/dialects/mssql.html#hostname-connections.

```
data-validation connections add
data-validation connections add
[--secret-manager-type <None|GCP>] Secret Manager type (None, GCP)
[--secret-manager-project-id SECRET_PROJECT_ID] Secret Manager project ID
--connection-name CONN_NAME MSSQL Connection name
Expand All @@ -169,7 +181,7 @@ data-validation connections add

## Postgres
```
data-validation connections add
data-validation connections add
[--secret-manager-type <None|GCP>] Secret Manager type (None, GCP)
[--secret-manager-project-id SECRET_PROJECT_ID] Secret Manager project ID
--connection-name CONN_NAME Postgres Connection name
Expand All @@ -183,7 +195,7 @@ data-validation connections add
## AlloyDB
Please note that AlloyDB supports the same connection config as Postgres.
```
data-validation connections add
data-validation connections add
[--secret-manager-type <None|GCP>] Secret Manager type (None, GCP)
[--secret-manager-project-id SECRET_PROJECT_ID] Secret Manager project ID
--connection-name CONN_NAME Postgres Connection name
Expand All @@ -196,7 +208,7 @@ data-validation connections add

## MySQL
```
data-validation connections add
data-validation connections add
[--secret-manager-type <None|GCP>] Secret Manager type (None, GCP)
[--secret-manager-project-id SECRET_PROJECT_ID] Secret Manager project ID
--connection-name CONN_NAME MySQL Connection name
Expand All @@ -209,7 +221,7 @@ data-validation connections add

## Redshift
```
data-validation connections add
data-validation connections add
[--secret-manager-type <None|GCP>] Secret Manager type (None, GCP)
[--secret-manager-project-id SECRET_PROJECT_ID] Secret Manager project ID
--connection-name CONN_NAME Redshift Connection name
Expand All @@ -222,7 +234,7 @@ data-validation connections add

## FileSystem (CSV or JSON only)
```
data-validation connections add
data-validation connections add
[--secret-manager-type <None|GCP>] Secret Manager type (None, GCP)
[--secret-manager-project-id SECRET_PROJECT_ID] Secret Manager project ID
--connection-name CONN_NAME FileSystem Connection name
Expand All @@ -233,7 +245,7 @@ data-validation connections add

## Impala
```
data-validation connections add
data-validation connections add
[--secret-manager-type <None|GCP>] Secret Manager type (None, GCP)
[--secret-manager-project-id SECRET_PROJECT_ID] Secret Manager project ID
--connection-name CONN_NAME Impala Connection name
Expand All @@ -256,18 +268,18 @@ data-validation connections add
Please note that for Group By validations, the following property must be set in Hive:

`set hive:hive.groupby.orderby.position.alias=true`

If you are running Hive on Dataproc, you will also need to install the following:
```
pip install ibis-framework[impala]
```
Only Hive >=0.11 is supported due to [impyla](https://github.com/cloudera/impyla)'s dependency on HiveServer2.

Hive connections are based on the Ibis Impala connection which uses [impyla](https://github.com/cloudera/impyla).
Only Hive >=0.11 is supported due to impyla's dependency on HiveServer2.

```
data-validation connections add
data-validation connections add
[--secret-manager-type <None|GCP>] Secret Manager type (None, GCP)
[--secret-manager-project-id SECRET_PROJECT_ID] Secret Manager project ID
--connection-name CONN_NAME Impala Connection name
Expand All @@ -290,7 +302,7 @@ data-validation connections add
## DB2
DB2 requires the `ibm_db_sa` package. We currently support only IBM DB2 LUW - Universal Database for Linux/Unix/Windows versions 9.7 onwards.
```
data-validation connections add
data-validation connections add
[--secret-manager-type <None|GCP>] Secret Manager type (None, GCP)
[--secret-manager-project-id SECRET_PROJECT_ID] Secret Manager project ID
--connection-name CONN_NAME DB2 Connection name
Expand All @@ -307,13 +319,13 @@ data-validation connections add
Snowflake requires the `snowflake-sqlalchemy` and `snowflake-connector-python` packages.
For details on connection parameters, see the [Ibis Snowflake connection parameters](https://ibis-project.org/backends/snowflake/#connection-parameters).
```
data-validation connections add
data-validation connections add
[--secret-manager-type <None|GCP>] Secret Manager type (None, GCP)
[--secret-manager-project-id SECRET_PROJECT_ID] Secret Manager project ID
--connection-name CONN_NAME Snowflake Connection name
--user USER Snowflake user
--password PASSWORD Snowflake password
--account ACCOUNT Snowflake account
--account ACCOUNT Snowflake account
--database DATABASE/SCHEMA Snowflake database and schema, separated by a `/`
[--connect-args CONNECT_ARGS] Additional connection args, default {}
```
2 changes: 1 addition & 1 deletion noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
def _setup_session_requirements(session, extra_packages=[]):
"""Install requirements for nox tests."""

session.install("--upgrade", "pip", "pytest", "pytest-cov", "wheel")
session.install("--upgrade", "pip", "pytest==7.4.3", "pytest-cov", "wheel")
session.install("--no-cache-dir", "-e", ".")

if extra_packages:
Expand Down
56 changes: 56 additions & 0 deletions tests/unit/test_cli_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import logging
from data_validation import cli_tools


TEST_CONN = '{"source_type":"Example"}'
CLI_ARGS = {
"command": "validate",
Expand All @@ -43,6 +44,36 @@
"example-project",
]

# "connections add" invocation carrying the "--bad-name" option; used by the
# test that expects argument parsing to exit.
CLI_ADD_CONNECTION_BAD_ARGS = [
    "connections", "add",
    "--bad-name", "test",
    "BigQuery",
]

# Oracle connection described with discrete fields and no "--url".
CLI_ADD_ORACLE_STD_CONNECTION_ARGS = [
    "connections", "add",
    "--connection-name", "ora_std_test",
    "Oracle",
    "--password=p",
    "--host=localhost",
    "--port=1521",
    "--user=u",
    "--database=d",
]

# Oracle connection described only by a SQLAlchemy URL (wallet-style alias).
CLI_ADD_ORACLE_WALLET_CONNECTION_ARGS = [
    "connections", "add",
    "--connection-name", "ora_wal_test",
    "Oracle",
    "--url=oracle+cx_oracle://@dvt_user_db",
]

TEST_VALIDATION_CONFIG = {
"source": "example",
"target": "example",
Expand Down Expand Up @@ -129,6 +160,31 @@ def test_create_and_list_connections(caplog, fs):
assert "Connection Name: test : BigQuery" in caplog.records[1].msg


def test_bad_add_connection():
    """Parsing a ``connections add`` command with an unknown option exits.

    The parser is built outside the ``pytest.raises`` block so that only
    ``parse_args`` is allowed to raise ``SystemExit``; a failure while
    constructing the parser must not make this test pass.
    """
    parser = cli_tools.configure_arg_parser()
    with pytest.raises(SystemExit):
        parser.parse_args(CLI_ADD_CONNECTION_BAD_ARGS)


@mock.patch(
    "data_validation.state_manager.StateManager._write_file",
)
def test_create_connections_oracle(mock_write_file):
    """Add Oracle connections via discrete fields and via ``--url``.

    ``StateManager._write_file`` is replaced by a mock for the duration of
    the test. The discrete-field connection config must not contain a
    ``url`` key, while the URL-based one must.
    """
    # Scenario 1: standard connection (host/port/user/password/database).
    std_args = cli_tools.configure_arg_parser().parse_args(
        CLI_ADD_ORACLE_STD_CONNECTION_ARGS
    )
    std_config = cli_tools.get_connection_config_from_args(std_args)
    assert "url" not in std_config
    cli_tools.store_connection(std_args.connection_name, std_config)

    # Scenario 2: wallet-based connection, described only by --url.
    wallet_args = cli_tools.configure_arg_parser().parse_args(
        CLI_ADD_ORACLE_WALLET_CONNECTION_ARGS
    )
    wallet_config = cli_tools.get_connection_config_from_args(wallet_args)
    assert "url" in wallet_config
    cli_tools.store_connection(wallet_args.connection_name, wallet_config)


def test_configure_arg_parser_list_and_run_validation_configs():
"""Test configuring arg parse in different ways."""
parser = cli_tools.configure_arg_parser()
Expand Down