diff --git a/data_validation/cli_tools.py b/data_validation/cli_tools.py
index a9ea88ed..3fb695ca 100644
--- a/data_validation/cli_tools.py
+++ b/data_validation/cli_tools.py
@@ -74,6 +74,7 @@
         ["user", "User used to connect"],
         ["password", "Password for supplied user"],
         ["database", "Database to connect to"],
+        ["url", "Oracle SQLAlchemy connection URL"],
     ],
     "MSSQL": [
         ["host", "Desired SQL Server host (default localhost)"],
@@ -99,18 +100,18 @@
         ["connect_args", "(Optional) Additional connection arg mapping"],
     ],
     "Postgres": [
-        ["host", "Desired Postgres host."],
-        ["port", "Postgres port to connect on (ie. 5432)"],
+        ["host", "Desired PostgreSQL host."],
+        ["port", "PostgreSQL port to connect on (e.g. 5432)"],
         ["user", "Username to connect to"],
         ["password", "Password for authentication of user"],
-        ["database", "Database in postgres to connect to (default postgres)"],
+        ["database", "Database in PostgreSQL to connect to (default postgres)"],
     ],
     "Redshift": [
-        ["host", "Desired Postgres host."],
-        ["port", "Postgres port to connect on (ie. 5439)"],
+        ["host", "Desired Redshift host."],
+        ["port", "Redshift port to connect on (e.g. 5439)"],
         ["user", "Username to connect to"],
         ["password", "Password for authentication of user"],
-        ["database", "Database in postgres to connect to (default postgres)"],
+        ["database", "Database in Redshift to connect to"],
     ],
     "Spanner": [
         ["project_id", "GCP Project to use for Spanner"],
@@ -125,7 +126,7 @@
     ],
     "Impala": [
         ["host", "Desired Impala host"],
-        ["port", "Desired Imapala port (10000 if not provided)"],
+        ["port", "Desired Impala port (10000 if not provided)"],
         ["database", "Desired Impala database (default if not provided)"],
         ["auth_mechanism", "Desired Impala auth mechanism (PLAIN if not provided)"],
         [
@@ -401,8 +402,9 @@ def _configure_database_specific_parsers(parser):
     raw_parser.add_argument("--json", "-j", help="Json string config")
 
     for database in CONNECTION_SOURCE_FIELDS:
+        article = "an" if database[0].lower() in "aeiou" else "a"
         db_parser = subparsers.add_parser(
-            database, help=f"Store a {database} connection"
+            database, help=f"Store {article} {database} connection"
         )
 
         for field_obj in CONNECTION_SOURCE_FIELDS[database]:
diff --git a/docs/connections.md b/docs/connections.md
index 61c94561..48dc0d90 100644
--- a/docs/connections.md
+++ b/docs/connections.md
@@ -1,8 +1,8 @@
 # Data Validation Connections
-You will need to create connections before running any validations with the data validation tool. The tool allows users to 
-create these connections using the CLI. 
+You will need to create connections before running any validations with the data validation tool. The tool allows users to
+create these connections using the CLI.
 
-These connections will automatically be saved either to `~/.config/google-pso-data-validator/` or 
+These connections will automatically be saved either to `~/.config/google-pso-data-validator/` or
 a directory specified by the env variable `PSO_DV_CONFIG_HOME`.
 
 ## GCS Connection Management (recommended)
@@ -65,7 +65,7 @@ Below are the connection parameters for each database.
 
 ## Raw
 ```
-data-validation connections add 
+data-validation connections add
    [--secret-manager-type <None|GCP>]              Secret Manager type (None, GCP)
    [--secret-manager-project-id SECRET_PROJECT_ID] Secret Manager project ID
    --connection-name CONN_NAME                     Raw Connection name
@@ -76,7 +76,7 @@ The raw JSON can also be found in the connection config file. For example,
 
 ## Google BigQuery
 ```
-data-validation connections add 
+data-validation connections add
    [--secret-manager-type <None|GCP>]              Secret Manager type (None, GCP)
    [--secret-manager-project-id SECRET_PROJECT_ID] Secret Manager project ID
    --connection-name CONN_NAME                     BigQuery Connection name
@@ -97,7 +97,7 @@ data-validation connections add
 
 ## Google Spanner
 ```
-data-validation connections add 
+data-validation connections add
    [--secret-manager-type <None|GCP>]              Secret Manager type (None, GCP)
    [--secret-manager-project-id SECRET_PROJECT_ID] Secret Manager project ID
    --connection-name CONN_NAME                     Spanner Connection name
@@ -115,7 +115,7 @@ Please note that Teradata is not-native to this package and must be installed
 via `pip install teradatasql` if you have a license.
 
 ```
-data-validation connections add 
+data-validation connections add
    [--secret-manager-type <None|GCP>]              Secret Manager type (None, GCP)
    [--secret-manager-project-id SECRET_PROJECT_ID] Secret Manager project ID
    --connection-name CONN_NAME                     Teradata Connection name
@@ -132,7 +132,7 @@ Please note the Oracle package is not installed by default. You will need to
 follow [cx_Oracle](https://cx-oracle.readthedocs.io/en/latest/user_guide/installation.html) installation steps. Then `pip install cx_Oracle`.
 
 ```
-data-validation connections add 
+data-validation connections add
    [--secret-manager-type <None|GCP>]              Secret Manager type (None, GCP)
    [--secret-manager-project-id SECRET_PROJECT_ID] Secret Manager project ID
    --connection-name CONN_NAME                     Oracle Connection name
@@ -141,20 +141,32 @@ data-validation connections add
    --host HOST                                     Oracle host
    --port PORT                                     Oracle port
    --user USER                                     Oracle user
    --password PASSWORD                             Oracle password
    --database DATABASE                             Oracle database
+   [--url URL]                                     SQLAlchemy connection URL
 ```
-
 ### Oracle User permissions to run DVT:
 * CREATE SESSION
 * READ or SELECT on any tables to be validated
 * Optional - Read on SYS.V_$TRANSACTION (required to get isolation level, if privilege is not given then will default to Read Committed, [more_details](https://docs.sqlalchemy.org/en/14/dialects/oracle.html#transaction-isolation-level-autocommit))
 
+### Using an Oracle wallet:
+
+After creating an Oracle wallet and supporting configuration you can add the connection using the `--url` option, remembering to set `TNS_ADMIN` correctly before doing so. For example:
+
+```
+export TNS_ADMIN=/opt/dvt/dvt_tns_admin
+
+data-validation connections add \
+    --connection-name ora_secure Oracle \
+    --url="oracle+cx_oracle://@dvt_prod_db"
+```
+
 ## MSSQL Server
 MSSQL Server connections require [pyodbc](https://pypi.org/project/pyodbc/) as the driver: `pip install pyodbc`.
 For connection query parameter options, see https://docs.sqlalchemy.org/en/20/dialects/mssql.html#hostname-connections.
 ```
-data-validation connections add 
+data-validation connections add
    [--secret-manager-type <None|GCP>]              Secret Manager type (None, GCP)
    [--secret-manager-project-id SECRET_PROJECT_ID] Secret Manager project ID
    --connection-name CONN_NAME                     MSSQL Connection name
@@ -169,7 +181,7 @@ data-validation connections add
 
 ## Postgres
 ```
-data-validation connections add 
+data-validation connections add
    [--secret-manager-type <None|GCP>]              Secret Manager type (None, GCP)
    [--secret-manager-project-id SECRET_PROJECT_ID] Secret Manager project ID
    --connection-name CONN_NAME                     Postgres Connection name
@@ -183,7 +195,7 @@ data-validation connections add
 ## AlloyDB
 Please note AlloyDB supports same connection config as Postgres.
 ```
-data-validation connections add 
+data-validation connections add
    [--secret-manager-type <None|GCP>]              Secret Manager type (None, GCP)
    [--secret-manager-project-id SECRET_PROJECT_ID] Secret Manager project ID
    --connection-name CONN_NAME                     Postgres Connection name
@@ -196,7 +208,7 @@ data-validation connections add
 
 ## MySQL
 ```
-data-validation connections add 
+data-validation connections add
    [--secret-manager-type <None|GCP>]              Secret Manager type (None, GCP)
    [--secret-manager-project-id SECRET_PROJECT_ID] Secret Manager project ID
    --connection-name CONN_NAME                     MySQL Connection name
@@ -209,7 +221,7 @@ data-validation connections add
 
 ## Redshift
 ```
-data-validation connections add 
+data-validation connections add
    [--secret-manager-type <None|GCP>]              Secret Manager type (None, GCP)
    [--secret-manager-project-id SECRET_PROJECT_ID] Secret Manager project ID
    --connection-name CONN_NAME                     Redshift Connection name
@@ -222,7 +234,7 @@ data-validation connections add
 
 ## FileSystem (CSV or JSON only)
 ```
-data-validation connections add 
+data-validation connections add
    [--secret-manager-type <None|GCP>]              Secret Manager type (None, GCP)
    [--secret-manager-project-id SECRET_PROJECT_ID] Secret Manager project ID
    --connection-name CONN_NAME                     FileSystem Connection name
@@ -233,7 +245,7 @@ data-validation connections add
 
 ## Impala
 ```
-data-validation connections add 
+data-validation connections add
    [--secret-manager-type <None|GCP>]              Secret Manager type (None, GCP)
    [--secret-manager-project-id SECRET_PROJECT_ID] Secret Manager project ID
    --connection-name CONN_NAME                     Impala Connection name
@@ -256,18 +268,18 @@ data-validation connections add
 
 Please note that for Group By validations, the following property must be set in Hive:
 `set hive:hive.groupby.orderby.position.alias=true`
- 
+
 If you are running Hive on Dataproc, you will also need to install the following:
 ```
 pip install ibis-framework[impala]
 ```
 
 Only Hive >=0.11 is supported due to [impyla](https://github.com/cloudera/impyla)'s dependency on HiveServer2.
- 
+
 Hive connections are based on the Ibis Impala connection which uses [impyla](https://github.com/cloudera/impyla). Only Hive >=0.11 is supported due to impyla's dependency on HiveServer2.
 
 ```
-data-validation connections add 
+data-validation connections add
    [--secret-manager-type <None|GCP>]              Secret Manager type (None, GCP)
    [--secret-manager-project-id SECRET_PROJECT_ID] Secret Manager project ID
    --connection-name CONN_NAME                     Impala Connection name
@@ -290,7 +302,7 @@ data-validation connections add
 ## DB2
 DB2 requires the `ibm_db_sa` package. We currently support only IBM DB2 LUW - Universal Database for Linux/Unix/Windows versions 9.7 onwards.
 ```
-data-validation connections add 
+data-validation connections add
    [--secret-manager-type <None|GCP>]              Secret Manager type (None, GCP)
    [--secret-manager-project-id SECRET_PROJECT_ID] Secret Manager project ID
    --connection-name CONN_NAME                     DB2 Connection name
@@ -307,13 +319,13 @@ data-validation connections add
 Snowflake requires the `snowflake-sqlalchemy` and `snowflake-connector-python` packages.
 For details on connection parameters, see the [Ibis Snowflake connection parameters](https://ibis-project.org/backends/snowflake/#connection-parameters).
 ```
-data-validation connections add 
+data-validation connections add
    [--secret-manager-type <None|GCP>]              Secret Manager type (None, GCP)
    [--secret-manager-project-id SECRET_PROJECT_ID] Secret Manager project ID
    --connection-name CONN_NAME                     Snowflake Connection name
    --user USER                                     Snowflake user
    --password PASSWORD                             Snowflake password
-   --account ACCOUNT                               Snowflake account 
+   --account ACCOUNT                               Snowflake account
    --database DATABASE/SCHEMA                      Snowflake database and schema, separated by a `/`
    [--connect-args CONNECT_ARGS]                   Additional connection args, default {}
 ```
diff --git a/noxfile.py b/noxfile.py
index fc2355c2..646ce8b2 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -47,7 +47,7 @@
 def _setup_session_requirements(session, extra_packages=[]):
     """Install requirements for nox tests."""
 
-    session.install("--upgrade", "pip", "pytest", "pytest-cov", "wheel")
+    session.install("--upgrade", "pip", "pytest==7.4.3", "pytest-cov", "wheel")
     session.install("--no-cache-dir", "-e", ".")
 
     if extra_packages:
diff --git a/tests/unit/test_cli_tools.py b/tests/unit/test_cli_tools.py
index 8dba04bb..8428b08e 100644
--- a/tests/unit/test_cli_tools.py
+++ b/tests/unit/test_cli_tools.py
@@ -18,6 +18,7 @@ import logging
 
 from data_validation import cli_tools
 
+
 TEST_CONN = '{"source_type":"Example"}'
 CLI_ARGS = {
     "command": "validate",
@@ -43,6 +44,36 @@
     "example-project",
 ]
 
+CLI_ADD_CONNECTION_BAD_ARGS = [
+    "connections",
+    "add",
+    "--bad-name",
+    "test",
+    "BigQuery",
+]
+
+CLI_ADD_ORACLE_STD_CONNECTION_ARGS = [
+    "connections",
+    "add",
+    "--connection-name",
+    "ora_std_test",
+    "Oracle",
+    "--password=p",
+    "--host=localhost",
+    "--port=1521",
+    "--user=u",
+    "--database=d",
+]
+
+CLI_ADD_ORACLE_WALLET_CONNECTION_ARGS = [
+    "connections",
+    "add",
+    "--connection-name",
+    "ora_wal_test",
+    "Oracle",
+    "--url=oracle+cx_oracle://@dvt_user_db",
+]
+
 TEST_VALIDATION_CONFIG = {
     "source": "example",
     "target": "example",
@@ -129,6 +160,31 @@ def test_create_and_list_connections(caplog, fs):
     assert "Connection Name: test : BigQuery" in caplog.records[1].msg
 
 
+def test_bad_add_connection():
+    with pytest.raises(SystemExit):
+        parser = cli_tools.configure_arg_parser()
+        _ = parser.parse_args(CLI_ADD_CONNECTION_BAD_ARGS)
+
+
+@mock.patch(
+    "data_validation.state_manager.StateManager._write_file",
+)
+def test_create_connections_oracle(mock_write_file):
+    # Create standard connection
+    parser = cli_tools.configure_arg_parser()
+    args = parser.parse_args(CLI_ADD_ORACLE_STD_CONNECTION_ARGS)
+    conn = cli_tools.get_connection_config_from_args(args)
+    assert "url" not in conn
+    cli_tools.store_connection(args.connection_name, conn)
+
+    # Create wallet based connection
+    parser = cli_tools.configure_arg_parser()
+    args = parser.parse_args(CLI_ADD_ORACLE_WALLET_CONNECTION_ARGS)
+    conn = cli_tools.get_connection_config_from_args(args)
+    assert "url" in conn
+    cli_tools.store_connection(args.connection_name, conn)
+
+
 def test_configure_arg_parser_list_and_run_validation_configs():
     """Test configuring arg parse in different ways.""" 
     parser = cli_tools.configure_arg_parser()
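For reviewers who want to exercise the new Oracle `--url` support locally, here is a minimal sketch of the end-to-end flow. The `TNS_ADMIN` path, connection name, and `SOME_SCHEMA.SOME_TABLE` are hypothetical placeholders, not part of this patch; the `validate column` flags are the tool's existing options and are unchanged here.

```
# Point the Oracle client libraries at the wallet configuration (assumed path).
export TNS_ADMIN=/opt/dvt/dvt_tns_admin

# Store a wallet-based connection using the new --url option.
data-validation connections add \
    --connection-name ora_secure Oracle \
    --url="oracle+cx_oracle://@dvt_prod_db"

# Smoke-test the stored connection with a simple count validation
# against any readable table (placeholder name below).
data-validation validate column \
    -sc ora_secure -tc ora_secure \
    -tbls SOME_SCHEMA.SOME_TABLE
```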