Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: gcp secret manager support for DVT #704

Merged
merged 19 commits into from
Feb 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 20 additions & 2 deletions data_validation/cli_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,16 +332,28 @@ def _configure_validation_config_parser(subparsers):

def _configure_connection_parser(subparsers):
    """Register the ``connections`` command with its ``list`` and ``add`` actions.

    Args:
        subparsers: the sub-parser collection the ``connections`` command is
            added to.
    """
    connections_cmd = subparsers.add_parser(
        "connections", help="Manage & Store connections to your Databases"
    )
    actions = connections_cmd.add_subparsers(dest="connect_cmd")

    # ``list`` takes no arguments of its own, so the returned parser is unused.
    actions.add_parser("list", help="List your connections")

    add_cmd = actions.add_parser("add", help="Store a new connection")
    add_cmd.add_argument(
        "--connection-name", "-c", help="Name of connection used as reference"
    )

    # Both secret manager flags are optional and default to None.
    secret_manager_flags = (
        (
            ("--secret-manager-type", "-sm"),
            "Secret manager type to store credentials by default will be None ",
        ),
        (
            ("--secret-manager-project-id", "-sm-prj-id"),
            "Project ID for the secret manager that stores the credentials",
        ),
    )
    for flag_names, help_text in secret_manager_flags:
        add_cmd.add_argument(*flag_names, default=None, help=help_text)

    # The remaining setup of the ``add`` parser is delegated to the shared helper.
    _configure_database_specific_parsers(add_cmd)


Expand Down Expand Up @@ -873,7 +885,13 @@ def _add_common_partition_arguments(optional_arguments, required_arguments):

def get_connection_config_from_args(args):
"""Return dict with connection config supplied."""
config = {consts.SOURCE_TYPE: args.connect_type}
config = {
consts.SOURCE_TYPE: args.connect_type,
consts.SECRET_MANAGER_TYPE: getattr(args, consts.SECRET_MANAGER_TYPE),
consts.SECRET_MANAGER_PROJECT_ID: getattr(
args, consts.SECRET_MANAGER_PROJECT_ID
),
}

if args.connect_type == "Raw":
return json.loads(args.json)
Expand Down
26 changes: 21 additions & 5 deletions data_validation/clients.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@
from ibis.backends.postgres.client import PostgreSQLClient
from third_party.ibis.ibis_cloud_spanner.api import connect as spanner_connect
from third_party.ibis.ibis_impala.api import impala_connect

from data_validation import client_info, consts, exceptions
from data_validation.secret_manager import SecretManagerBuilder

ibis.options.sql.default_limit = None

Expand Down Expand Up @@ -224,12 +224,28 @@ def get_data_client(connection_config):
"""Return DataClient client from given configuration"""
connection_config = copy.deepcopy(connection_config)
source_type = connection_config.pop(consts.SOURCE_TYPE)
secret_manager_type = connection_config.pop(consts.SECRET_MANAGER_TYPE, None)
secret_manager_project_id = connection_config.pop(
consts.SECRET_MANAGER_PROJECT_ID, None
)

decrypted_connection_config = {}
if secret_manager_type is not None:
sm = SecretManagerBuilder().build(secret_manager_type.lower())
for config_item in connection_config:
decrypted_connection_config[config_item] = sm.maybe_secret(
secret_manager_project_id, connection_config[config_item]
)
else:
decrypted_connection_config = connection_config

# The ibis_bigquery.connect expects a credentials object, not a string.
if consts.GOOGLE_SERVICE_ACCOUNT_KEY_PATH in connection_config:
key_path = connection_config.pop(consts.GOOGLE_SERVICE_ACCOUNT_KEY_PATH)
if consts.GOOGLE_SERVICE_ACCOUNT_KEY_PATH in decrypted_connection_config:
key_path = decrypted_connection_config.pop(
consts.GOOGLE_SERVICE_ACCOUNT_KEY_PATH
)
if key_path:
connection_config[
decrypted_connection_config[
"credentials"
] = google.oauth2.service_account.Credentials.from_service_account_file(
key_path
Expand All @@ -242,7 +258,7 @@ def get_data_client(connection_config):
raise Exception(msg)

try:
data_client = CLIENT_LOOKUP[source_type](**connection_config)
data_client = CLIENT_LOOKUP[source_type](**decrypted_connection_config)
data_client._source_type = source_type
except Exception as e:
msg = 'Connection Type "{source_type}" could not connect: {error}'.format(
Expand Down
1 change: 0 additions & 1 deletion data_validation/config_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -500,7 +500,6 @@ def build_column_configs(self, columns):
casefold_target_columns = {x.casefold(): str(x) for x in target_table.columns}

for column in columns:

if column.casefold() not in casefold_source_columns:
raise ValueError(f"Grouped Column DNE in source: {column}")
if column.casefold() not in casefold_target_columns:
Expand Down
2 changes: 2 additions & 0 deletions data_validation/consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@

# Configuration Fields
SOURCE_TYPE = "source_type"
SECRET_MANAGER_TYPE = "secret_manager_type"
SECRET_MANAGER_PROJECT_ID = "secret_manager_project_id"
CONFIG = "config"
CONFIG_FILE = "config_file"
CONFIG_SOURCE_CONN_NAME = "source_conn_name"
Expand Down
2 changes: 0 additions & 2 deletions data_validation/partition_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@

class PartitionBuilder:
def __init__(self, config_managers: List[ConfigManager], args: Namespace) -> None:

self.config_managers = config_managers
self.table_count = len(config_managers)
self.args = args
Expand Down Expand Up @@ -97,7 +96,6 @@ def _get_partition_key_filters(self) -> List[List[str]]:
master_filter_list = []

for config_manager in self.config_managers:

validation_builder = ValidationBuilder(config_manager)

source_partition_row_builder = PartitionRowBuilder(
Expand Down
1 change: 0 additions & 1 deletion data_validation/query_builder/query_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,6 @@ def compile(self, ibis_table):

class CalculatedField(object):
def __init__(self, ibis_expr, config, fields, cast=None, **kwargs):

"""A representation of an calculated field to build a query.

Args:
Expand Down
1 change: 0 additions & 1 deletion data_validation/result_handlers/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ def get_handler_for_project(
return BigQueryResultHandler(client, status_list=status_list, table_id=table_id)

def execute(self, result_df):

if self._status_list is not None:
result_df = filter_validation_status(self._status_list, result_df)

Expand Down
52 changes: 52 additions & 0 deletions data_validation/secret_manager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging


class SecretManagerBuilder:
    """Factory that returns a secret manager client for a given type name."""

    def build(self, client_type):
        """Return the secret manager client matching ``client_type``.

        :param client_type: name of the secret manager type, matched
            case-insensitively. Only "gcp" is currently supported.
        :return: a GCPSecretManager instance
        :raises ValueError: if ``client_type`` is not a supported type name
            (including non-string values such as None)
        """
        # Guard on str first so that a non-string type is reported as
        # "not supported" instead of failing with an AttributeError on .lower().
        if isinstance(client_type, str) and client_type.lower() == "gcp":
            return GCPSecretManager()
        raise ValueError(f"{client_type} is not supported yet.")


class GCPSecretManager:
    """
    GCPSecretManager: client to access secrets stored at GCP secret manager
    """

    # Secret Manager secret IDs may only contain ASCII letters, digits, hyphens
    # and underscores, and are at most 255 characters long. Any other value
    # (host names, paths, numbers, None, ...) can never name a secret.
    _SECRET_ID_CHARS = frozenset(
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_"
    )
    _SECRET_ID_MAX_LENGTH = 255

    def __init__(self):
        # Import the Secret Manager client library lazily so the dependency is
        # only needed when a GCP secret manager is actually built.
        from google.cloud import secretmanager

        # Create the Secret Manager client.
        self.client = secretmanager.SecretManagerServiceClient()

    @classmethod
    def _could_be_secret_id(cls, value):
        """Return True if ``value`` is syntactically a valid secret ID."""
        return (
            isinstance(value, str)
            and 0 < len(value) <= cls._SECRET_ID_MAX_LENGTH
            and cls._SECRET_ID_CHARS.issuperset(value)
        )

    def maybe_secret(self, project_id, secret_id, version_id="latest"):
        """
        Resolve a value that may be the ID of a secret.

        :param project_id: project that holds the secrets
        :param secret_id: candidate secret ID, or any literal config value
        :param version_id: secret version to read, "latest" by default
        :return: the decoded secret payload when ``secret_id`` names an
            accessible secret; otherwise ``secret_id`` itself, unchanged
        """
        # Values that cannot be a secret ID are returned as-is without a
        # round trip to the Secret Manager API.
        if not self._could_be_secret_id(secret_id):
            return secret_id

        try:
            # Build the resource name of the secret.
            name = f"projects/{project_id}/secrets/{secret_id}/versions/{version_id}"
            # Access the secret version.
            response = self.client.access_secret_version(name=name)
            # Return the decoded payload.
            return response.payload.data.decode("UTF-8")
        except Exception as e:
            # Best effort: fall back to the literal value. The value and the
            # exception text are deliberately not logged, because the value may
            # be a plaintext credential rather than a secret ID.
            logging.warning(
                "Value could not be read from secret manager project %s (%s); "
                "using it as a literal value",
                project_id,
                type(e).__name__,
            )
            return secret_id
85 changes: 82 additions & 3 deletions docs/connections.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,13 @@ eg.
The following commands can be used to create connections:

## Command template to create a connection:
Secret manager flags are optional

--secret-manager-type <None|GCP>
--secret-manager-project-id <SECRET_PROJECT_ID>

```
data-validation connections add --connection-name CONN_NAME source-type
data-validation connections add --secret-manager-type <None|GCP> --secret-manager-project-id <SECRET_PROJECT_ID> --connection-name CONN_NAME source-type
```

## Create a sample BigQuery connection:
Expand Down Expand Up @@ -68,13 +73,19 @@ Below is the expected configuration for each type.
```
{
# Raw JSON config for a connection
"json": '{"source-type": "BigQuery", "project-id": "pso-kokoro-resources", "google-service-account-key-path": null}'

"json": '{ "secret_manager_type": null, "secret_manager_project_id": null, "source-type": "BigQuery", "project-id": "pso-kokoro-resources", "google-service-account-key-path": null}'
}
```

## Google BigQuery
```
{
# secret manager type
"secret_manager_type": "GCP",

# secret manager project id
"secret_manager_project_id": "secrets-project-id",

# Configuration Required for All Data Sources
"source-type": "BigQuery",

Expand All @@ -99,6 +110,12 @@ Below is the expected configuration for each type.
## Google Spanner
```
{
# secret manager type
"secret_manager_type": "GCP",

# secret manager project id
"secret_manager_project_id": "secrets-project-id",

# Configuration Required for All Data Sources
"source-type": "Spanner",

Expand All @@ -124,6 +141,13 @@ Please note that Teradata is not-native to this package and must be installed
via `pip install teradatasql` if you have a license.
```
{
# secret manager type
"secret_manager_type": "GCP",

# secret manager project id
"secret_manager_project_id": "secrets-project-id",


# Configuration Required for All Data Sources
"source-type": "Teradata",

Expand All @@ -142,6 +166,12 @@ Please note the Oracle package is not installed by default. You will need to fol
Then `pip install cx_Oracle`.
```
{
# secret manager type
"secret_manager_type": "GCP",

# secret manager project id
"secret_manager_project_id": "secrets-project-id",

# Configuration Required for All Data Sources
"source-type": "Oracle",

Expand All @@ -160,6 +190,12 @@ Please note the MSSQL Server package is not installed by default. You will need
Then `pip install pyodbc`.
```
{
# secret manager type
"secret_manager_type": "GCP",

# secret manager project id
"secret_manager_project_id": "secrets-project-id",

# Configuration Required for All Data Sources
"source-type": "MSSQL",

Expand All @@ -176,6 +212,12 @@ Then `pip install pyodbc`.
## Postgres
```
{
# secret manager type
"secret_manager_type": "GCP",

# secret manager project id
"secret_manager_project_id": "secrets-project-id",

# Configuration Required for All Data Sources
"source-type": "Postgres",

Expand All @@ -192,6 +234,12 @@ Then `pip install pyodbc`.
Please note AlloyDB supports same connection config as Postgres.
```
{
# secret manager type
"secret_manager_type": "GCP",

# secret manager project id
"secret_manager_project_id": "secrets-project-id",

# Configuration Required for All Data Sources
"source-type": "Postgres",

Expand All @@ -207,6 +255,12 @@ Please note AlloyDB supports same connection config as Postgres.
## MySQL
```
{
# secret manager type
"secret_manager_type": "GCP",

# secret manager project id
"secret_manager_project_id": "secrets-project-id",

# Configuration Required for All Data Sources
"source-type": "MySQL",

Expand All @@ -222,6 +276,12 @@ Please note AlloyDB supports same connection config as Postgres.
## Redshift
```
{
# secret manager type
"secret_manager_type": "GCP",

# secret manager project id
"secret_manager_project_id": "secrets-project-id",

# Configuration Required for All Data Sources
"source-type": "Redshift",

Expand Down Expand Up @@ -254,6 +314,12 @@ Please note AlloyDB supports same connection config as Postgres.
## Impala
```
{
# secret manager type
"secret_manager_type": "GCP",

# secret manager project id
"secret_manager_project_id": "secrets-project-id",

# Configuration Required for All Data Sources
"source-type": "Impala",

Expand All @@ -277,6 +343,13 @@ Please note that for Group By validations, the following property must be set in

```
{

# secret manager type
"secret_manager_type": "GCP",

# secret manager project id
"secret_manager_project_id": "secrets-project-id",

# Hive is based off Impala connector
"source-type": "Impala",

Expand All @@ -292,6 +365,12 @@ Only Hive >=0.11 is supported due to [impyla](https://github.com/cloudera/impyla
## DB2
```
{
# secret manager type
"secret_manager_type": "GCP",

# secret manager project id
"secret_manager_project_id": "secrets-project-id",

# Configuration Required for All Data Sources
"source-type": "DB2",

Expand Down
12 changes: 12 additions & 0 deletions noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,3 +256,15 @@ def integration_hive(session):
raise Exception("Expected Env Var: %s" % env_var)

session.run("pytest", "tests/system/data_sources/test_hive.py", *session.posargs)


@nox.session(python=random.choice(PYTHON_VERSIONS), venv_backend="venv")
def integration_secrets(session):
    """Run the SecretManager integration test module with pytest.

    Any extra positional arguments given to the session are forwarded to
    pytest.
    """
    _setup_session_requirements(session, extra_packages=[])
    session.run("pytest", "tests/system/test_secret_manager.py", *session.posargs)
Loading