Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: gcp secret manger support for DVT #704

Merged
merged 19 commits into from
Feb 27, 2023
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 18 additions & 2 deletions data_validation/cli_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,16 +368,28 @@ def _configure_validation_config_parser(subparsers):

def _configure_connection_parser(subparsers):
    """Configure the Parser for Connection Management.

    Registers the ``connections`` command on ``subparsers`` with two
    sub-commands (stored under ``connect_cmd``): ``list`` and ``add``.
    ``add`` takes the connection name and the optional secret manager
    settings, and is then handed to ``_configure_database_specific_parsers``.
    """

    connection_parser = subparsers.add_parser(
        "connections", help="Manage & Store connections to your Databases"
    )
    connect_subparsers = connection_parser.add_subparsers(dest="connect_cmd")
    _ = connect_subparsers.add_parser("list", help="List your connections")

    add_parser = connect_subparsers.add_parser("add", help="Store a new connection")
    add_parser.add_argument(
        "--connection-name", "-c", help="Name of connection used as reference"
    )
    # The flag spellings ("manger") are intentionally left untouched: argparse
    # derives the attribute names (secret_manger_type,
    # secret_manger_project_id) from the long option strings.
    add_parser.add_argument(
        "--secret-manger-type",
        "-sm",
        default=None,
        help="Secret manger type to store credentials by default will be plain ",
    )
    add_parser.add_argument(
        "--secret-manger-project-id",
        "-sm-prj-id",
        default=None,
        # Previously a copy of the --secret-manger-type help text.
        help="Project ID of the secret manager that stores the credentials",
    )
    _configure_database_specific_parsers(add_parser)


Expand Down Expand Up @@ -820,7 +832,11 @@ def _add_common_partition_arguments(optional_arguments, required_arguments=None)

def get_connection_config_from_args(args):
"""Return dict with connection config supplied."""
config = {consts.SOURCE_TYPE: args.connect_type}
config = {
consts.SOURCE_TYPE: args.connect_type,
consts.SECRET_MANGER_TYPE: getattr(args, consts.SECRET_MANGER_TYPE),
consts.SECRET_MANGER_PROJECT_ID: getattr(args, consts.SECRET_MANGER_PROJECT_ID),
}

if args.connect_type == "Raw":
return json.loads(args.json)
Expand Down
26 changes: 21 additions & 5 deletions data_validation/clients.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@
from ibis.backends.postgres.client import PostgreSQLClient
from third_party.ibis.ibis_cloud_spanner.api import connect as spanner_connect
from third_party.ibis.ibis_impala.api import impala_connect

from data_validation import client_info, consts, exceptions
from data_validation.secret_manger import SecretMangerBuilder

ibis.options.sql.default_limit = None

Expand Down Expand Up @@ -224,12 +224,28 @@ def get_data_client(connection_config):
"""Return DataClient client from given configuration"""
connection_config = copy.deepcopy(connection_config)
source_type = connection_config.pop(consts.SOURCE_TYPE)
secret_manger_type = connection_config.pop(consts.SECRET_MANGER_TYPE, None)
secret_manger_project_id = connection_config.pop(
consts.SECRET_MANGER_PROJECT_ID, None
)

decrypted_connection_config = {}
if secret_manger_type is not None:
sm = SecretMangerBuilder().build(secret_manger_type.lower())
for config_item in connection_config:
decrypted_connection_config[config_item] = sm.maybe_secret(
secret_manger_project_id, connection_config[config_item]
)
else:
decrypted_connection_config = connection_config

# The ibis_bigquery.connect expects a credentials object, not a string.
if consts.GOOGLE_SERVICE_ACCOUNT_KEY_PATH in connection_config:
key_path = connection_config.pop(consts.GOOGLE_SERVICE_ACCOUNT_KEY_PATH)
if consts.GOOGLE_SERVICE_ACCOUNT_KEY_PATH in decrypted_connection_config:
key_path = decrypted_connection_config.pop(
consts.GOOGLE_SERVICE_ACCOUNT_KEY_PATH
)
if key_path:
connection_config[
decrypted_connection_config[
"credentials"
] = google.oauth2.service_account.Credentials.from_service_account_file(
key_path
Expand All @@ -242,7 +258,7 @@ def get_data_client(connection_config):
raise Exception(msg)

try:
data_client = CLIENT_LOOKUP[source_type](**connection_config)
data_client = CLIENT_LOOKUP[source_type](**decrypted_connection_config)
data_client._source_type = source_type
except Exception as e:
msg = 'Connection Type "{source_type}" could not connect: {error}'.format(
Expand Down
1 change: 0 additions & 1 deletion data_validation/config_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -494,7 +494,6 @@ def build_column_configs(self, columns):
casefold_target_columns = {x.casefold(): str(x) for x in target_table.columns}

for column in columns:

if column.casefold() not in casefold_source_columns:
raise ValueError(f"Grouped Column DNE in source: {column}")
if column.casefold() not in casefold_target_columns:
Expand Down
2 changes: 2 additions & 0 deletions data_validation/consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@

# Configuration Fields
SOURCE_TYPE = "source_type"
SECRET_MANGER_TYPE = "secret_manger_type"
SECRET_MANGER_PROJECT_ID = "secret_manger_project_id"
CONFIG = "config"
CONFIG_FILE = "config_file"
CONFIG_SOURCE_CONN_NAME = "source_conn_name"
Expand Down
2 changes: 0 additions & 2 deletions data_validation/partition_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@

class PartitionBuilder:
def __init__(self, config_managers: List[ConfigManager], args: Namespace) -> None:

self.config_managers = config_managers
self.table_count = len(config_managers)
self.args = args
Expand Down Expand Up @@ -97,7 +96,6 @@ def _get_partition_key_filters(self) -> List[List[str]]:
master_filter_list = []

for config_manager in self.config_managers:

validation_builder = ValidationBuilder(config_manager)

source_partition_row_builder = PartitionRowBuilder(
Expand Down
1 change: 0 additions & 1 deletion data_validation/query_builder/query_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,6 @@ def compile(self, ibis_table):

class CalculatedField(object):
def __init__(self, ibis_expr, config, fields, cast=None, **kwargs):

"""A representation of an calculated field to build a query.

Args:
Expand Down
1 change: 0 additions & 1 deletion data_validation/result_handlers/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ def get_handler_for_project(
return BigQueryResultHandler(client, status_list=status_list, table_id=table_id)

def execute(self, result_df):

if self._status_list is not None:
result_df = filter_validation_status(self._status_list, result_df)

Expand Down
1 change: 0 additions & 1 deletion data_validation/schema_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,6 @@ def schema_validation_matching(
]
)
elif string_val(source_field_type) in allow_list_map:

allowed_target_field_type = allow_list_map[
string_val(source_field_type)
]
Expand Down
53 changes: 53 additions & 0 deletions data_validation/secret_manger.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
class SecretMangerBuilder:
    """Factory that returns a secret manager client for a given type name."""

    def build(self, client_type):
        """Return a secret manager client for ``client_type``.

        :param client_type: name of the secret manager backend, compared
            case-insensitively; only "gcp" is currently supported
        :return: a ``GCPSecretManger`` instance
        :raises ValueError: if ``client_type`` is not a supported backend
        """
        if client_type.lower() == "gcp":
            return GCPSecretManger()
        # ValueError is a subclass of Exception, so callers that handled the
        # previously raised bare Exception continue to work.
        raise ValueError(f"{client_type} is not supported yet.")


class GCPSecretManger:
    """
    GCPSecretManger: client to access secrets stored at GCP secret manger
    """

    def __init__(self):
        """Create the Secret Manager service client used for lookups."""
        # Imported here rather than at module level, so the client library is
        # only needed once an instance is actually created.
        from google.cloud import secretmanager

        # Create the Secret Manager client.
        self.client = secretmanager.SecretManagerServiceClient()

    def maybe_secret(self, project_id, secret_id, version_id="latest"):
        """Resolve ``secret_id`` to its stored secret value when possible.

        :param project_id: project that holds the secret
        :param secret_id: name of the secret to look up
        :param version_id: secret version to read, "latest" by default
        :return: the secret payload decoded as UTF-8, or ``secret_id`` itself,
            unchanged, when the secret cannot be read for any reason
        """
        import logging

        try:
            # Build the resource name of the secret.
            name = f"projects/{project_id}/secrets/{secret_id}/versions/{version_id}"
            # Access the secret version and return the decoded payload.
            response = self.client.access_secret_version(name=name)
            return response.payload.data.decode("UTF-8")
        except Exception as e:
            # Deliberate best-effort: a failed lookup means "use the value
            # as given", so the error is never propagated. It is logged at
            # debug level instead of printed, so stdout stays clean. Only the
            # exception type is recorded because the error text may echo the
            # resource name, which embeds the raw value.
            logging.getLogger(__name__).debug(
                "Secret lookup failed (%s); using the value as provided",
                type(e).__name__,
            )
            return secret_id


if __name__ == "__main__":
    # Manual smoke test: resolve a single secret and print the result.
    #
    #   python secret_manger.py PROJECT_ID SECRET_ID
    #
    # Credentials are taken from the environment (for example
    # GOOGLE_APPLICATION_CREDENTIALS) rather than from a key-file path that
    # only exists on one developer's machine.
    import sys

    if len(sys.argv) != 3:
        sys.exit("usage: secret_manger.py PROJECT_ID SECRET_ID")

    print(
        SecretMangerBuilder()
        .build("GCP")
        .maybe_secret(sys.argv[1], sys.argv[2])
    )
Loading