Skip to content

Commit

Permalink
feat: gcp secret manger support for DVT (#704)
Browse files Browse the repository at this point in the history
* adding support for secret manger [GCP]

* adding support for secret manger [GCP]

* adding support for secret manger [GCP]

* adding support for secret manger [GCP]

* Revert "adding support for secret manger [GCP]"

This reverts commit b959cda.

* adding support for secret manger [GCP]

* Merge remote-tracking branch 'origin/develop' into secret-manager

# Conflicts:
#	docs/connections.md

* Merge remote-tracking branch 'origin/develop' into secret-manager

# Conflicts:
#	docs/connections.md

* code review fixes
- fix manger typo to manager
- rename argument description
- add google-cloud-secret-manager to requirement file

* code review fixes
- revert the naming of variables connections.md

* code review fixes
- revert the naming of variables connections.md

* code review fixes
- revert the naming of variables connections.md

* code review fixes
- revert the naming of variables connections.md

* code review fixes
- add test cases for secret manager class

* code review fixes
- revert the naming of variables connections.md

* code review fixes
- move secret manager testcases

* code review fixes
- revert the naming of variables connections.md
  • Loading branch information
mokhahmed committed Feb 27, 2023
1 parent 9fd102a commit d6c40f1
Show file tree
Hide file tree
Showing 13 changed files with 223 additions and 17 deletions.
22 changes: 20 additions & 2 deletions data_validation/cli_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,16 +332,28 @@ def _configure_validation_config_parser(subparsers):

def _configure_connection_parser(subparsers):
"""Configure the Parser for Connection Management."""

connection_parser = subparsers.add_parser(
"connections", help="Manage & Store connections to your Databases"
)
connect_subparsers = connection_parser.add_subparsers(dest="connect_cmd")
_ = connect_subparsers.add_parser("list", help="List your connections")

add_parser = connect_subparsers.add_parser("add", help="Store a new connection")
add_parser.add_argument(
"--connection-name", "-c", help="Name of connection used as reference"
)
add_parser.add_argument(
"--secret-manager-type",
"-sm",
default=None,
help="Secret manager type to store credentials by default will be None ",
)
add_parser.add_argument(
"--secret-manager-project-id",
"-sm-prj-id",
default=None,
help="Project ID for the secret manager that stores the credentials",
)
_configure_database_specific_parsers(add_parser)


Expand Down Expand Up @@ -909,7 +921,13 @@ def _add_common_partition_arguments(optional_arguments, required_arguments):

def get_connection_config_from_args(args):
"""Return dict with connection config supplied."""
config = {consts.SOURCE_TYPE: args.connect_type}
config = {
consts.SOURCE_TYPE: args.connect_type,
consts.SECRET_MANAGER_TYPE: getattr(args, consts.SECRET_MANAGER_TYPE),
consts.SECRET_MANAGER_PROJECT_ID: getattr(
args, consts.SECRET_MANAGER_PROJECT_ID
),
}

if args.connect_type == "Raw":
return json.loads(args.json)
Expand Down
26 changes: 21 additions & 5 deletions data_validation/clients.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@
from ibis.backends.postgres.client import PostgreSQLClient
from third_party.ibis.ibis_cloud_spanner.api import connect as spanner_connect
from third_party.ibis.ibis_impala.api import impala_connect

from data_validation import client_info, consts, exceptions
from data_validation.secret_manager import SecretManagerBuilder

ibis.options.sql.default_limit = None

Expand Down Expand Up @@ -224,12 +224,28 @@ def get_data_client(connection_config):
"""Return DataClient client from given configuration"""
connection_config = copy.deepcopy(connection_config)
source_type = connection_config.pop(consts.SOURCE_TYPE)
secret_manager_type = connection_config.pop(consts.SECRET_MANAGER_TYPE, None)
secret_manager_project_id = connection_config.pop(
consts.SECRET_MANAGER_PROJECT_ID, None
)

decrypted_connection_config = {}
if secret_manager_type is not None:
sm = SecretManagerBuilder().build(secret_manager_type.lower())
for config_item in connection_config:
decrypted_connection_config[config_item] = sm.maybe_secret(
secret_manager_project_id, connection_config[config_item]
)
else:
decrypted_connection_config = connection_config

# The ibis_bigquery.connect expects a credentials object, not a string.
if consts.GOOGLE_SERVICE_ACCOUNT_KEY_PATH in connection_config:
key_path = connection_config.pop(consts.GOOGLE_SERVICE_ACCOUNT_KEY_PATH)
if consts.GOOGLE_SERVICE_ACCOUNT_KEY_PATH in decrypted_connection_config:
key_path = decrypted_connection_config.pop(
consts.GOOGLE_SERVICE_ACCOUNT_KEY_PATH
)
if key_path:
connection_config[
decrypted_connection_config[
"credentials"
] = google.oauth2.service_account.Credentials.from_service_account_file(
key_path
Expand All @@ -242,7 +258,7 @@ def get_data_client(connection_config):
raise Exception(msg)

try:
data_client = CLIENT_LOOKUP[source_type](**connection_config)
data_client = CLIENT_LOOKUP[source_type](**decrypted_connection_config)
data_client._source_type = source_type
except Exception as e:
msg = 'Connection Type "{source_type}" could not connect: {error}'.format(
Expand Down
1 change: 0 additions & 1 deletion data_validation/config_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -500,7 +500,6 @@ def build_column_configs(self, columns):
casefold_target_columns = {x.casefold(): str(x) for x in target_table.columns}

for column in columns:

if column.casefold() not in casefold_source_columns:
raise ValueError(f"Grouped Column DNE in source: {column}")
if column.casefold() not in casefold_target_columns:
Expand Down
2 changes: 2 additions & 0 deletions data_validation/consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@

# Configuration Fields
SOURCE_TYPE = "source_type"
SECRET_MANAGER_TYPE = "secret_manager_type"
SECRET_MANAGER_PROJECT_ID = "secret_manager_project_id"
CONFIG = "config"
CONFIG_FILE = "config_file"
CONFIG_SOURCE_CONN_NAME = "source_conn_name"
Expand Down
2 changes: 0 additions & 2 deletions data_validation/partition_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@

class PartitionBuilder:
def __init__(self, config_managers: List[ConfigManager], args: Namespace) -> None:

self.config_managers = config_managers
self.table_count = len(config_managers)
self.args = args
Expand Down Expand Up @@ -97,7 +96,6 @@ def _get_partition_key_filters(self) -> List[List[str]]:
master_filter_list = []

for config_manager in self.config_managers:

validation_builder = ValidationBuilder(config_manager)

source_partition_row_builder = PartitionRowBuilder(
Expand Down
1 change: 0 additions & 1 deletion data_validation/query_builder/query_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,6 @@ def compile(self, ibis_table):

class CalculatedField(object):
def __init__(self, ibis_expr, config, fields, cast=None, **kwargs):

"""A representation of an calculated field to build a query.
Args:
Expand Down
1 change: 0 additions & 1 deletion data_validation/result_handlers/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ def get_handler_for_project(
return BigQueryResultHandler(client, status_list=status_list, table_id=table_id)

def execute(self, result_df):

if self._status_list is not None:
result_df = filter_validation_status(self._status_list, result_df)

Expand Down
52 changes: 52 additions & 0 deletions data_validation/secret_manager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging


class SecretManagerBuilder:
def build(self, client_type):
"""
:param client_type:
:return: secret manager instance currently support gcp secret manager
"""
if client_type.lower() == "gcp":
return GCPSecretManager()
else:
raise Exception(f"{client_type} is not supported yet.")


class GCPSecretManager:
"""
GCPSecretManager: client to access secrets stored at GCP secret manager
"""

def __init__(self):
# Import the Secret Manager client library.
from google.cloud import secretmanager

# Create the Secret Manager client.
self.client = secretmanager.SecretManagerServiceClient()

def maybe_secret(self, project_id, secret_id, version_id="latest"):
"""
Get information about the given secret.
:return String value with the secret value or the secret id if the secret value if not exists
"""
try:
# Build the resource name of the secret.
name = f"projects/{project_id}/secrets/{secret_id}/versions/{version_id}"
# Access the secret version.
response = self.client.access_secret_version(name=name)
# Return the decoded payload.
payload = response.payload.data.decode("UTF-8")
return payload
except Exception as e:
logging.warning(f"{secret_id} : {e}")
return secret_id
85 changes: 82 additions & 3 deletions docs/connections.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,13 @@ eg.
The following commands can be used to create connections:

## Command template to create a connection:
Secret manager flags are optional

--secret-manager-type <None|GCP>
--secret-manager-project-id <SECRET_PROJECT_ID>

```
data-validation connections add --connection-name CONN_NAME source-type
data-validation connections add --connection-name CONN_NAME source-type --secret-manager-type <None|GCP> --secret-manager-project-id <SECRET_PROJECT_ID>
```

## Create a sample BigQuery connection:
Expand Down Expand Up @@ -68,13 +73,19 @@ Below is the expected configuration for each type.
```
{
# Raw JSON config for a connection
"json": '{"source-type": "BigQuery", "project-id": "pso-kokoro-resources", "google-service-account-key-path": null}'
"json": '{ "secret_manager_type": null, "secret_manager_project_id": null, "source-type": "BigQuery", "project-id": "pso-kokoro-resources", "google-service-account-key-path": null}'
}
```

## Google BigQuery
```
{
# secret manager type
"secret_manager_type": "GCP",
# secret manager project id
"secret_manager_project_id": "secrets-project-id",
# Configuration Required for All Data Sources
"source-type": "BigQuery",
Expand All @@ -99,6 +110,12 @@ Below is the expected configuration for each type.
## Google Spanner
```
{
# secret manager type
"secret_manager_type": "GCP",
# secret manager type
"secret_manager_project_id": "secrets-project-id",
# Configuration Required for All Data Sources
"source-type": "Spanner",
Expand All @@ -124,6 +141,13 @@ Please note that Teradata is not-native to this package and must be installed
via `pip install teradatasql` if you have a license.
```
{
# secret manager type
"secret_manager_type": "GCP",
# secret manager project id
"secret_manager_project_id": "secrets-project-id",
# Configuration Required for All Data Sources
"source-type": "Teradata",
Expand All @@ -142,6 +166,12 @@ Please note the Oracle package is not installed by default. You will need to fol
Then `pip install cx_Oracle`.
```
{
# secret manager type
"secret_manager_type": "GCP",
# secret manager project id
"secret_manager_project_id": "secrets-project-id",
# Configuration Required for All Data Sources
"source-type": "Oracle",
Expand All @@ -160,6 +190,12 @@ Please note the MSSQL Server package is not installed by default. You will need
Then `pip install pyodbc`.
```
{
# secret manager type
"secret_manager_type": "GCP",
# secret manager project id
"secret_manager_project_id": "secrets-project-id",
# Configuration Required for All Data Sources
"source-type": "MSSQL",
Expand All @@ -176,6 +212,12 @@ Then `pip install pyodbc`.
## Postgres
```
{
# secret manager type
"secret_manager_type": "GCP",
# secret manager project id
"secret_manager_project_id": "secrets-project-id",
# Configuration Required for All Data Sources
"source-type": "Postgres",
Expand All @@ -192,6 +234,12 @@ Then `pip install pyodbc`.
Please note AlloyDB supports same connection config as Postgres.
```
{
# secret manager type
"secret_manager_type": "GCP",
# secret manager project id
"secret_manager_project_id": "secrets-project-id",
# Configuration Required for All Data Sources
"source-type": "Postgres",
Expand All @@ -207,6 +255,12 @@ Please note AlloyDB supports same connection config as Postgres.
## MySQL
```
{
# secret manager type
"secret_manager_type": "GCP",
# secret manager project id
"secret_manager_project_id": "secrets-project-id",
# Configuration Required for All Data Sources
"source-type": "MySQL",
Expand All @@ -222,6 +276,12 @@ Please note AlloyDB supports same connection config as Postgres.
## Redshift
```
{
# secret manager type
"secret_manager_type": "GCP",
# secret manager project id
"secret_manager_project_id": "secrets-project-id",
# Configuration Required for All Data Sources
"source-type": "Redshift",
Expand Down Expand Up @@ -254,6 +314,12 @@ Please note AlloyDB supports same connection config as Postgres.
## Impala
```
{
# secret manager type
"secret_manager_type": "GCP",
# secret manager project id
"secret_manager_project_id": "secrets-project-id",
# Configuration Required for All Data Sources
"source-type": "Impala",
Expand All @@ -277,6 +343,13 @@ Please note that for Group By validations, the following property must be set in

```
{
# secret manager type
"secret_manager_type": "GCP",
# secret manager project id
"secret_manager_project_id": "secrets-project-id",
# Hive is based off Impala connector
"source-type": "Impala",
Expand All @@ -292,6 +365,12 @@ Only Hive >=0.11 is supported due to [impyla](https://github.com/cloudera/impyla
## DB2
```
{
# secret manager type
"secret_manager_type": "GCP",
# secret manager project id
"secret_manager_project_id": "secrets-project-id",
# Configuration Required for All Data Sources
"source-type": "DB2",
Expand Down
12 changes: 12 additions & 0 deletions noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,3 +256,15 @@ def integration_hive(session):
raise Exception("Expected Env Var: %s" % env_var)

session.run("pytest", "tests/system/data_sources/test_hive.py", *session.posargs)


@nox.session(python=random.choice(PYTHON_VERSIONS), venv_backend="venv")
def integration_secrets(session):
"""
Run SecretManager integration tests.
Ensure the SecretManager is running as expected.
"""
_setup_session_requirements(session, extra_packages=[])

test_path = "tests/system/test_secret_manager.py"
session.run("pytest", test_path, *session.posargs)
Loading

0 comments on commit d6c40f1

Please sign in to comment.