From 414d7f83bc65be2f17dd6103b7667409ff9b0269 Mon Sep 17 00:00:00 2001 From: mloeberc <105311677+mloeberc@users.noreply.github.com> Date: Mon, 3 Apr 2023 12:51:36 -0500 Subject: [PATCH] feat: add Impala connection optional parameters (#743) (#790) * feat: add Impala connection optional parameters * fix: reformatting file with black and flake8 --- data_validation/cli_tools.py | 16 ++++++++++++++++ docs/connections.md | 10 +++++++++- third_party/ibis/ibis_impala/api.py | 18 ++++++++++++++++++ 3 files changed, 43 insertions(+), 1 deletion(-) diff --git a/data_validation/cli_tools.py b/data_validation/cli_tools.py index a96a7d362..3bc9e40e2 100644 --- a/data_validation/cli_tools.py +++ b/data_validation/cli_tools.py @@ -128,6 +128,22 @@ "kerberos_service_name", "Desired Kerberos service name ('impala' if not provided)", ], + ["use_ssl", "Use SSL when connecting to HiveServer2 (default is False)"], + [ + "timeout", + "Connection timeout in seconds when communicating with HiveServer2 (default is 45)", + ], + [ + "ca_cert", + "Local path to 3rd party CA certificate or copy of server certificate for self-signed certificates. If SSL is enabled, but this argument is None, then certificate validation is skipped.", + ], + ["user", "LDAP user to authenticate"], + ["password", "LDAP password to authenticate"], + [ + "pool_size", + "Size of the connection pool. Typically this is not necessary to configure. (default is 8)", + ], + ["hdfs_client", "An existing HDFS client"], ], "DB2": [ ["host", "Desired DB2 host"], diff --git a/docs/connections.md b/docs/connections.md index bddcfe2fd..98efd60bf 100644 --- a/docs/connections.md +++ b/docs/connections.md @@ -332,7 +332,15 @@ Please note AlloyDB supports same connection config as Postgres. "host": "127.0.0.1", "port": 10000, "database": "default", - "auth-mechanism":"PLAIN" + "auth-mechanism": "PLAIN", + # (Optional) + "use-ssl": False, + "timeout": 45, + "ca-cert": "path-certificate", + "user": "user", + "password": "password", + "pool-size": 10, + "hdfs-client": "hdfs-client" } ``` diff --git a/third_party/ibis/ibis_impala/api.py b/third_party/ibis/ibis_impala/api.py index a3f079a06..025abddf1 100644 --- a/third_party/ibis/ibis_impala/api.py +++ b/third_party/ibis/ibis_impala/api.py @@ -33,6 +33,13 @@ def impala_connect( database="default", auth_mechanism="PLAIN", kerberos_service_name="impala", + use_ssl=False, + timeout=45, + ca_cert=None, + user=None, + password=None, + pool_size=8, + hdfs_client=None ): auth_mechanism = (auth_mechanism, "PLAIN")[auth_mechanism is None] database = (database, "default")[database is None] @@ -40,12 +47,23 @@ def impala_connect( kerberos_service_name = (kerberos_service_name, "impala")[ kerberos_service_name is None ] + use_ssl = (use_ssl, False)[use_ssl is None] + timeout = (timeout, 45)[timeout is None] + pool_size = (pool_size, 8)[pool_size is None] + return connect( host=host, port=int(port), database=database, auth_mechanism=auth_mechanism, kerberos_service_name=kerberos_service_name, + use_ssl=use_ssl, + timeout=timeout, + ca_cert=ca_cert, + user=user, + password=password, + pool_size=pool_size, + hdfs_client=hdfs_client )