From 89893505e420e4c8feefa1fbf61a35259969dd76 Mon Sep 17 00:00:00 2001 From: AJ Welch <95496513+ajwelch4@users.noreply.github.com> Date: Fri, 12 May 2023 15:56:04 -0400 Subject: [PATCH] feat: Add --dry-run option to validate. (#778) * Add --dry-run option to validate. * Document --dry-run in README. * Blacken --- README.md | 25 +++++++++++++++++++++++++ data_validation/__main__.py | 22 ++++++++++++++++++---- data_validation/cli_tools.py | 7 +++++++ 3 files changed, 50 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index bbfb95aba..ffa6045a7 100644 --- a/README.md +++ b/README.md @@ -376,6 +376,31 @@ data-validation (--verbose or -v) (--log-level or -ll) validate custom-query row The [Examples](https://github.com/GoogleCloudPlatform/professional-services-data-validator/blob/develop/docs/examples.md) page provides few examples of how this tool can be used to run custom query row validations. +#### Dry Run Validation + +The `validate` command takes a `--dry-run` command line flag that prints source +and target SQL to stdout as JSON in lieu of performing a validation: + +``` +data-validation (--verbose or -v) (--log-level or -ll) validate + [--dry-run or -dr] Prints source and target SQL to stdout in lieu of performing a validation. +``` + +For example, this flag can be used as follows: + +```shell +> data-validation validate --dry-run row \ + -sc my_bq_conn \ + -tc my_bq_conn \ + -tbls bigquery-public-data.new_york_citibike.citibike_stations \ + --primary-keys station_id \ + --hash '*' +{ + "source_query": "SELECT `hash__all`, `station_id`\nFROM ...", + "target_query": "SELECT `hash__all`, `station_id`\nFROM ..." +} +``` + ### YAML Configuration Files You can customize the configuration for any given validation by providing use diff --git a/data_validation/__main__.py b/data_validation/__main__.py index 37e1cd023..9abc6d7e4 100644 --- a/data_validation/__main__.py +++ b/data_validation/__main__.py @@ -419,11 +419,12 @@ def convert_config_to_yaml(args, config_managers): return yaml_config -def run_validation(config_manager, verbose=False): +def run_validation(config_manager, dry_run=False, verbose=False): """Run a single validation. Args: config_manager (ConfigManager): Validation config manager instance. + dry_run (bool): Print source and target SQL to stdout in lieu of validation. verbose (bool): Validation setting to log queries run. """ validator = DataValidation( @@ -432,7 +433,18 @@ def run_validation(config_manager, verbose=False): result_handler=None, verbose=verbose, ) - validator.execute() + if dry_run: + print( + json.dumps( + { + "source_query": validator.validation_builder.get_source_query().compile(), + "target_query": validator.validation_builder.get_target_query().compile(), + }, + indent=4, + ) + ) + else: + validator.execute() def run_validations(args, config_managers): @@ -449,7 +461,9 @@ def run_validations(args, config_managers): config_manager.config[consts.CONFIG_FILE], ) try: - run_validation(config_manager, verbose=args.verbose) + run_validation( + config_manager, dry_run=args.dry_run, verbose=args.verbose + ) except Exception as e: logging.error( "Error %s occured while running config file %s. Skipping it for now.", @@ -457,7 +471,7 @@ def run_validations(args, config_managers): config_manager.config[consts.CONFIG_FILE], ) else: - run_validation(config_manager, verbose=args.verbose) + run_validation(config_manager, dry_run=args.dry_run, verbose=args.verbose) def store_yaml_config_file(args, config_managers): diff --git a/data_validation/cli_tools.py b/data_validation/cli_tools.py index f48e936b9..f68f863d2 100644 --- a/data_validation/cli_tools.py +++ b/data_validation/cli_tools.py @@ -399,6 +399,13 @@ def _configure_validate_parser(subparsers): "validate", help="Run a validation and optionally store to config" ) + validate_parser.add_argument( + "--dry-run", + "-dr", + action="store_true", + help="Prints source and target SQL to stdout in lieu of performing a validation.", + ) + validate_subparsers = validate_parser.add_subparsers(dest="validate_cmd") column_parser = validate_subparsers.add_parser(