Skip to content

Commit

Permalink
feat: Add --dry-run option to validate. (#778)
Browse files Browse the repository at this point in the history
* Add --dry-run option to validate.

* Document --dry-run in README.

* Blacken
  • Loading branch information
ajwelch4 committed May 12, 2023
1 parent b3a828c commit 8989350
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 4 deletions.
25 changes: 25 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,31 @@ data-validation (--verbose or -v) (--log-level or -ll) validate custom-query row
The [Examples](https://github.com/GoogleCloudPlatform/professional-services-data-validator/blob/develop/docs/examples.md)
page provides a few examples of how this tool can be used to run custom query row validations.

#### Dry Run Validation

The `validate` command takes a `--dry-run` command-line flag that prints the source
and target SQL to stdout as JSON in lieu of performing a validation:

```
data-validation (--verbose or -v) (--log-level or -ll) validate
[--dry-run or -dr] Prints source and target SQL to stdout in lieu of performing a validation.
```

For example, this flag can be used as follows:

```shell
> data-validation validate --dry-run row \
-sc my_bq_conn \
-tc my_bq_conn \
-tbls bigquery-public-data.new_york_citibike.citibike_stations \
--primary-keys station_id \
--hash '*'
{
"source_query": "SELECT `hash__all`, `station_id`\nFROM ...",
"target_query": "SELECT `hash__all`, `station_id`\nFROM ..."
}
```

### YAML Configuration Files

You can customize the configuration for any given validation by providing use
Expand Down
22 changes: 18 additions & 4 deletions data_validation/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,11 +419,12 @@ def convert_config_to_yaml(args, config_managers):
return yaml_config


def run_validation(config_manager, verbose=False):
def run_validation(config_manager, dry_run=False, verbose=False):
"""Run a single validation.
Args:
config_manager (ConfigManager): Validation config manager instance.
dry_run (bool): Print source and target SQL to stdout in lieu of validation.
verbose (bool): Validation setting to log queries run.
"""
validator = DataValidation(
Expand All @@ -432,7 +433,18 @@ def run_validation(config_manager, verbose=False):
result_handler=None,
verbose=verbose,
)
validator.execute()
if dry_run:
print(
json.dumps(
{
"source_query": validator.validation_builder.get_source_query().compile(),
"target_query": validator.validation_builder.get_target_query().compile(),
},
indent=4,
)
)
else:
validator.execute()


def run_validations(args, config_managers):
Expand All @@ -449,15 +461,17 @@ def run_validations(args, config_managers):
config_manager.config[consts.CONFIG_FILE],
)
try:
run_validation(config_manager, verbose=args.verbose)
run_validation(
config_manager, dry_run=args.dry_run, verbose=args.verbose
)
except Exception as e:
logging.error(
"Error %s occured while running config file %s. Skipping it for now.",
str(e),
config_manager.config[consts.CONFIG_FILE],
)
else:
run_validation(config_manager, verbose=args.verbose)
run_validation(config_manager, dry_run=args.dry_run, verbose=args.verbose)


def store_yaml_config_file(args, config_managers):
Expand Down
7 changes: 7 additions & 0 deletions data_validation/cli_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,13 @@ def _configure_validate_parser(subparsers):
"validate", help="Run a validation and optionally store to config"
)

validate_parser.add_argument(
"--dry-run",
"-dr",
action="store_true",
help="Prints source and target SQL to stdout in lieu of performing a validation.",
)

validate_subparsers = validate_parser.add_subparsers(dest="validate_cmd")

column_parser = validate_subparsers.add_parser(
Expand Down

0 comments on commit 8989350

Please sign in to comment.