From 50fac2855be4b3ca0607dd6a777699a778c56d14 Mon Sep 17 00:00:00 2001 From: Neha Nene Date: Thu, 12 May 2022 15:25:02 -0500 Subject: [PATCH] feat: Add example of BigQuery cast to NUMERIC, update chore release version (#476) * add YAML example of BQ NUMERIC cast * chore: release 1.7.2 Release-As: 1.7.2 --- docs/examples.md | 58 +++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 47 insertions(+), 11 deletions(-) diff --git a/docs/examples.md b/docs/examples.md index 20f24835a..56969c552 100644 --- a/docs/examples.md +++ b/docs/examples.md @@ -140,7 +140,7 @@ data-validation query --query, -q The Raw query to run against the supplied connection ```` -#### Sample YAML file (Grouped Column validation) +#### Sample YAML Config (Grouped Column validation) ```yaml result_handler: project_id: my-project-id @@ -155,19 +155,12 @@ validations: source_column: null target_column: null type: count - - field_alias: sum__num_bikes_available - source_column: num_bikes_available - target_column: num_bikes_available - type: sum - cast: float64 - - field_alias: sum__num_docks_available - source_column: num_docks_available - target_column: num_docks_available - type: sum + calculated_fields: [] filters: - source: region_id=71 target: region_id=71 type: custom + format: table grouped_columns: - cast: null field_alias: region_id @@ -175,16 +168,59 @@ validations: target_column: region_id labels: - !!python/tuple - - description + - user - test + random_row_batch_size: null schema_name: bigquery-public-data.new_york_citibike table_name: citibike_stations target_schema_name: bigquery-public-data.new_york_citibike target_table_name: citibike_stations threshold: 0.0 type: Column + use_random_rows: false ``` +#### Sample YAML with Calc Fields (Cast to NUMERIC before aggregation) + +The NUMERIC data type in BigQuery is equivalent to DECIMAL(38,9). This configuration +will run a SUM(CAST(column AS NUMERIC)) to avoid integer overflow. 
+ +```yaml +result_handler: {} +source: my_bq_conn +target: my_bq_conn +validations: +- aggregates: + - field_alias: count + source_column: null + target_column: null + type: count + - field_alias: sum__int + source_column: cast__int + target_column: cast__int + type: sum + calculated_fields: + - depth: 0 + field_alias: cast__int + source_calculated_columns: + - int + target_calculated_columns: + - int + type: cast + default_cast: decimal(38,9) + filters: [] + format: table + labels: [] + random_row_batch_size: null + schema_name: project.dataset + table_name: my_table + target_schema_name: project.dataset + target_table_name: my_table + threshold: 0.0 + type: Column + use_random_rows: false +``` + #### Run a custom query column validation ````shell script data-validation validate --custom-query-type column custom-query --source-query-file source_query.sql --target-query-file target_query.sql -sc my_bq_conn -tc my_bq_conn -tbls bigquery-public-data.new_york_citibike.citibike_stations