diff --git a/data_validation/combiner.py b/data_validation/combiner.py index c6fef8fdf..acaff0214 100644 --- a/data_validation/combiner.py +++ b/data_validation/combiner.py @@ -18,6 +18,7 @@ original data type is used. """ +import datetime import functools import json @@ -290,6 +291,8 @@ def _join_pivots(source, target, differences, join_on_fields): def _add_metadata(joined, run_metadata): # TODO: Add source and target queries to metadata + run_metadata.end_time = datetime.datetime.now(datetime.timezone.utc) + joined = joined[ joined, ibis.literal(run_metadata.run_id).name("run_id"), diff --git a/data_validation/data_validation.py b/data_validation/data_validation.py index 75b8029a6..76a62ad2d 100644 --- a/data_validation/data_validation.py +++ b/data_validation/data_validation.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import datetime import json import logging import warnings @@ -293,7 +292,6 @@ def _execute_validation(self, validation_builder, process_in_memory=True): verbose=self.verbose, ) - self.run_metadata.end_time = datetime.datetime.now(datetime.timezone.utc) return result_df def combine_data(self, source_df, target_df, join_on_fields): diff --git a/data_validation/query_builder/query_builder.py b/data_validation/query_builder/query_builder.py index c5d8ac2f0..25f98f896 100644 --- a/data_validation/query_builder/query_builder.py +++ b/data_validation/query_builder/query_builder.py @@ -102,12 +102,12 @@ def compile(self, ibis_table): else: agg_field = self.expr(ibis_table) - if self.alias: - agg_field = agg_field.name(self.alias) - if self.cast: agg_field = agg_field.cast(self.cast) + if self.alias: + agg_field = agg_field.name(self.alias) + return agg_field diff --git a/docs/examples.md b/docs/examples.md index ae75c0e13..6bf2fdb85 100644 --- a/docs/examples.md +++ b/docs/examples.md @@ -141,6 +141,7 @@ result_handler: project_id: my-project-id table_id: pso_data_validator.results type: BigQuery + google_service_account_key_path: path/to/sa.json source: my_bq_conn target: my_bq_conn validations: @@ -153,6 +154,7 @@ validations: source_column: num_bikes_available target_column: num_bikes_available type: sum + cast: float64 - field_alias: sum__num_docks_available source_column: num_docks_available target_column: num_docks_available diff --git a/tests/unit/test_combiner.py b/tests/unit/test_combiner.py index 64461633e..8f61c5c81 100644 --- a/tests/unit/test_combiner.py +++ b/tests/unit/test_combiner.py @@ -23,6 +23,7 @@ _NAN = float("nan") +FAKE_TIME = datetime.datetime(1998, 9, 4, 7, 31, 42) EXAMPLE_RUN_METADATA = metadata.RunMetadata( validations={ @@ -52,6 +53,16 @@ def module_under_test(): return combiner +@pytest.fixture +def patch_datetime_now(monkeypatch): + class mydatetime: + @classmethod + def now(cls, utc): + return FAKE_TIME + + monkeypatch.setattr(datetime, "datetime", mydatetime) + + def test_generate_report_with_different_columns(module_under_test): source = pandas.DataFrame({"count": [1], "sum": [3]}) target = pandas.DataFrame({"count": [2]}) @@ -116,7 +127,7 @@ def test_generate_report_with_too_many_rows(module_under_test): ), }, start_time=datetime.datetime(1998, 9, 4, 7, 30, 1), - end_time=datetime.datetime(1998, 9, 4, 7, 31, 42), + end_time=None, labels=[("name", "test_label")], run_id="test-run", ), @@ -165,7 +176,7 @@ def test_generate_report_with_too_many_rows(module_under_test): ), }, start_time=datetime.datetime(1998, 9, 4, 7, 30, 1), - end_time=datetime.datetime(1998, 9, 4, 7, 31, 42), + end_time=None, labels=[("name", "test_label")], run_id="test-run", ), @@ -222,7 +233,7 @@ def test_generate_report_with_too_many_rows(module_under_test): ), }, start_time=datetime.datetime(1998, 9, 4, 7, 30, 1), - end_time=datetime.datetime(1998, 9, 4, 7, 31, 42), + end_time=None, labels=[("name", "test_label")], run_id="test-run", ), @@ -278,7 +289,7 @@ def test_generate_report_with_too_many_rows(module_under_test): ), }, start_time=datetime.datetime(1998, 9, 4, 7, 30, 1), - end_time=datetime.datetime(1998, 9, 4, 7, 31, 42), + end_time=None, labels=[("name", "test_label")], run_id="test-run", ), @@ -314,7 +325,7 @@ def test_generate_report_with_too_many_rows(module_under_test): ), ) def test_generate_report_without_group_by( - module_under_test, source_df, target_df, run_metadata, expected + module_under_test, patch_datetime_now, source_df, target_df, run_metadata, expected ): pandas_client = ibis.backends.pandas.connect( {"test_source": source_df, "test_target": target_df} @@ -375,7 +386,7 @@ def test_generate_report_without_group_by( ), }, start_time=datetime.datetime(1998, 9, 4, 7, 30, 1), - end_time=datetime.datetime(1998, 9, 4, 7, 31, 42), + end_time=None, labels=[("name", "group_label")], run_id="grouped-test", ), @@ -426,7 +437,7 @@ def test_generate_report_without_group_by( ), }, start_time=datetime.datetime(1998, 9, 4, 7, 30, 1), - end_time=datetime.datetime(1998, 9, 4, 7, 31, 42), + end_time=None, labels=[("name", "group_label")], run_id="grouped-test", ), @@ -484,7 +495,7 @@ def test_generate_report_without_group_by( ), }, start_time=datetime.datetime(1998, 9, 4, 7, 30, 1), - end_time=datetime.datetime(1998, 9, 4, 7, 31, 42), + end_time=None, labels=[("name", "group_label")], run_id="grouped-test", ), @@ -535,7 +546,13 @@ def test_generate_report_without_group_by( ), ) def test_generate_report_with_group_by( - module_under_test, source_df, target_df, join_on_fields, run_metadata, expected + module_under_test, + patch_datetime_now, + source_df, + target_df, + join_on_fields, + run_metadata, + expected, ): pandas_client = ibis.backends.pandas.connect( {"test_source": source_df, "test_target": target_df}