Skip to content

Commit

Permalink
fix: Issue348 casting (#349)
Browse files Browse the repository at this point in the history
* fix: fixed alias error in casting, updated end_time variable, added to yaml example

* fix: updated tests

* fix: lint
  • Loading branch information
nehanene15 committed Dec 1, 2021
1 parent 80d26c6 commit 1560c7e
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 14 deletions.
3 changes: 3 additions & 0 deletions data_validation/combiner.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
original data type is used.
"""

import datetime
import functools
import json

Expand Down Expand Up @@ -290,6 +291,8 @@ def _join_pivots(source, target, differences, join_on_fields):

def _add_metadata(joined, run_metadata):
# TODO: Add source and target queries to metadata
run_metadata.end_time = datetime.datetime.now(datetime.timezone.utc)

joined = joined[
joined,
ibis.literal(run_metadata.run_id).name("run_id"),
Expand Down
2 changes: 0 additions & 2 deletions data_validation/data_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import datetime
import json
import logging
import warnings
Expand Down Expand Up @@ -293,7 +292,6 @@ def _execute_validation(self, validation_builder, process_in_memory=True):
verbose=self.verbose,
)

self.run_metadata.end_time = datetime.datetime.now(datetime.timezone.utc)
return result_df

def combine_data(self, source_df, target_df, join_on_fields):
Expand Down
6 changes: 3 additions & 3 deletions data_validation/query_builder/query_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,12 +102,12 @@ def compile(self, ibis_table):
else:
agg_field = self.expr(ibis_table)

if self.alias:
agg_field = agg_field.name(self.alias)

if self.cast:
agg_field = agg_field.cast(self.cast)

if self.alias:
agg_field = agg_field.name(self.alias)

return agg_field


Expand Down
2 changes: 2 additions & 0 deletions docs/examples.md
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ result_handler:
project_id: my-project-id
table_id: pso_data_validator.results
type: BigQuery
google_service_account_key_path: path/to/sa.json
source: my_bq_conn
target: my_bq_conn
validations:
Expand All @@ -153,6 +154,7 @@ validations:
source_column: num_bikes_available
target_column: num_bikes_available
type: sum
cast: float64
- field_alias: sum__num_docks_available
source_column: num_docks_available
target_column: num_docks_available
Expand Down
35 changes: 26 additions & 9 deletions tests/unit/test_combiner.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@


_NAN = float("nan")
FAKE_TIME = datetime.datetime(1998, 9, 4, 7, 31, 42)

EXAMPLE_RUN_METADATA = metadata.RunMetadata(
validations={
Expand Down Expand Up @@ -52,6 +53,16 @@ def module_under_test():
return combiner


@pytest.fixture
def patch_datetime_now(monkeypatch):
    """Make ``datetime.datetime.now`` return ``FAKE_TIME`` for one test.

    The ``datetime`` attribute of the ``datetime`` module is swapped for a
    stub class through ``monkeypatch``, so code that calls
    ``datetime.datetime.now(...)`` via the module gets the fixed timestamp.
    """

    # Subclass the real class so that everything other than ``now`` (the
    # constructor, ``fromtimestamp``, ...) keeps working while the patch is
    # active; a bare stand-in class would break those calls.
    class mydatetime(datetime.datetime):
        @classmethod
        def now(cls, tz=None):
            # Same signature as the real ``now``; the requested timezone is
            # deliberately ignored and the canned value is returned as-is.
            return FAKE_TIME

    monkeypatch.setattr(datetime, "datetime", mydatetime)


def test_generate_report_with_different_columns(module_under_test):
source = pandas.DataFrame({"count": [1], "sum": [3]})
target = pandas.DataFrame({"count": [2]})
Expand Down Expand Up @@ -116,7 +127,7 @@ def test_generate_report_with_too_many_rows(module_under_test):
),
},
start_time=datetime.datetime(1998, 9, 4, 7, 30, 1),
end_time=datetime.datetime(1998, 9, 4, 7, 31, 42),
end_time=None,
labels=[("name", "test_label")],
run_id="test-run",
),
Expand Down Expand Up @@ -165,7 +176,7 @@ def test_generate_report_with_too_many_rows(module_under_test):
),
},
start_time=datetime.datetime(1998, 9, 4, 7, 30, 1),
end_time=datetime.datetime(1998, 9, 4, 7, 31, 42),
end_time=None,
labels=[("name", "test_label")],
run_id="test-run",
),
Expand Down Expand Up @@ -222,7 +233,7 @@ def test_generate_report_with_too_many_rows(module_under_test):
),
},
start_time=datetime.datetime(1998, 9, 4, 7, 30, 1),
end_time=datetime.datetime(1998, 9, 4, 7, 31, 42),
end_time=None,
labels=[("name", "test_label")],
run_id="test-run",
),
Expand Down Expand Up @@ -278,7 +289,7 @@ def test_generate_report_with_too_many_rows(module_under_test):
),
},
start_time=datetime.datetime(1998, 9, 4, 7, 30, 1),
end_time=datetime.datetime(1998, 9, 4, 7, 31, 42),
end_time=None,
labels=[("name", "test_label")],
run_id="test-run",
),
Expand Down Expand Up @@ -314,7 +325,7 @@ def test_generate_report_with_too_many_rows(module_under_test):
),
)
def test_generate_report_without_group_by(
module_under_test, source_df, target_df, run_metadata, expected
module_under_test, patch_datetime_now, source_df, target_df, run_metadata, expected
):
pandas_client = ibis.backends.pandas.connect(
{"test_source": source_df, "test_target": target_df}
Expand Down Expand Up @@ -375,7 +386,7 @@ def test_generate_report_without_group_by(
),
},
start_time=datetime.datetime(1998, 9, 4, 7, 30, 1),
end_time=datetime.datetime(1998, 9, 4, 7, 31, 42),
end_time=None,
labels=[("name", "group_label")],
run_id="grouped-test",
),
Expand Down Expand Up @@ -426,7 +437,7 @@ def test_generate_report_without_group_by(
),
},
start_time=datetime.datetime(1998, 9, 4, 7, 30, 1),
end_time=datetime.datetime(1998, 9, 4, 7, 31, 42),
end_time=None,
labels=[("name", "group_label")],
run_id="grouped-test",
),
Expand Down Expand Up @@ -484,7 +495,7 @@ def test_generate_report_without_group_by(
),
},
start_time=datetime.datetime(1998, 9, 4, 7, 30, 1),
end_time=datetime.datetime(1998, 9, 4, 7, 31, 42),
end_time=None,
labels=[("name", "group_label")],
run_id="grouped-test",
),
Expand Down Expand Up @@ -535,7 +546,13 @@ def test_generate_report_without_group_by(
),
)
def test_generate_report_with_group_by(
module_under_test, source_df, target_df, join_on_fields, run_metadata, expected
module_under_test,
patch_datetime_now,
source_df,
target_df,
join_on_fields,
run_metadata,
expected,
):
pandas_client = ibis.backends.pandas.connect(
{"test_source": source_df, "test_target": target_df}
Expand Down

0 comments on commit 1560c7e

Please sign in to comment.