Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Issue348 casting #349

Merged
merged 3 commits into from
Dec 1, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions data_validation/combiner.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
original data type is used.
"""

import datetime
import functools
import json

Expand Down Expand Up @@ -290,6 +291,8 @@ def _join_pivots(source, target, differences, join_on_fields):

def _add_metadata(joined, run_metadata):
# TODO: Add source and target queries to metadata
run_metadata.end_time = datetime.datetime.now(datetime.timezone.utc)

joined = joined[
joined,
ibis.literal(run_metadata.run_id).name("run_id"),
Expand Down
2 changes: 0 additions & 2 deletions data_validation/data_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import datetime
import json
import logging
import warnings
Expand Down Expand Up @@ -293,7 +292,6 @@ def _execute_validation(self, validation_builder, process_in_memory=True):
verbose=self.verbose,
)

self.run_metadata.end_time = datetime.datetime.now(datetime.timezone.utc)
return result_df

def combine_data(self, source_df, target_df, join_on_fields):
Expand Down
6 changes: 3 additions & 3 deletions data_validation/query_builder/query_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,12 +102,12 @@ def compile(self, ibis_table):
else:
agg_field = self.expr(ibis_table)

if self.alias:
agg_field = agg_field.name(self.alias)

if self.cast:
agg_field = agg_field.cast(self.cast)

if self.alias:
agg_field = agg_field.name(self.alias)

return agg_field


Expand Down
2 changes: 2 additions & 0 deletions docs/examples.md
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ result_handler:
project_id: my-project-id
table_id: pso_data_validator.results
type: BigQuery
google_service_account_key_path: path/to/sa.json
source: my_bq_conn
target: my_bq_conn
validations:
Expand All @@ -153,6 +154,7 @@ validations:
source_column: num_bikes_available
target_column: num_bikes_available
type: sum
cast: float64
- field_alias: sum__num_docks_available
source_column: num_docks_available
target_column: num_docks_available
Expand Down
35 changes: 26 additions & 9 deletions tests/unit/test_combiner.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@


_NAN = float("nan")
FAKE_TIME = datetime.datetime(1998, 9, 4, 7, 31, 42)

EXAMPLE_RUN_METADATA = metadata.RunMetadata(
validations={
Expand Down Expand Up @@ -52,6 +53,16 @@ def module_under_test():
return combiner


@pytest.fixture
def patch_datetime_now(monkeypatch):
    """Freeze ``datetime.datetime.now`` at ``FAKE_TIME`` for a single test.

    Swaps the ``datetime`` attribute of the ``datetime`` module for a minimal
    stand-in class whose only member is ``now``. Code under test that looks up
    ``datetime.datetime.now(...)`` at call time therefore receives
    ``FAKE_TIME``. The swap is installed through ``monkeypatch.setattr``.

    Note that the stand-in is not constructible like the real class, so any
    ``datetime.datetime(...)`` call made while the patch is active will fail.
    """

    class mydatetime:
        @classmethod
        def now(cls, tz=None):
            # Mirror the real ``datetime.datetime.now(tz=None)`` signature so
            # that no-argument and ``tz=`` keyword calls work as well as the
            # single positional form. The argument itself is ignored.
            return FAKE_TIME

    monkeypatch.setattr(datetime, "datetime", mydatetime)


def test_generate_report_with_different_columns(module_under_test):
source = pandas.DataFrame({"count": [1], "sum": [3]})
target = pandas.DataFrame({"count": [2]})
Expand Down Expand Up @@ -116,7 +127,7 @@ def test_generate_report_with_too_many_rows(module_under_test):
),
},
start_time=datetime.datetime(1998, 9, 4, 7, 30, 1),
end_time=datetime.datetime(1998, 9, 4, 7, 31, 42),
end_time=None,
labels=[("name", "test_label")],
run_id="test-run",
),
Expand Down Expand Up @@ -165,7 +176,7 @@ def test_generate_report_with_too_many_rows(module_under_test):
),
},
start_time=datetime.datetime(1998, 9, 4, 7, 30, 1),
end_time=datetime.datetime(1998, 9, 4, 7, 31, 42),
end_time=None,
labels=[("name", "test_label")],
run_id="test-run",
),
Expand Down Expand Up @@ -222,7 +233,7 @@ def test_generate_report_with_too_many_rows(module_under_test):
),
},
start_time=datetime.datetime(1998, 9, 4, 7, 30, 1),
end_time=datetime.datetime(1998, 9, 4, 7, 31, 42),
end_time=None,
labels=[("name", "test_label")],
run_id="test-run",
),
Expand Down Expand Up @@ -278,7 +289,7 @@ def test_generate_report_with_too_many_rows(module_under_test):
),
},
start_time=datetime.datetime(1998, 9, 4, 7, 30, 1),
end_time=datetime.datetime(1998, 9, 4, 7, 31, 42),
end_time=None,
labels=[("name", "test_label")],
run_id="test-run",
),
Expand Down Expand Up @@ -314,7 +325,7 @@ def test_generate_report_with_too_many_rows(module_under_test):
),
)
def test_generate_report_without_group_by(
module_under_test, source_df, target_df, run_metadata, expected
module_under_test, patch_datetime_now, source_df, target_df, run_metadata, expected
):
pandas_client = ibis.backends.pandas.connect(
{"test_source": source_df, "test_target": target_df}
Expand Down Expand Up @@ -375,7 +386,7 @@ def test_generate_report_without_group_by(
),
},
start_time=datetime.datetime(1998, 9, 4, 7, 30, 1),
end_time=datetime.datetime(1998, 9, 4, 7, 31, 42),
end_time=None,
labels=[("name", "group_label")],
run_id="grouped-test",
),
Expand Down Expand Up @@ -426,7 +437,7 @@ def test_generate_report_without_group_by(
),
},
start_time=datetime.datetime(1998, 9, 4, 7, 30, 1),
end_time=datetime.datetime(1998, 9, 4, 7, 31, 42),
end_time=None,
labels=[("name", "group_label")],
run_id="grouped-test",
),
Expand Down Expand Up @@ -484,7 +495,7 @@ def test_generate_report_without_group_by(
),
},
start_time=datetime.datetime(1998, 9, 4, 7, 30, 1),
end_time=datetime.datetime(1998, 9, 4, 7, 31, 42),
end_time=None,
labels=[("name", "group_label")],
run_id="grouped-test",
),
Expand Down Expand Up @@ -535,7 +546,13 @@ def test_generate_report_without_group_by(
),
)
def test_generate_report_with_group_by(
module_under_test, source_df, target_df, join_on_fields, run_metadata, expected
module_under_test,
patch_datetime_now,
source_df,
target_df,
join_on_fields,
run_metadata,
expected,
):
pandas_client = ibis.backends.pandas.connect(
{"test_source": source_df, "test_target": target_df}
Expand Down