From de3758ef4b40eb6c443658546d5c8f5843e58b75 Mon Sep 17 00:00:00 2001 From: sramsrinivasan <107146913+sramsrinivasan@users.noreply.github.com> Date: Mon, 10 Oct 2022 11:38:34 -0500 Subject: [PATCH] fix: Row validation optimization to avoid select all columns (#599) --- data_validation/query_builder/query_builder.py | 14 +++++++++++--- data_validation/validation_builder.py | 4 ++-- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/data_validation/query_builder/query_builder.py b/data_validation/query_builder/query_builder.py index 67b38b75f..2c20ef248 100644 --- a/data_validation/query_builder/query_builder.py +++ b/data_validation/query_builder/query_builder.py @@ -466,7 +466,7 @@ def compile_calculated_fields(self, table, n=0): # else: # return [field.compile(table) for field in self.calculated_fields] - def compile(self, data_client, schema_name, table_name): + def compile(self, validation_type, data_client, schema_name, table_name): """Return an Ibis query object Args: @@ -486,8 +486,16 @@ def compile(self, data_client, schema_name, table_name): calc_table = calc_table.mutate( self.compile_calculated_fields(calc_table, n) ) - if self.comparison_fields: - calc_table = calc_table.mutate(self.compile_comparison_fields(calc_table)) + + if validation_type == consts.ROW_VALIDATION: + calc_table = calc_table.projection( + self.compile_comparison_fields(calc_table) + ) + else: + if self.comparison_fields: + calc_table = calc_table.mutate( + self.compile_comparison_fields(calc_table) + ) compiled_filters = self.compile_filter_fields(calc_table) filtered_table = ( calc_table.filter(compiled_filters) if compiled_filters else calc_table diff --git a/data_validation/validation_builder.py b/data_validation/validation_builder.py index bbfa5c638..557863079 100644 --- a/data_validation/validation_builder.py +++ b/data_validation/validation_builder.py @@ -383,7 +383,7 @@ def get_source_query(self): f"Input custom query type: {self.config_manager.custom_query_type}" ) else: - query = self.source_builder.compile(**source_config) + query = self.source_builder.compile(self.validation_type, **source_config) if self.verbose: logging.info(source_config) logging.info("-- ** Source Query ** --") @@ -427,7 +427,7 @@ def get_target_query(self): f"Input custom query type: {self.config_manager.custom_query_type}" ) else: - query = self.target_builder.compile(**target_config) + query = self.target_builder.compile(self.validation_type, **target_config) if self.verbose: logging.info(target_config) logging.info("-- ** Target Query ** --")