GoogleCloudPlatform · latika-wadhwa · Jun 9, 2022 · Jun 5, 2022 · Jun 8, 2022 · Jun 9, 2022
@@ -97,12 +97,26 @@ def execute(self):
 def schema_validation_matching(source_fields, target_fields):
     """Compare schemas between two dictionary objects"""
     results = []
+    # Casefold the source and target field names so they are matched case-insensitively
+    source_fields_casefold = dict(
+        {
+            source_field_name.casefold(): source_field_type
+            for source_field_name, source_field_type in source_fields.items()
+        }
+    )
+    target_fields_casefold = dict(
+        {
+            target_field_name.casefold(): target_field_type
+            for target_field_name, target_field_type in target_fields.items()
+        }
+    )
+
     # Go through each source and check if target exists and matches
-    for source_field_name, source_field_type in source_fields.items():
+    for source_field_name, source_field_type in source_fields_casefold.items():
         # target field exists
-        if source_field_name in target_fields:
+        if source_field_name in target_fields_casefold:
             # target data type matches
-            if source_field_type == target_fields[source_field_name]:
+            if source_field_type == target_fields_casefold[source_field_name]:
                 results.append(
                     [
                         source_field_name,
@@ -111,7 +125,7 @@ def schema_validation_matching(source_fields, target_fields):
                         "1",
                         consts.VALIDATION_STATUS_SUCCESS,
                         "Source_type:{} Target_type:{}".format(
-                            source_field_type, target_fields[source_field_name]
+                            source_field_type, target_fields_casefold[source_field_name]
                         ),
                     ]
                 )
@@ -125,7 +139,7 @@ def schema_validation_matching(source_fields, target_fields):
                         "1",
                         consts.VALIDATION_STATUS_FAIL,
                         "Data type mismatch between source and target. Source_type:{} Target_type:{}".format(
-                            source_field_type, target_fields[source_field_name]
+                            source_field_type, target_fields_casefold[source_field_name]
                         ),
                     ]
                 )
@@ -143,8 +157,8 @@ def schema_validation_matching(source_fields, target_fields):
             )
 
     # source field doesn't exist
-    for target_field_name, target_field_type in target_fields.items():
-        if target_field_name not in source_fields:
+    for target_field_name, target_field_type in target_fields_casefold.items():
+        if target_field_name not in source_fields_casefold:
             results.append(
                 [
                     "N/A",

@@ -143,7 +143,7 @@ def test_import(module_under_test):
 
 
 def test_schema_validation_matching(module_under_test):
-    source_fields = {"field1": "string", "field2": "datetime", "field3": "string"}
+    source_fields = {"FIELD1": "string", "fiEld2": "datetime", "field3": "string"}
     target_fields = {"field1": "string", "field2": "timestamp", "field_3": "string"}
 
     expected_results = [
@@ -202,7 +202,6 @@ def test_execute(module_under_test, fs):
     failures = result_df[
         result_df["validation_status"].str.contains(consts.VALIDATION_STATUS_FAIL)
     ]
-
     assert len(result_df) == len(source_data[0]) + 1
     assert result_df["source_agg_value"].astype(float).sum() == 7
     assert result_df["target_agg_value"].astype(float).sum() == 7