Improve data_utils (#463)
* Add normal stat support to statistical_to_dataframe

* fix _get_failed_batch_response

* update test cases
chorng committed May 9, 2023
1 parent c112f0f commit eb60dac
Showing 4 changed files with 105 additions and 42 deletions.
29 changes: 21 additions & 8 deletions sentinelhub/data_utils.py
@@ -89,6 +89,11 @@ def _is_batch_stat(result_data: JsonDict) -> bool:
     return "id" in result_data


+def _is_valid_batch_response(result_data: JsonDict) -> bool:
+    """Identifies whether there is a valid batch response"""
+    return "error" not in result_data and result_data["response"]["status"] == "OK"
+
+
 def statistical_to_dataframe(result_data: List[JsonDict], exclude_stats: Optional[List[str]] = None) -> Any:
     """Transform (Batch) Statistical API results into a pandas.DataFrame
@@ -112,13 +117,21 @@ def statistical_to_dataframe(result_data: List[JsonDict], exclude_stats: Optiona
     nresults = len(result_data)
     dfs = [None] * nresults
     for idx in range(nresults):
-        identifier, response = result_data[idx]["identifier"], result_data[idx]["response"]
-        if response:
-            result_entries = _extract_response_data(response["data"], exclude_stats)
-            result_df = pandas.DataFrame(result_entries)
-            result_df["identifier"] = identifier
-            dfs[idx] = result_df
+        result = result_data[idx]
+
+        # valid batch stat response
+        if _is_batch_stat(result) and _is_valid_batch_response(result):
+            identifier, response_data = result["identifier"], result["response"]["data"]
+
+        # valid normal stat response
+        elif not _is_batch_stat(result) and "data" in result:
+            identifier, response_data = str(idx), result["data"]
+        else:
+            continue
+        result_entries = _extract_response_data(response_data, exclude_stats)
+        result_df = pandas.DataFrame(result_entries)
+        result_df["identifier"] = identifier
+        dfs[idx] = result_df
     return pandas.concat(dfs)


@@ -139,7 +152,7 @@ def _get_failed_batch_response(result_data: JsonDict) -> Union[str, List[Tuple[s
     :param result_data: An input representation of the (Batch) Statistical API result of a geometry.
     :return: Failed responses and responses with failed intervals
     """
-    if "error" in result_data or not result_data["response"]:
+    if "error" in result_data or result_data["response"]["status"] == "FAILED":
         return _FULL_TIME_RANGE
     return _get_failed_intervals(result_data["response"]["data"])

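With this change, statistical_to_dataframe accepts both Batch Statistical results (entries carrying "id", "identifier" and a "response" with "status" and "data") and plain Statistical API results (entries with a top-level "data" key). A minimal usage sketch, assuming the repository's test fixtures are available locally and that read_data is importable from the package top level:

import os

from sentinelhub import read_data
from sentinelhub.data_utils import statistical_to_dataframe

input_folder = "tests/TestInputs"  # repository test fixtures; adjust to your checkout

# Batch Statistical results: each entry has "id", "identifier" and a "response"
batch_df = statistical_to_dataframe(read_data(os.path.join(input_folder, "batch_stat_results.json")))

# Plain Statistical API results: entries expose "data" directly; the "identifier"
# column falls back to the stringified list index (here "0")
normal_df = statistical_to_dataframe(read_data(os.path.join(input_folder, "normal_stat_result.json")))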
2 changes: 1 addition & 1 deletion tests/TestInputs/batch_stat_failed_results.json
@@ -2,7 +2,7 @@
   {
     "id": 3,
     "identifier": "SI21.FOI.6620269001",
-    "response": null
+    "response": {"data":null,"status":"FAILED"}
   },
   {
     "id": 4,
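The updated fixture exercises the fixed branch of _get_failed_batch_response. A minimal sketch, assuming the private helper keeps the shapes shown in this diff:

from sentinelhub.data_utils import _get_failed_batch_response

failed_entry = {
    "id": 3,
    "identifier": "SI21.FOI.6620269001",
    "response": {"data": None, "status": "FAILED"},
}

# A FAILED status (rather than a missing response object) now marks the whole
# time range as failed, returning the module-level _FULL_TIME_RANGE sentinel.
print(_get_failed_batch_response(failed_entry))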
95 changes: 67 additions & 28 deletions tests/TestInputs/normal_stat_partial_result.json
@@ -14,36 +14,75 @@
         "to": "2020-06-13T00:00:00Z"
       },
       "outputs": {
-        "rgb": {
+        "ndvi": {
           "bands": {
-            "R": {
+            "B0": {
               "stats": {
-                "min": 0.004600000102072954,
-                "max": 0.7160000205039978,
-                "mean": 0.11546704480268109,
-                "stDev": 0.06332157724593372,
-                "sampleCount": 660657,
-                "noDataCount": 0
-              }
-            },
-            "B": {
-              "stats": {
-                "min": 0.05920000001788139,
-                "max": 0.5658000111579895,
-                "mean": 0.11913311262009575,
-                "stDev": 0.04636540384197817,
-                "sampleCount": 660657,
-                "noDataCount": 0
-              }
-            },
-            "G": {
-              "stats": {
-                "min": 0.03779999911785126,
-                "max": 0.6126000285148621,
-                "mean": 0.11290437308774512,
-                "stDev": 0.048338260231641964,
-                "sampleCount": 660657,
-                "noDataCount": 0
+                "min": 0.23480869829654694,
+                "max": 0.7734549045562744,
+                "mean": 0.38401383599010064,
+                "stDev": 0.11153442042832748,
+                "sampleCount": 112,
+                "noDataCount": 3,
+                "percentiles": {
+                  "50.0": 0.3445783257484436
+                }
+              },
+              "histogram": {
+                "bins": [
+                  {
+                    "lowEdge": -1.0,
+                    "highEdge": -0.8,
+                    "count": 0
+                  },
+                  {
+                    "lowEdge": -0.8,
+                    "highEdge": -0.6,
+                    "count": 0
+                  },
+                  {
+                    "lowEdge": -0.6,
+                    "highEdge": -0.3999999999999999,
+                    "count": 0
+                  },
+                  {
+                    "lowEdge": -0.3999999999999999,
+                    "highEdge": -0.19999999999999996,
+                    "count": 0
+                  },
+                  {
+                    "lowEdge": -0.19999999999999996,
+                    "highEdge": 0.0,
+                    "count": 0
+                  },
+                  {
+                    "lowEdge": 0.0,
+                    "highEdge": 0.20000000000000018,
+                    "count": 0
+                  },
+                  {
+                    "lowEdge": 0.20000000000000018,
+                    "highEdge": 0.40000000000000013,
+                    "count": 68
+                  },
+                  {
+                    "lowEdge": 0.40000000000000013,
+                    "highEdge": 0.6000000000000001,
+                    "count": 35
+                  },
+                  {
+                    "lowEdge": 0.6000000000000001,
+                    "highEdge": 0.8,
+                    "count": 6
+                  },
+                  {
+                    "lowEdge": 0.8,
+                    "highEdge": 1.0,
+                    "count": 0
+                  }
+                ],
+                "overflowCount": 0,
+                "underflowCount": 0
+              }
             }
           }
21 changes: 16 additions & 5 deletions tests/test_data_utils.py
@@ -16,13 +16,24 @@
 ]


-def test_statistical_to_dataframe(input_folder: str) -> None:
-    batch_stat_results_path = os.path.join(input_folder, "batch_stat_results.json")
+@pytest.mark.parametrize(
+    "result_file, expected_npolygons, expected_ncolumns, expected_nrows",
+    [
+        ("batch_stat_results.json", 2, 12, 2),
+        ("batch_stat_failed_results.json", 1, 12, 1),
+        ("normal_stat_result.json", 1, 12, 1),
+        ("normal_stat_partial_result.json", 1, 12, 1),
+    ],
+)
+def test_statistical_to_dataframe(
+    input_folder: str, result_file: str, expected_npolygons: int, expected_ncolumns: int, expected_nrows: int
+) -> None:
+    batch_stat_results_path = os.path.join(input_folder, result_file)
     batch_stat_results = read_data(batch_stat_results_path)
     df = statistical_to_dataframe(batch_stat_results)
-    assert len(set(df["identifier"])) == 2, "Wrong number of polygons"
-    assert len(df.columns) == 12, "Wrong number of columns"
-    assert len(df) == 2, "Wrong number of valid rows"
+    assert len(set(df["identifier"])) == expected_npolygons, "Wrong number of polygons"
+    assert len(df.columns) == expected_ncolumns, "Wrong number of columns"
+    assert len(df) == expected_nrows, "Wrong number of valid rows"
     for data_type, columns in column_type_pairs:
         assert all(isinstance(df[column].iloc[0], data_type) for column in columns), "Wrong data type of columns"
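To run just this parametrized test (four cases, one per fixture file), a sketch assuming pytest is installed and the repository root is the working directory:

import pytest

# Equivalent to running `pytest -q tests/test_data_utils.py::test_statistical_to_dataframe`
pytest.main(["-q", "tests/test_data_utils.py::test_statistical_to_dataframe"])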
