Improve data_utils (#463)
* Add normal stat support to statistical_to_dataframe

* fix _get_failed_batch_response

* update test cases
chorng committed May 9, 2023
1 parent c112f0f commit eb60dac
Showing 4 changed files with 105 additions and 42 deletions.
29 changes: 21 additions & 8 deletions sentinelhub/data_utils.py
@@ -89,6 +89,11 @@ def _is_batch_stat(result_data: JsonDict) -> bool:
     return "id" in result_data


+def _is_valid_batch_response(result_data: JsonDict) -> bool:
+    """Identifies whether there is a valid batch response"""
+    return "error" not in result_data and result_data["response"]["status"] == "OK"
+
+
 def statistical_to_dataframe(result_data: List[JsonDict], exclude_stats: Optional[List[str]] = None) -> Any:
     """Transform (Batch) Statistical API results into a pandas.DataFrame
@@ -112,13 +117,21 @@ def statistical_to_dataframe(result_data: List[JsonDict], exclude_stats: Optiona
     nresults = len(result_data)
     dfs = [None] * nresults
     for idx in range(nresults):
-        identifier, response = result_data[idx]["identifier"], result_data[idx]["response"]
-        if response:
-            result_entries = _extract_response_data(response["data"], exclude_stats)
-            result_df = pandas.DataFrame(result_entries)
-            result_df["identifier"] = identifier
-            dfs[idx] = result_df
+        result = result_data[idx]
+
+        # valid batch stat response
+        if _is_batch_stat(result) and _is_valid_batch_response(result):
+            identifier, response_data = result["identifier"], result["response"]["data"]
+
+        # valid normal stat response
+        elif not _is_batch_stat(result) and "data" in result:
+            identifier, response_data = str(idx), result["data"]
+        else:
+            continue
+        result_entries = _extract_response_data(response_data, exclude_stats)
+        result_df = pandas.DataFrame(result_entries)
+        result_df["identifier"] = identifier
+        dfs[idx] = result_df
     return pandas.concat(dfs)


@@ -139,7 +152,7 @@ def _get_failed_batch_response(result_data: JsonDict) -> Union[str, List[Tuple[s
     :param result_data: An input representation of the (Batch) Statistical API result of a geometry.
     :return: Failed responses and responses with failed intervals
     """
-    if "error" in result_data or not result_data["response"]:
+    if "error" in result_data or result_data["response"]["status"] == "FAILED":
         return _FULL_TIME_RANGE
     return _get_failed_intervals(result_data["response"]["data"])

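With this change, statistical_to_dataframe accepts both Batch Statistical results (entries carrying "id", "identifier" and a "response" with "status" and "data") and plain Statistical API results (entries with a top-level "data" key). A minimal usage sketch, assuming the repository's test fixtures are available locally and that read_data is importable from the package top level:

import os

from sentinelhub import read_data
from sentinelhub.data_utils import statistical_to_dataframe

input_folder = "tests/TestInputs"  # repository test fixtures; adjust to your checkout

# Batch Statistical results: each entry has "id", "identifier" and a "response"
batch_df = statistical_to_dataframe(read_data(os.path.join(input_folder, "batch_stat_results.json")))

# Plain Statistical API results: entries expose "data" directly; the "identifier"
# column falls back to the stringified list index (here "0")
normal_df = statistical_to_dataframe(read_data(os.path.join(input_folder, "normal_stat_result.json")))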
2 changes: 1 addition & 1 deletion tests/TestInputs/batch_stat_failed_results.json
@@ -2,7 +2,7 @@
   {
     "id": 3,
     "identifier": "SI21.FOI.6620269001",
-    "response": null
+    "response": {"data":null,"status":"FAILED"}
   },
   {
     "id": 4,
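The updated fixture exercises the fixed branch of _get_failed_batch_response. A minimal sketch, assuming the private helper keeps the shapes shown in this diff:

from sentinelhub.data_utils import _get_failed_batch_response

failed_entry = {
    "id": 3,
    "identifier": "SI21.FOI.6620269001",
    "response": {"data": None, "status": "FAILED"},
}

# A FAILED status (rather than a missing response object) now marks the whole
# time range as failed, returning the module-level _FULL_TIME_RANGE sentinel.
print(_get_failed_batch_response(failed_entry))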
95 changes: 67 additions & 28 deletions tests/TestInputs/normal_stat_partial_result.json
@@ -14,36 +14,75 @@
         "to": "2020-06-13T00:00:00Z"
       },
       "outputs": {
-        "rgb": {
+        "ndvi": {
           "bands": {
-            "R": {
+            "B0": {
               "stats": {
-                "min": 0.004600000102072954,
-                "max": 0.7160000205039978,
-                "mean": 0.11546704480268109,
-                "stDev": 0.06332157724593372,
-                "sampleCount": 660657,
-                "noDataCount": 0
-              }
-            },
-            "B": {
-              "stats": {
-                "min": 0.05920000001788139,
-                "max": 0.5658000111579895,
-                "mean": 0.11913311262009575,
-                "stDev": 0.04636540384197817,
-                "sampleCount": 660657,
-                "noDataCount": 0
-              }
-            },
-            "G": {
-              "stats": {
-                "min": 0.03779999911785126,
-                "max": 0.6126000285148621,
-                "mean": 0.11290437308774512,
-                "stDev": 0.048338260231641964,
-                "sampleCount": 660657,
-                "noDataCount": 0
+                "min": 0.23480869829654694,
+                "max": 0.7734549045562744,
+                "mean": 0.38401383599010064,
+                "stDev": 0.11153442042832748,
+                "sampleCount": 112,
+                "noDataCount": 3,
+                "percentiles": {
+                  "50.0": 0.3445783257484436
+                }
+              },
+              "histogram": {
+                "bins": [
+                  {
+                    "lowEdge": -1.0,
+                    "highEdge": -0.8,
+                    "count": 0
+                  },
+                  {
+                    "lowEdge": -0.8,
+                    "highEdge": -0.6,
+                    "count": 0
+                  },
+                  {
+                    "lowEdge": -0.6,
+                    "highEdge": -0.3999999999999999,
+                    "count": 0
+                  },
+                  {
+                    "lowEdge": -0.3999999999999999,
+                    "highEdge": -0.19999999999999996,
+                    "count": 0
+                  },
+                  {
+                    "lowEdge": -0.19999999999999996,
+                    "highEdge": 0.0,
+                    "count": 0
+                  },
+                  {
+                    "lowEdge": 0.0,
+                    "highEdge": 0.20000000000000018,
+                    "count": 0
+                  },
+                  {
+                    "lowEdge": 0.20000000000000018,
+                    "highEdge": 0.40000000000000013,
+                    "count": 68
+                  },
+                  {
+                    "lowEdge": 0.40000000000000013,
+                    "highEdge": 0.6000000000000001,
+                    "count": 35
+                  },
+                  {
+                    "lowEdge": 0.6000000000000001,
+                    "highEdge": 0.8,
+                    "count": 6
+                  },
+                  {
+                    "lowEdge": 0.8,
+                    "highEdge": 1.0,
+                    "count": 0
+                  }
+                ],
+                "overflowCount": 0,
+                "underflowCount": 0
+              }
             }
           }
21 changes: 16 additions & 5 deletions tests/test_data_utils.py
@@ -16,13 +16,24 @@
 ]


-def test_statistical_to_dataframe(input_folder: str) -> None:
-    batch_stat_results_path = os.path.join(input_folder, "batch_stat_results.json")
+@pytest.mark.parametrize(
+    "result_file, expected_npolygons, expected_ncolumns, expected_nrows",
+    [
+        ("batch_stat_results.json", 2, 12, 2),
+        ("batch_stat_failed_results.json", 1, 12, 1),
+        ("normal_stat_result.json", 1, 12, 1),
+        ("normal_stat_partial_result.json", 1, 12, 1),
+    ],
+)
+def test_statistical_to_dataframe(
+    input_folder: str, result_file: str, expected_npolygons: int, expected_ncolumns: int, expected_nrows: int
+) -> None:
+    batch_stat_results_path = os.path.join(input_folder, result_file)
     batch_stat_results = read_data(batch_stat_results_path)
     df = statistical_to_dataframe(batch_stat_results)
-    assert len(set(df["identifier"])) == 2, "Wrong number of polygons"
-    assert len(df.columns) == 12, "Wrong number of columns"
-    assert len(df) == 2, "Wrong number of valid rows"
+    assert len(set(df["identifier"])) == expected_npolygons, "Wrong number of polygons"
+    assert len(df.columns) == expected_ncolumns, "Wrong number of columns"
+    assert len(df) == expected_nrows, "Wrong number of valid rows"
     for data_type, columns in column_type_pairs:
         assert all(isinstance(df[column].iloc[0], data_type) for column in columns), "Wrong data type of columns"
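To run just this parametrized test (four cases, one per fixture file), a sketch assuming pytest is installed and the repository root is the working directory:

import pytest

# Equivalent to running `pytest -q tests/test_data_utils.py::test_statistical_to_dataframe`
pytest.main(["-q", "tests/test_data_utils.py::test_statistical_to_dataframe"])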
