-
Notifications
You must be signed in to change notification settings - Fork 692
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: enhance analysis options with od model dump and better vis (#3234)
This PR adds new capabilities for drawing bboxes for each layout (extracted, inferred, ocr and final) + OD model output dump as a json file for better analysis. --------- Co-authored-by: Christine Straub <[email protected]> Co-authored-by: Michal Martyniak <[email protected]>
- Loading branch information
1 parent
f2fee0c
commit 575957b
Showing
12 changed files
with
1,127 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -204,3 +204,6 @@ examples/**/output/ | |
|
||
outputdiff.txt | ||
metricsdiff.txt | ||
|
||
# analysis | ||
annotated/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,154 @@ | ||
import numpy as np | ||
import pytest | ||
from PIL import Image | ||
from unstructured_inference.inference.elements import Rectangle | ||
from unstructured_inference.inference.layout import DocumentLayout, PageLayout | ||
from unstructured_inference.inference.layoutelement import LayoutElement | ||
|
||
from unstructured.partition.pdf_image.analysis.bbox_visualisation import ( | ||
TextAlignment, | ||
get_bbox_text_size, | ||
get_bbox_thickness, | ||
get_label_rect_and_coords, | ||
get_rgb_color, | ||
get_text_color, | ||
) | ||
from unstructured.partition.pdf_image.analysis.layout_dump import ObjectDetectionLayoutDumper | ||
|
||
|
||
@pytest.mark.parametrize("color", ["red", "green", "blue", "yellow", "black", "white"]) | ||
def test_get_rgb_color(color: str): | ||
color_tuple = get_rgb_color(color) | ||
|
||
assert isinstance(color_tuple, tuple) | ||
assert len(color_tuple) == 3 | ||
assert all(isinstance(c, int) for c in color_tuple) | ||
assert all(0 <= c <= 255 for c in color_tuple) | ||
|
||
|
||
@pytest.mark.parametrize( | ||
("bbox", "expected_text_size"), | ||
[ | ||
((0, 0, 90, 90), 17), | ||
((0, 0, 500, 200), 21), | ||
((0, 0, 10000, 10000), 32), | ||
], | ||
) | ||
def test_get_bbox_text_size(bbox: tuple[int, int, int, int], expected_text_size): | ||
page_size = (1700, 2200) # standard size of a page | ||
text_size = get_bbox_text_size(bbox, page_size) | ||
|
||
assert text_size == expected_text_size | ||
|
||
|
||
@pytest.mark.parametrize( | ||
("bbox", "expected_box_thickness"), | ||
[ | ||
((0, 0, 90, 90), 1), | ||
((0, 0, 450, 250), 2), | ||
((0, 0, 600, 1000), 3), | ||
], | ||
) | ||
def test_get_bbox_thickness(bbox: tuple[int, int, int, int], expected_box_thickness): | ||
page_size = (1700, 2200) # standard size of a page | ||
box_thickness = get_bbox_thickness(bbox, page_size) | ||
|
||
assert box_thickness == expected_box_thickness | ||
|
||
|
||
@pytest.mark.parametrize( | ||
("color", "expected_text_color"), | ||
[ | ||
("navy", "white"), | ||
("crimson", "white"), | ||
("maroon", "white"), | ||
("dimgray", "white"), | ||
("darkgreen", "white"), | ||
("darkcyan", "white"), | ||
("fuchsia", "white"), | ||
("violet", "black"), | ||
("gold", "black"), | ||
("aqua", "black"), | ||
("greenyellow", "black"), | ||
], | ||
) | ||
def test_best_text_color(color, expected_text_color): | ||
color_tuple = get_rgb_color(color) | ||
expected_text_color_tuple = get_rgb_color(expected_text_color) | ||
|
||
_, text_color_tuple = get_text_color(color_tuple) | ||
assert text_color_tuple == expected_text_color_tuple | ||
|
||
|
||
@pytest.mark.parametrize( | ||
("alignment", "expected_text_bbox"), | ||
[ | ||
(TextAlignment.CENTER, ((145, 145), (155, 155))), | ||
(TextAlignment.TOP_LEFT, ((100, 90), (120, 100))), | ||
(TextAlignment.TOP_RIGHT, ((180, 100), (200, 110))), | ||
(TextAlignment.BOTTOM_LEFT, ((100, 190), (120, 200))), | ||
(TextAlignment.BOTTOM_RIGHT, ((180, 190), (200, 200))), | ||
], | ||
) | ||
def test_get_text_bbox(alignment, expected_text_bbox): | ||
text_bbox, text_xy = get_label_rect_and_coords( | ||
alignment=alignment, bbox_points=(100, 100, 200, 200), text_width=10, text_height=10 | ||
) | ||
# adding high atol to account for the text-based extending of the bbox | ||
assert np.allclose(text_bbox, expected_text_bbox, atol=10) | ||
|
||
|
||
def test_od_document_layout_dump(): | ||
page1 = PageLayout( | ||
number=1, | ||
image=Image.new("1", (1, 1)), | ||
image_metadata={"width": 100, "height": 100}, | ||
) | ||
page1.elements = [ | ||
LayoutElement(type="Title", bbox=Rectangle(x1=0, y1=0, x2=10, y2=10), prob=0.7), | ||
LayoutElement(type="Paragraph", bbox=Rectangle(x1=0, y1=100, x2=10, y2=110), prob=0.8), | ||
] | ||
page2 = PageLayout( | ||
number=2, | ||
image=Image.new("1", (1, 1)), | ||
image_metadata={"width": 100, "height": 100}, | ||
) | ||
page2.elements = [ | ||
LayoutElement(type="Table", bbox=Rectangle(x1=0, y1=0, x2=10, y2=10), prob=0.9), | ||
LayoutElement(type="Image", bbox=Rectangle(x1=0, y1=100, x2=10, y2=110), prob=1.0), | ||
] | ||
od_document_layout = DocumentLayout(pages=[page1, page2]) | ||
|
||
expected_dump = { | ||
"pages": [ | ||
{ | ||
"number": 1, | ||
"size": { | ||
"width": 100, | ||
"height": 100, | ||
}, | ||
"elements": [ | ||
{"bbox": [0, 0, 10, 10], "type": "Title", "prob": 0.7}, | ||
{"bbox": [0, 100, 10, 110], "type": "Paragraph", "prob": 0.8}, | ||
], | ||
}, | ||
{ | ||
"number": 2, | ||
"size": { | ||
"width": 100, | ||
"height": 100, | ||
}, | ||
"elements": [ | ||
{"bbox": [0, 0, 10, 10], "type": "Table", "prob": 0.9}, | ||
{"bbox": [0, 100, 10, 110], "type": "Image", "prob": 1.0}, | ||
], | ||
}, | ||
] | ||
} | ||
od_layout_dump = ObjectDetectionLayoutDumper(od_document_layout).dump() | ||
|
||
assert {"pages": od_layout_dump.get("pages")} == expected_dump | ||
|
||
# check OD model classes are attached but do not depend on a specific model instance | ||
assert "object_detection_classes" in od_layout_dump | ||
assert len(od_layout_dump["object_detection_classes"]) > 0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
Oops, something went wrong.