Skip to content

Commit

Permalink
Addresses wrong label generation
Browse files Browse the repository at this point in the history
We computed a wrong index for page labels that enumerate the ASCII letter range, so that we always delivered the label one position too far (label number +1) for a given page number.

This fix subtracts 1 from the index for label styles "a"/"A".
  • Loading branch information
JorjMcKie committed May 23, 2024
1 parent 036f9a9 commit f26b673
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 2 deletions.
4 changes: 3 additions & 1 deletion src/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4964,7 +4964,9 @@ def get_label_pno(pgNo, labels):
rule = rule_dict(item)
prefix = rule.get("prefix", "")
style = rule.get("style", "")
pagenumber = pgNo - rule["startpage"] + rule["firstpagenum"]
# make sure we start at 0 when enumerating the alphabet
delta = -1 if style in ("a", "A") else 0
pagenumber = pgNo - rule["startpage"] + rule["firstpagenum"] + delta
return construct_label(style, prefix, pagenumber)


Expand Down
20 changes: 19 additions & 1 deletion tests/test_pagelabels.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Define some page labels in a PDF.
Check success in various aspects.
"""

import pymupdf


Expand Down Expand Up @@ -35,6 +36,23 @@ def test_setlabels():
doc.set_page_labels(make_labels())
page_labels = [p.get_label() for p in doc]
answer = ["A-1", "A-2", "A-3", "A-4", "I", "II", "III", "IV", "V", "VI"]
assert page_labels == answer, f'page_labels={page_labels}'
assert page_labels == answer, f"page_labels={page_labels}"
assert doc.get_page_numbers("V") == [8]
assert doc.get_page_labels() == make_labels()


def test_labels_styleA():
    """Check that letter-based label styles "a" and "A" are indexed correctly.

    Two label rules are installed: lowercase letters from page 0 and
    uppercase letters from page 5, both with ``firstpagenum`` 1. The
    document is serialized and re-opened from memory, so the labels are
    read back from the written PDF. Each range is expected to begin with
    the first letter of the alphabet.
    """
    rules = [
        {"startpage": 0, "prefix": "", "style": "a", "firstpagenum": 1},
        {"startpage": 5, "prefix": "", "style": "A", "firstpagenum": 1},
    ]
    source = make_doc()
    source.set_page_labels(rules)
    buffer = source.tobytes()
    source.close()

    # Work on a fresh document built from the serialized bytes.
    reopened = pymupdf.open("pdf", buffer)
    expected = ["a", "b", "c", "d", "e", "A", "B", "C", "D", "E"]
    found = []
    for page in reopened:
        found.append(page.get_label())

    assert found == expected
    # The label definitions themselves must also be returned unchanged.
    assert reopened.get_page_labels() == rules

0 comments on commit f26b673

Please sign in to comment.