Skip to content

Commit

Permalink
improve parse_options_header performance (#2939)
Browse files Browse the repository at this point in the history
  • Loading branch information
davidism committed Aug 21, 2024
2 parents 3a52597 + 3a893d2 commit 7abec4b
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 26 deletions.
2 changes: 2 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ Unreleased
- Restore behavior where parsing `application/x-www-form-urlencoded` data with
invalid UTF-8 bytes in the body results in no form data parsed rather than a
413 error. :issue:`2930`
- Improve ``parse_options_header`` performance when parsing unterminated
quoted string values. :issue:`2907`


Version 3.0.3
Expand Down
65 changes: 41 additions & 24 deletions src/werkzeug/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,22 +395,8 @@ def parse_dict_header(value: str) -> dict[str, str | None]:


# https://httpwg.org/specs/rfc9110.html#parameter
# Matches one ``;``-delimited part of a header's parameter list. Group 1 is the
# key and group 2 is the raw value (a token, or a quoted string with its quotes
# still attached). The key=value group is optional, so an empty or invalid part
# still matches the delimiter and yields empty groups.
_parameter_re = re.compile(
r"""
# don't match multiple empty parts, that causes backtracking
\s*;\s* # find the part delimiter
(?:
([\w!#$%&'*+\-.^`|~]+) # key, one or more token chars
= # equals, with no space on either side
( # value, token or quoted string
[\w!#$%&'*+\-.^`|~]+ # one or more token chars
|
"(?:\\\\|\\"|.)*?" # quoted string, consuming slash escapes
)
)? # optionally match key=value, to account for empty parts
""",
re.ASCII | re.VERBOSE,
)
# Parameter key: one or more token characters immediately followed by ``=``.
# Group 1 is the key without the ``=``. Applied with ``.match()`` so it only
# matches at the start of the remaining header text.
_parameter_key_re = re.compile(r"([\w!#$%&'*+\-.^`|~]+)=", flags=re.ASCII)
# Unquoted (token) parameter value: one or more token characters. Also applied
# with ``.match()`` against the text that follows the ``=``.
_parameter_token_value_re = re.compile(r"[\w!#$%&'*+\-.^`|~]+", flags=re.ASCII)
# https://www.rfc-editor.org/rfc/rfc2231#section-4
_charset_value_re = re.compile(
r"""
Expand Down Expand Up @@ -492,18 +478,49 @@ def parse_options_header(value: str | None) -> tuple[str, dict[str, str]]:
# empty (invalid) value, or value without options
return value, {}

rest = f";{rest}"
# Collect all valid key=value parts without processing the value.
parts: list[tuple[str, str]] = []

while True:
if (m := _parameter_key_re.match(rest)) is not None:
pk = m.group(1).lower()
rest = rest[m.end() :]

# Value may be a token.
if (m := _parameter_token_value_re.match(rest)) is not None:
parts.append((pk, m.group()))

# Value may be a quoted string, find the closing quote.
elif rest[:1] == '"':
pos = 1
length = len(rest)

while pos < length:
if rest[pos : pos + 2] in {"\\\\", '\\"'}:
# Consume escaped slashes and quotes.
pos += 2
elif rest[pos] == '"':
# Stop at an unescaped quote.
parts.append((pk, rest[: pos + 1]))
rest = rest[pos + 1 :]
break
else:
# Consume any other character.
pos += 1

# Find the next section delimited by `;`, if any.
if (end := rest.find(";")) == -1:
break

rest = rest[end + 1 :].lstrip()

options: dict[str, str] = {}
encoding: str | None = None
continued_encoding: str | None = None

for pk, pv in _parameter_re.findall(rest):
if not pk:
# empty or invalid part
continue

pk = pk.lower()

# For each collected part, process optional charset and continuation,
# unquote quoted values.
for pk, pv in parts:
if pk[-1] == "*":
# key*=charset''value becomes key=value, where value is percent encoded
pk = pk[:-1]
Expand Down
6 changes: 4 additions & 2 deletions tests/test_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,8 +361,8 @@ def test_parse_options_header_empty(self, value, expect):
('v;a="b\\"c";d=e', {"a": 'b"c', "d": "e"}),
# HTTP headers use \\ for internal \
('v;a="c:\\\\"', {"a": "c:\\"}),
# Invalid trailing slash in quoted part is left as-is.
('v;a="c:\\"', {"a": "c:\\"}),
# Part with invalid trailing slash is discarded.
('v;a="c:\\"', {}),
('v;a="b\\\\\\"c"', {"a": 'b\\"c'}),
# multipart form data uses %22 for internal "
('v;a="b%22c"', {"a": 'b"c'}),
Expand All @@ -377,6 +377,8 @@ def test_parse_options_header_empty(self, value, expect):
("v;a*0=b;a*1=c;d=e", {"a": "bc", "d": "e"}),
("v;a*0*=b", {"a": "b"}),
("v;a*0*=UTF-8''b;a*1=c;a*2*=%C2%B5", {"a": "bcµ"}),
# Long invalid quoted string with trailing slashes does not freeze.
('v;a="' + "\\" * 400, {}),
],
)
def test_parse_options_header(self, value, expect) -> None:
Expand Down

0 comments on commit 7abec4b

Please sign in to comment.