From: "patchback[bot]" <45432694+patchback[bot]@users.noreply.github.com> Date: Fri, 6 Oct 2023 17:11:40 +0100 Subject: Update Python parser for RFCs 9110/9112 (#7662) **This is a backport of PR #7661 as merged into 3.9 (85713a4894610e848490915e5871ad71199348e2).** None Co-authored-by: Sam Bull --- aiohttp/http_parser.py | 85 ++++++++++++++++++++++++---------------- tests/test_http_parser.py | 99 +++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 146 insertions(+), 38 deletions(-) diff --git a/aiohttp/http_parser.py b/aiohttp/http_parser.py index 71ba815..0406bf4 100644 --- a/aiohttp/http_parser.py +++ b/aiohttp/http_parser.py @@ -5,7 +5,7 @@ import re import string import zlib from enum import IntEnum -from typing import Any, List, Optional, Tuple, Type, Union +from typing import Any, Final, List, Optional, Pattern, Tuple, Type, Union from multidict import CIMultiDict, CIMultiDictProxy, istr from yarl import URL @@ -45,16 +45,16 @@ __all__ = ( ASCIISET = set(string.printable) -# See https://tools.ietf.org/html/rfc7230#section-3.1.1 -# and https://tools.ietf.org/html/rfc7230#appendix-B +# See https://www.rfc-editor.org/rfc/rfc9110.html#name-overview +# and https://www.rfc-editor.org/rfc/rfc9110.html#name-tokens # # method = token # tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." / # "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA # token = 1*tchar METHRE = re.compile(r"[!#$%&'*+\-.^_`|~0-9A-Za-z]+") -VERSRE = re.compile(r"HTTP/(\d+).(\d+)") -HDRRE = re.compile(rb"[\x00-\x1F\x7F()<>@,;:\[\]={} \t\\\\\"]") +VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d).(\d)") +HDRRE: Final[Pattern[bytes]] = re.compile(rb"[\x00-\x1F\x7F()<>@,;:\[\]={} \t\"\\]") RawRequestMessage = collections.namedtuple( "RawRequestMessage", @@ -132,8 +132,11 @@ class HeadersParser: except ValueError: raise InvalidHeader(line) from None - bname = bname.strip(b" \t") - bvalue = bvalue.lstrip() + # https://www.rfc-editor.org/rfc/rfc9112.html#section-5.1-2 + if {bname[0], bname[-1]} & {32, 9}: # {" ", "\t"} + raise InvalidHeader(line) + + bvalue = bvalue.lstrip(b" \t") if HDRRE.search(bname): raise InvalidHeader(bname) if len(bname) > self.max_field_size: @@ -154,6 +157,7 @@ class HeadersParser: # consume continuation lines continuation = line and line[0] in (32, 9) # (' ', '\t') + # Deprecated: https://www.rfc-editor.org/rfc/rfc9112.html#name-obsolete-line-folding if continuation: bvalue_lst = [bvalue] while continuation: @@ -188,10 +192,14 @@ class HeadersParser: str(header_length), ) - bvalue = bvalue.strip() + bvalue = bvalue.strip(b" \t") name = bname.decode("utf-8", "surrogateescape") value = bvalue.decode("utf-8", "surrogateescape") + # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5 + if "\n" in value or "\r" in value or "\x00" in value: + raise InvalidHeader(bvalue) + headers.add(name, value) raw_headers.append((bname, bvalue)) @@ -304,13 +312,13 @@ class HttpParser(abc.ABC): # payload length length = msg.headers.get(CONTENT_LENGTH) if length is not None: - try: - length = int(length) - except ValueError: - raise InvalidHeader(CONTENT_LENGTH) - if length < 0: + # Shouldn't allow +/- or other number formats. + # https://www.rfc-editor.org/rfc/rfc9110#section-8.6-2 + if not length.strip(" \t").isdigit(): raise InvalidHeader(CONTENT_LENGTH) + length = int(length) + # do not support old websocket spec if SEC_WEBSOCKET_KEY1 in msg.headers: raise InvalidHeader(SEC_WEBSOCKET_KEY1) @@ -447,6 +455,24 @@ class HttpParser(abc.ABC): upgrade = False chunked = False + # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-6 + # https://www.rfc-editor.org/rfc/rfc9110.html#name-collected-abnf + singletons = ( + hdrs.CONTENT_LENGTH, + hdrs.CONTENT_LOCATION, + hdrs.CONTENT_RANGE, + hdrs.CONTENT_TYPE, + hdrs.ETAG, + hdrs.HOST, + hdrs.MAX_FORWARDS, + hdrs.SERVER, + hdrs.TRANSFER_ENCODING, + hdrs.USER_AGENT, + ) + bad_hdr = next((h for h in singletons if len(headers.getall(h, ())) > 1), None) + if bad_hdr is not None: + raise BadHttpMessage(f"Duplicate '{bad_hdr}' header found.") + # keep-alive conn = headers.get(hdrs.CONNECTION) if conn: @@ -489,7 +515,7 @@ class HttpRequestParser(HttpParser): # request line line = lines[0].decode("utf-8", "surrogateescape") try: - method, path, version = line.split(None, 2) + method, path, version = line.split(maxsplit=2) except ValueError: raise BadStatusLine(line) from None @@ -506,14 +532,10 @@ class HttpRequestParser(HttpParser): raise BadStatusLine(method) # version - try: - if version.startswith("HTTP/"): - n1, n2 = version[5:].split(".", 1) - version_o = HttpVersion(int(n1), int(n2)) - else: - raise BadStatusLine(version) - except Exception: - raise BadStatusLine(version) + match = VERSRE.match(version) + if match is None: + raise BadStatusLine(line) + version_o = HttpVersion(int(match.group(1)), int(match.group(2))) # read headers ( @@ -563,12 +585,12 @@ class HttpResponseParser(HttpParser): def parse_message(self, lines: List[bytes]) -> Any: line = lines[0].decode("utf-8", "surrogateescape") try: - version, status = line.split(None, 1) + version, status = line.split(maxsplit=1) except ValueError: raise BadStatusLine(line) from None try: - status, reason = status.split(None, 1) + status, reason = status.split(maxsplit=1) except ValueError: reason = "" @@ -584,13 +606,9 @@ class HttpResponseParser(HttpParser): version_o = HttpVersion(int(match.group(1)), int(match.group(2))) # The status code is a three-digit number - try: - status_i = int(status) - except ValueError: - raise BadStatusLine(line) from None - - if status_i > 999: + if len(status) != 3 or not status.isdigit(): raise BadStatusLine(line) + status_i = int(status) # read headers ( @@ -725,14 +743,13 @@ class HttpPayloadParser: else: size_b = chunk[:pos] - try: - size = int(bytes(size_b), 16) - except ValueError: + if not size_b.isdigit(): exc = TransferEncodingError( chunk[:pos].decode("ascii", "surrogateescape") ) self.payload.set_exception(exc) - raise exc from None + raise exc + size = int(bytes(size_b), 16) chunk = chunk[pos + 2 :] if size == 0: # eof marker diff --git a/tests/test_http_parser.py b/tests/test_http_parser.py index ab39cdd..214afe8 100644 --- a/tests/test_http_parser.py +++ b/tests/test_http_parser.py @@ -3,6 +3,7 @@ import asyncio from unittest import mock from urllib.parse import quote +from typing import Any import pytest from multidict import CIMultiDict @@ -365,6 +366,83 @@ def test_invalid_name(parser) -> None: parser.feed_data(text) +def test_cve_2023_37276(parser: Any) -> None: + text = b"""POST / HTTP/1.1\r\nHost: localhost:8080\r\nX-Abc: \rxTransfer-Encoding: chunked\r\n\r\n""" + if isinstance(parser, HttpRequestParserPy): + with pytest.raises(aiohttp.http_exceptions.InvalidHeader): + parser.feed_data(text) + else: + pytest.xfail("Regression test for Py parser. May match C behaviour later.") + + +@pytest.mark.parametrize( + "hdr", + ( + "Content-Length: -5", # https://www.rfc-editor.org/rfc/rfc9110.html#name-content-length + "Content-Length: +256", + "Foo: abc\rdef", # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5 + "Bar: abc\ndef", + "Baz: abc\x00def", + "Foo : bar", # https://www.rfc-editor.org/rfc/rfc9112.html#section-5.1-2 + "Foo\t: bar", + ), +) +def test_bad_headers(parser: Any, hdr: str) -> None: + text = f"POST / HTTP/1.1\r\n{hdr}\r\n\r\n".encode() + if isinstance(parser, HttpRequestParserPy): + with pytest.raises(aiohttp.http_exceptions.InvalidHeader): + parser.feed_data(text) + elif hdr != "Foo : bar": + with pytest.raises(aiohttp.http_exceptions.BadHttpMessage): + parser.feed_data(text) + else: + pytest.xfail("Regression test for Py parser. May match C behaviour later.") + + +def test_bad_chunked_py(loop: Any, protocol: Any) -> None: + """Test that invalid chunked encoding doesn't allow content-length to be used.""" + parser = HttpRequestParserPy( + protocol, + loop, + 2**16, + max_line_size=8190, + max_field_size=8190, + ) + text = ( + b"GET / HTTP/1.1\r\nHost: a\r\nTransfer-Encoding: chunked\r\n\r\n0_2e\r\n\r\n" + + b"GET / HTTP/1.1\r\nHost: a\r\nContent-Length: 5\r\n\r\n0\r\n\r\n" + ) + messages, upgrade, tail = parser.feed_data(text) + assert isinstance(messages[0][1].exception(), http_exceptions.TransferEncodingError) + + +@pytest.mark.skipif( + "HttpRequestParserC" not in dir(aiohttp.http_parser), + reason="C based HTTP parser not available", +) +def test_bad_chunked_c(loop: Any, protocol: Any) -> None: + """C parser behaves differently. Maybe we should align them later.""" + parser = HttpRequestParserC( + protocol, + loop, + 2**16, + max_line_size=8190, + max_field_size=8190, + ) + text = ( + b"GET / HTTP/1.1\r\nHost: a\r\nTransfer-Encoding: chunked\r\n\r\n0_2e\r\n\r\n" + + b"GET / HTTP/1.1\r\nHost: a\r\nContent-Length: 5\r\n\r\n0\r\n\r\n" + ) + with pytest.raises(http_exceptions.BadHttpMessage): + parser.feed_data(text) + + +def test_whitespace_before_header(parser: Any) -> None: + text = b"GET / HTTP/1.1\r\n\tContent-Length: 1\r\n\r\nX" + with pytest.raises(http_exceptions.BadHttpMessage): + parser.feed_data(text) + + @pytest.mark.parametrize("size", [40960, 8191]) def test_max_header_field_size(parser, size) -> None: name = b"t" * size @@ -546,6 +624,11 @@ def test_http_request_parser_bad_version(parser) -> None: parser.feed_data(b"GET //get HT/11\r\n\r\n") +def test_http_request_parser_bad_version_number(parser: Any) -> None: + with pytest.raises(http_exceptions.BadHttpMessage): + parser.feed_data(b"GET /test HTTP/12.3\r\n\r\n") + + @pytest.mark.parametrize("size", [40965, 8191]) def test_http_request_max_status_line(parser, size) -> None: path = b"t" * (size - 5) @@ -613,6 +696,11 @@ def test_http_response_parser_bad_version(response) -> None: response.feed_data(b"HT/11 200 Ok\r\n\r\n") +def test_http_response_parser_bad_version_number(response) -> None: + with pytest.raises(http_exceptions.BadHttpMessage): + response.feed_data(b"HTTP/12.3 200 Ok\r\n\r\n") + + def test_http_response_parser_no_reason(response) -> None: msg = response.feed_data(b"HTTP/1.1 200\r\n\r\n")[0][0][0] @@ -627,17 +715,20 @@ def test_http_response_parser_bad(response) -> None: def test_http_response_parser_code_under_100(response) -> None: - msg = response.feed_data(b"HTTP/1.1 99 test\r\n\r\n")[0][0][0] - assert msg.code == 99 + if isinstance(response, HttpResponseParserPy): + with pytest.raises(aiohttp.http_exceptions.BadStatusLine): + response.feed_data(b"HTTP/1.1 99 test\r\n\r\n") + else: + pytest.xfail("Regression test for Py parser. May match C behaviour later.") def test_http_response_parser_code_above_999(response) -> None: - with pytest.raises(http_exceptions.BadHttpMessage): + with pytest.raises(http_exceptions.BadStatusLine): response.feed_data(b"HTTP/1.1 9999 test\r\n\r\n") def test_http_response_parser_code_not_int(response) -> None: - with pytest.raises(http_exceptions.BadHttpMessage): + with pytest.raises(http_exceptions.BadStatusLine): response.feed_data(b"HTTP/1.1 ttt test\r\n\r\n")