354 lines
13 KiB
Diff
354 lines
13 KiB
Diff
From: "patchback[bot]" <45432694+patchback[bot]@users.noreply.github.com>
|
|
Date: Fri, 6 Oct 2023 17:11:40 +0100
|
|
Subject: Update Python parser for RFCs 9110/9112 (#7662)
|
|
|
|
**This is a backport of PR #7661 as merged into 3.9
|
|
(85713a4894610e848490915e5871ad71199348e2).**
|
|
|
|
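Tighten the pure-Python HTTP parser to follow RFC 9110/9112: reject header names with leading or trailing whitespace, reject CR, LF or NUL in header values, require a digits-only Content-Length, reject duplicated singleton headers, accept only single-digit HTTP version components, and require a three-digit status code.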
|
|
|
|
Co-authored-by: Sam Bull <git@sambull.org>
|
|
---
|
|
aiohttp/http_parser.py | 85 ++++++++++++++++++++++++----------------
|
|
tests/test_http_parser.py | 99 +++++++++++++++++++++++++++++++++++++++++++++--
|
|
2 files changed, 146 insertions(+), 38 deletions(-)
|
|
|
|
diff --git a/aiohttp/http_parser.py b/aiohttp/http_parser.py
|
|
index 71ba815..0406bf4 100644
|
|
--- a/aiohttp/http_parser.py
|
|
+++ b/aiohttp/http_parser.py
|
|
@@ -5,7 +5,7 @@ import re
|
|
import string
|
|
import zlib
|
|
from enum import IntEnum
|
|
-from typing import Any, List, Optional, Tuple, Type, Union
|
|
+from typing import Any, Final, List, Optional, Pattern, Tuple, Type, Union
|
|
|
|
from multidict import CIMultiDict, CIMultiDictProxy, istr
|
|
from yarl import URL
|
|
@@ -45,16 +45,16 @@ __all__ = (
|
|
|
|
ASCIISET = set(string.printable)
|
|
|
|
-# See https://tools.ietf.org/html/rfc7230#section-3.1.1
|
|
-# and https://tools.ietf.org/html/rfc7230#appendix-B
|
|
+# See https://www.rfc-editor.org/rfc/rfc9110.html#name-overview
|
|
+# and https://www.rfc-editor.org/rfc/rfc9110.html#name-tokens
|
|
#
|
|
# method = token
|
|
# tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
|
|
# "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
|
|
# token = 1*tchar
|
|
METHRE = re.compile(r"[!#$%&'*+\-.^_`|~0-9A-Za-z]+")
|
|
-VERSRE = re.compile(r"HTTP/(\d+).(\d+)")
|
|
-HDRRE = re.compile(rb"[\x00-\x1F\x7F()<>@,;:\[\]={} \t\\\\\"]")
|
|
+VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d).(\d)")
|
|
+HDRRE: Final[Pattern[bytes]] = re.compile(rb"[\x00-\x1F\x7F()<>@,;:\[\]={} \t\"\\]")
|
|
|
|
RawRequestMessage = collections.namedtuple(
|
|
"RawRequestMessage",
|
|
@@ -132,8 +132,11 @@ class HeadersParser:
|
|
except ValueError:
|
|
raise InvalidHeader(line) from None
|
|
|
|
- bname = bname.strip(b" \t")
|
|
- bvalue = bvalue.lstrip()
|
|
+ # https://www.rfc-editor.org/rfc/rfc9112.html#section-5.1-2
|
|
+ if {bname[0], bname[-1]} & {32, 9}: # {" ", "\t"}
|
|
+ raise InvalidHeader(line)
|
|
+
|
|
+ bvalue = bvalue.lstrip(b" \t")
|
|
if HDRRE.search(bname):
|
|
raise InvalidHeader(bname)
|
|
if len(bname) > self.max_field_size:
|
|
@@ -154,6 +157,7 @@ class HeadersParser:
|
|
# consume continuation lines
|
|
continuation = line and line[0] in (32, 9) # (' ', '\t')
|
|
|
|
+ # Deprecated: https://www.rfc-editor.org/rfc/rfc9112.html#name-obsolete-line-folding
|
|
if continuation:
|
|
bvalue_lst = [bvalue]
|
|
while continuation:
|
|
@@ -188,10 +192,14 @@ class HeadersParser:
|
|
str(header_length),
|
|
)
|
|
|
|
- bvalue = bvalue.strip()
|
|
+ bvalue = bvalue.strip(b" \t")
|
|
name = bname.decode("utf-8", "surrogateescape")
|
|
value = bvalue.decode("utf-8", "surrogateescape")
|
|
|
|
+ # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5
|
|
+ if "\n" in value or "\r" in value or "\x00" in value:
|
|
+ raise InvalidHeader(bvalue)
|
|
+
|
|
headers.add(name, value)
|
|
raw_headers.append((bname, bvalue))
|
|
|
|
@@ -304,13 +312,13 @@ class HttpParser(abc.ABC):
|
|
# payload length
|
|
length = msg.headers.get(CONTENT_LENGTH)
|
|
if length is not None:
|
|
- try:
|
|
- length = int(length)
|
|
- except ValueError:
|
|
- raise InvalidHeader(CONTENT_LENGTH)
|
|
- if length < 0:
|
|
+ # Reject signs (+/-) and any other non-digit number format.
|
|
+ # https://www.rfc-editor.org/rfc/rfc9110#section-8.6-2
|
|
+ if not length.strip(" \t").isdigit():
|
|
raise InvalidHeader(CONTENT_LENGTH)
|
|
|
|
+ length = int(length)
|
|
+
|
|
# do not support old websocket spec
|
|
if SEC_WEBSOCKET_KEY1 in msg.headers:
|
|
raise InvalidHeader(SEC_WEBSOCKET_KEY1)
|
|
@@ -447,6 +455,24 @@ class HttpParser(abc.ABC):
|
|
upgrade = False
|
|
chunked = False
|
|
|
|
+ # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-6
|
|
+ # https://www.rfc-editor.org/rfc/rfc9110.html#name-collected-abnf
|
|
+ singletons = (
|
|
+ hdrs.CONTENT_LENGTH,
|
|
+ hdrs.CONTENT_LOCATION,
|
|
+ hdrs.CONTENT_RANGE,
|
|
+ hdrs.CONTENT_TYPE,
|
|
+ hdrs.ETAG,
|
|
+ hdrs.HOST,
|
|
+ hdrs.MAX_FORWARDS,
|
|
+ hdrs.SERVER,
|
|
+ hdrs.TRANSFER_ENCODING,
|
|
+ hdrs.USER_AGENT,
|
|
+ )
|
|
+ bad_hdr = next((h for h in singletons if len(headers.getall(h, ())) > 1), None)
|
|
+ if bad_hdr is not None:
|
|
+ raise BadHttpMessage(f"Duplicate '{bad_hdr}' header found.")
|
|
+
|
|
# keep-alive
|
|
conn = headers.get(hdrs.CONNECTION)
|
|
if conn:
|
|
@@ -489,7 +515,7 @@ class HttpRequestParser(HttpParser):
|
|
# request line
|
|
line = lines[0].decode("utf-8", "surrogateescape")
|
|
try:
|
|
- method, path, version = line.split(None, 2)
|
|
+ method, path, version = line.split(maxsplit=2)
|
|
except ValueError:
|
|
raise BadStatusLine(line) from None
|
|
|
|
@@ -506,14 +532,10 @@ class HttpRequestParser(HttpParser):
|
|
raise BadStatusLine(method)
|
|
|
|
# version
|
|
- try:
|
|
- if version.startswith("HTTP/"):
|
|
- n1, n2 = version[5:].split(".", 1)
|
|
- version_o = HttpVersion(int(n1), int(n2))
|
|
- else:
|
|
- raise BadStatusLine(version)
|
|
- except Exception:
|
|
- raise BadStatusLine(version)
|
|
+ match = VERSRE.match(version)
|
|
+ if match is None:
|
|
+ raise BadStatusLine(line)
|
|
+ version_o = HttpVersion(int(match.group(1)), int(match.group(2)))
|
|
|
|
# read headers
|
|
(
|
|
@@ -563,12 +585,12 @@ class HttpResponseParser(HttpParser):
|
|
def parse_message(self, lines: List[bytes]) -> Any:
|
|
line = lines[0].decode("utf-8", "surrogateescape")
|
|
try:
|
|
- version, status = line.split(None, 1)
|
|
+ version, status = line.split(maxsplit=1)
|
|
except ValueError:
|
|
raise BadStatusLine(line) from None
|
|
|
|
try:
|
|
- status, reason = status.split(None, 1)
|
|
+ status, reason = status.split(maxsplit=1)
|
|
except ValueError:
|
|
reason = ""
|
|
|
|
@@ -584,13 +606,9 @@ class HttpResponseParser(HttpParser):
|
|
version_o = HttpVersion(int(match.group(1)), int(match.group(2)))
|
|
|
|
# The status code is a three-digit number
|
|
- try:
|
|
- status_i = int(status)
|
|
- except ValueError:
|
|
- raise BadStatusLine(line) from None
|
|
-
|
|
- if status_i > 999:
|
|
+ if len(status) != 3 or not status.isdigit():
|
|
raise BadStatusLine(line)
|
|
+ status_i = int(status)
|
|
|
|
# read headers
|
|
(
|
|
@@ -725,14 +743,13 @@ class HttpPayloadParser:
|
|
else:
|
|
size_b = chunk[:pos]
|
|
|
|
- try:
|
|
- size = int(bytes(size_b), 16)
|
|
- except ValueError:
|
|
+ if not size_b.isdigit():
|
|
exc = TransferEncodingError(
|
|
chunk[:pos].decode("ascii", "surrogateescape")
|
|
)
|
|
self.payload.set_exception(exc)
|
|
- raise exc from None
|
|
+ raise exc
|
|
+ size = int(bytes(size_b), 16)
|
|
|
|
chunk = chunk[pos + 2 :]
|
|
if size == 0: # eof marker
|
|
diff --git a/tests/test_http_parser.py b/tests/test_http_parser.py
|
|
index ab39cdd..214afe8 100644
|
|
--- a/tests/test_http_parser.py
|
|
+++ b/tests/test_http_parser.py
|
|
@@ -3,6 +3,7 @@
|
|
import asyncio
|
|
from unittest import mock
|
|
from urllib.parse import quote
|
|
+from typing import Any
|
|
|
|
import pytest
|
|
from multidict import CIMultiDict
|
|
@@ -365,6 +366,83 @@ def test_invalid_name(parser) -> None:
|
|
parser.feed_data(text)
|
|
|
|
|
|
+def test_cve_2023_37276(parser: Any) -> None:
|
|
+ text = b"""POST / HTTP/1.1\r\nHost: localhost:8080\r\nX-Abc: \rxTransfer-Encoding: chunked\r\n\r\n"""
|
|
+ if isinstance(parser, HttpRequestParserPy):
|
|
+ with pytest.raises(aiohttp.http_exceptions.InvalidHeader):
|
|
+ parser.feed_data(text)
|
|
+ else:
|
|
+ pytest.xfail("Regression test for Py parser. May match C behaviour later.")
|
|
+
|
|
+
|
|
+@pytest.mark.parametrize(
|
|
+ "hdr",
|
|
+ (
|
|
+ "Content-Length: -5", # https://www.rfc-editor.org/rfc/rfc9110.html#name-content-length
|
|
+ "Content-Length: +256",
|
|
+ "Foo: abc\rdef", # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5
|
|
+ "Bar: abc\ndef",
|
|
+ "Baz: abc\x00def",
|
|
+ "Foo : bar", # https://www.rfc-editor.org/rfc/rfc9112.html#section-5.1-2
|
|
+ "Foo\t: bar",
|
|
+ ),
|
|
+)
|
|
+def test_bad_headers(parser: Any, hdr: str) -> None:
|
|
+ text = f"POST / HTTP/1.1\r\n{hdr}\r\n\r\n".encode()
|
|
+ if isinstance(parser, HttpRequestParserPy):
|
|
+ with pytest.raises(aiohttp.http_exceptions.InvalidHeader):
|
|
+ parser.feed_data(text)
|
|
+ elif hdr != "Foo : bar":
|
|
+ with pytest.raises(aiohttp.http_exceptions.BadHttpMessage):
|
|
+ parser.feed_data(text)
|
|
+ else:
|
|
+ pytest.xfail("Regression test for Py parser. May match C behaviour later.")
|
|
+
|
|
+
|
|
+def test_bad_chunked_py(loop: Any, protocol: Any) -> None:
|
|
+ """Test that invalid chunked encoding doesn't allow content-length to be used."""
|
|
+ parser = HttpRequestParserPy(
|
|
+ protocol,
|
|
+ loop,
|
|
+ 2**16,
|
|
+ max_line_size=8190,
|
|
+ max_field_size=8190,
|
|
+ )
|
|
+ text = (
|
|
+ b"GET / HTTP/1.1\r\nHost: a\r\nTransfer-Encoding: chunked\r\n\r\n0_2e\r\n\r\n"
|
|
+ + b"GET / HTTP/1.1\r\nHost: a\r\nContent-Length: 5\r\n\r\n0\r\n\r\n"
|
|
+ )
|
|
+ messages, upgrade, tail = parser.feed_data(text)
|
|
+ assert isinstance(messages[0][1].exception(), http_exceptions.TransferEncodingError)
|
|
+
|
|
+
|
|
+@pytest.mark.skipif(
|
|
+ "HttpRequestParserC" not in dir(aiohttp.http_parser),
|
|
+ reason="C based HTTP parser not available",
|
|
+)
|
|
+def test_bad_chunked_c(loop: Any, protocol: Any) -> None:
|
|
+ """C parser behaves differently. Maybe we should align them later."""
|
|
+ parser = HttpRequestParserC(
|
|
+ protocol,
|
|
+ loop,
|
|
+ 2**16,
|
|
+ max_line_size=8190,
|
|
+ max_field_size=8190,
|
|
+ )
|
|
+ text = (
|
|
+ b"GET / HTTP/1.1\r\nHost: a\r\nTransfer-Encoding: chunked\r\n\r\n0_2e\r\n\r\n"
|
|
+ + b"GET / HTTP/1.1\r\nHost: a\r\nContent-Length: 5\r\n\r\n0\r\n\r\n"
|
|
+ )
|
|
+ with pytest.raises(http_exceptions.BadHttpMessage):
|
|
+ parser.feed_data(text)
|
|
+
|
|
+
|
|
+def test_whitespace_before_header(parser: Any) -> None:
|
|
+ text = b"GET / HTTP/1.1\r\n\tContent-Length: 1\r\n\r\nX"
|
|
+ with pytest.raises(http_exceptions.BadHttpMessage):
|
|
+ parser.feed_data(text)
|
|
+
|
|
+
|
|
@pytest.mark.parametrize("size", [40960, 8191])
|
|
def test_max_header_field_size(parser, size) -> None:
|
|
name = b"t" * size
|
|
@@ -546,6 +624,11 @@ def test_http_request_parser_bad_version(parser) -> None:
|
|
parser.feed_data(b"GET //get HT/11\r\n\r\n")
|
|
|
|
|
|
+def test_http_request_parser_bad_version_number(parser: Any) -> None:
|
|
+ with pytest.raises(http_exceptions.BadHttpMessage):
|
|
+ parser.feed_data(b"GET /test HTTP/12.3\r\n\r\n")
|
|
+
|
|
+
|
|
@pytest.mark.parametrize("size", [40965, 8191])
|
|
def test_http_request_max_status_line(parser, size) -> None:
|
|
path = b"t" * (size - 5)
|
|
@@ -613,6 +696,11 @@ def test_http_response_parser_bad_version(response) -> None:
|
|
response.feed_data(b"HT/11 200 Ok\r\n\r\n")
|
|
|
|
|
|
+def test_http_response_parser_bad_version_number(response) -> None:
|
|
+ with pytest.raises(http_exceptions.BadHttpMessage):
|
|
+ response.feed_data(b"HTTP/12.3 200 Ok\r\n\r\n")
|
|
+
|
|
+
|
|
def test_http_response_parser_no_reason(response) -> None:
|
|
msg = response.feed_data(b"HTTP/1.1 200\r\n\r\n")[0][0][0]
|
|
|
|
@@ -627,17 +715,20 @@ def test_http_response_parser_bad(response) -> None:
|
|
|
|
|
|
def test_http_response_parser_code_under_100(response) -> None:
|
|
- msg = response.feed_data(b"HTTP/1.1 99 test\r\n\r\n")[0][0][0]
|
|
- assert msg.code == 99
|
|
+ if isinstance(response, HttpResponseParserPy):
|
|
+ with pytest.raises(aiohttp.http_exceptions.BadStatusLine):
|
|
+ response.feed_data(b"HTTP/1.1 99 test\r\n\r\n")
|
|
+ else:
|
|
+ pytest.xfail("Regression test for Py parser. May match C behaviour later.")
|
|
|
|
|
|
def test_http_response_parser_code_above_999(response) -> None:
|
|
- with pytest.raises(http_exceptions.BadHttpMessage):
|
|
+ with pytest.raises(http_exceptions.BadStatusLine):
|
|
response.feed_data(b"HTTP/1.1 9999 test\r\n\r\n")
|
|
|
|
|
|
def test_http_response_parser_code_not_int(response) -> None:
|
|
- with pytest.raises(http_exceptions.BadHttpMessage):
|
|
+ with pytest.raises(http_exceptions.BadStatusLine):
|
|
response.feed_data(b"HTTP/1.1 ttt test\r\n\r\n")
|
|
|
|
|