python-aiohttp/CVE-2023-47627.patch
starlet-dx 9d926e8cf7 Fix CVE-2023-47627, CVE-2023-49082, CVE-2024-23334, CVE-2024-23829, CVE-2024-27306 and CVE-2024-30251
(cherry picked from commit dabdc40effcfef17ad7e2a967edf709e75859b23)
2025-03-06 10:11:34 +08:00

354 lines
13 KiB
Diff

From: "patchback[bot]" <45432694+patchback[bot]@users.noreply.github.com>
Date: Fri, 6 Oct 2023 17:11:40 +0100
Subject: Update Python parser for RFCs 9110/9112 (#7662)
**This is a backport of PR #7661 as merged into 3.9
(85713a4894610e848490915e5871ad71199348e2).**
None
Co-authored-by: Sam Bull <git@sambull.org>
---
aiohttp/http_parser.py | 85 ++++++++++++++++++++++++----------------
tests/test_http_parser.py | 99 +++++++++++++++++++++++++++++++++++++++++++++--
2 files changed, 146 insertions(+), 38 deletions(-)
diff --git a/aiohttp/http_parser.py b/aiohttp/http_parser.py
index 71ba815..0406bf4 100644
--- a/aiohttp/http_parser.py
+++ b/aiohttp/http_parser.py
@@ -5,7 +5,7 @@ import re
import string
import zlib
from enum import IntEnum
-from typing import Any, List, Optional, Tuple, Type, Union
+from typing import Any, Final, List, Optional, Pattern, Tuple, Type, Union
from multidict import CIMultiDict, CIMultiDictProxy, istr
from yarl import URL
@@ -45,16 +45,16 @@ __all__ = (
ASCIISET = set(string.printable)
-# See https://tools.ietf.org/html/rfc7230#section-3.1.1
-# and https://tools.ietf.org/html/rfc7230#appendix-B
+# See https://www.rfc-editor.org/rfc/rfc9110.html#name-overview
+# and https://www.rfc-editor.org/rfc/rfc9110.html#name-tokens
#
# method = token
# tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
# "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
# token = 1*tchar
METHRE = re.compile(r"[!#$%&'*+\-.^_`|~0-9A-Za-z]+")
-VERSRE = re.compile(r"HTTP/(\d+).(\d+)")
-HDRRE = re.compile(rb"[\x00-\x1F\x7F()<>@,;:\[\]={} \t\\\\\"]")
+VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d)\.(\d)", re.ASCII)
+HDRRE: Final[Pattern[bytes]] = re.compile(rb"[\x00-\x1F\x7F()<>@,;:\[\]={} \t\"\\]")
RawRequestMessage = collections.namedtuple(
"RawRequestMessage",
@@ -132,8 +132,11 @@ class HeadersParser:
except ValueError:
raise InvalidHeader(line) from None
- bname = bname.strip(b" \t")
- bvalue = bvalue.lstrip()
+ # https://www.rfc-editor.org/rfc/rfc9112.html#section-5.1-2
+ if not bname or {bname[0], bname[-1]} & {32, 9}: # empty name, or {" ", "\t"} at either end
+ raise InvalidHeader(line)
+
+ bvalue = bvalue.lstrip(b" \t")
if HDRRE.search(bname):
raise InvalidHeader(bname)
if len(bname) > self.max_field_size:
@@ -154,6 +157,7 @@ class HeadersParser:
# consume continuation lines
continuation = line and line[0] in (32, 9) # (' ', '\t')
+ # Deprecated: https://www.rfc-editor.org/rfc/rfc9112.html#name-obsolete-line-folding
if continuation:
bvalue_lst = [bvalue]
while continuation:
@@ -188,10 +192,14 @@ class HeadersParser:
str(header_length),
)
- bvalue = bvalue.strip()
+ bvalue = bvalue.strip(b" \t")
name = bname.decode("utf-8", "surrogateescape")
value = bvalue.decode("utf-8", "surrogateescape")
+ # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5
+ if "\n" in value or "\r" in value or "\x00" in value:
+ raise InvalidHeader(bvalue)
+
headers.add(name, value)
raw_headers.append((bname, bvalue))
@@ -304,13 +312,13 @@ class HttpParser(abc.ABC):
# payload length
length = msg.headers.get(CONTENT_LENGTH)
if length is not None:
- try:
- length = int(length)
- except ValueError:
- raise InvalidHeader(CONTENT_LENGTH)
- if length < 0:
+ # Only ASCII digits are valid: no +/- sign, and no Unicode digits that int() rejects.
+ # https://www.rfc-editor.org/rfc/rfc9110#section-8.6-2
+ if not re.fullmatch(r"[0-9]+", length.strip(" \t")):
raise InvalidHeader(CONTENT_LENGTH)
+ length = int(length)
+
# do not support old websocket spec
if SEC_WEBSOCKET_KEY1 in msg.headers:
raise InvalidHeader(SEC_WEBSOCKET_KEY1)
@@ -447,6 +455,24 @@ class HttpParser(abc.ABC):
upgrade = False
chunked = False
+ # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-6
+ # https://www.rfc-editor.org/rfc/rfc9110.html#name-collected-abnf
+ singletons = (
+ hdrs.CONTENT_LENGTH,
+ hdrs.CONTENT_LOCATION,
+ hdrs.CONTENT_RANGE,
+ hdrs.CONTENT_TYPE,
+ hdrs.ETAG,
+ hdrs.HOST,
+ hdrs.MAX_FORWARDS,
+ hdrs.SERVER,
+ hdrs.TRANSFER_ENCODING,
+ hdrs.USER_AGENT,
+ )
+ bad_hdr = next((h for h in singletons if len(headers.getall(h, ())) > 1), None)
+ if bad_hdr is not None:
+ raise BadHttpMessage(f"Duplicate '{bad_hdr}' header found.")
+
# keep-alive
conn = headers.get(hdrs.CONNECTION)
if conn:
@@ -489,7 +515,7 @@ class HttpRequestParser(HttpParser):
# request line
line = lines[0].decode("utf-8", "surrogateescape")
try:
- method, path, version = line.split(None, 2)
+ method, path, version = line.split(maxsplit=2)
except ValueError:
raise BadStatusLine(line) from None
@@ -506,14 +532,10 @@ class HttpRequestParser(HttpParser):
raise BadStatusLine(method)
# version
- try:
- if version.startswith("HTTP/"):
- n1, n2 = version[5:].split(".", 1)
- version_o = HttpVersion(int(n1), int(n2))
- else:
- raise BadStatusLine(version)
- except Exception:
- raise BadStatusLine(version)
+ match = VERSRE.match(version)
+ if match is None:
+ raise BadStatusLine(line)
+ version_o = HttpVersion(int(match.group(1)), int(match.group(2)))
# read headers
(
@@ -563,12 +585,12 @@ class HttpResponseParser(HttpParser):
def parse_message(self, lines: List[bytes]) -> Any:
line = lines[0].decode("utf-8", "surrogateescape")
try:
- version, status = line.split(None, 1)
+ version, status = line.split(maxsplit=1)
except ValueError:
raise BadStatusLine(line) from None
try:
- status, reason = status.split(None, 1)
+ status, reason = status.split(maxsplit=1)
except ValueError:
reason = ""
@@ -584,13 +606,9 @@ class HttpResponseParser(HttpParser):
version_o = HttpVersion(int(match.group(1)), int(match.group(2)))
# The status code is a three-digit number
- try:
- status_i = int(status)
- except ValueError:
- raise BadStatusLine(line) from None
-
- if status_i > 999:
+ if not re.fullmatch(r"[0-9]{3}", status): # exactly three ASCII digits
raise BadStatusLine(line)
+ status_i = int(status)
# read headers
(
@@ -725,14 +743,13 @@ class HttpPayloadParser:
else:
size_b = chunk[:pos]
- try:
- size = int(bytes(size_b), 16)
- except ValueError:
+ if not re.fullmatch(rb"[0-9A-Fa-f]+", size_b): # chunk-size = 1*HEXDIG
exc = TransferEncodingError(
chunk[:pos].decode("ascii", "surrogateescape")
)
self.payload.set_exception(exc)
- raise exc from None
+ raise exc
+ size = int(bytes(size_b), 16)
chunk = chunk[pos + 2 :]
if size == 0: # eof marker
diff --git a/tests/test_http_parser.py b/tests/test_http_parser.py
index ab39cdd..214afe8 100644
--- a/tests/test_http_parser.py
+++ b/tests/test_http_parser.py
@@ -3,6 +3,7 @@
import asyncio
from unittest import mock
from urllib.parse import quote
+from typing import Any
import pytest
from multidict import CIMultiDict
@@ -365,6 +366,83 @@ def test_invalid_name(parser) -> None:
parser.feed_data(text)
+def test_cve_2023_37276(parser: Any) -> None:
+ text = b"""POST / HTTP/1.1\r\nHost: localhost:8080\r\nX-Abc: \rxTransfer-Encoding: chunked\r\n\r\n"""
+ if isinstance(parser, HttpRequestParserPy):
+ with pytest.raises(aiohttp.http_exceptions.InvalidHeader):
+ parser.feed_data(text)
+ else:
+ pytest.xfail("Regression test for Py parser. May match C behaviour later.")
+
+
+@pytest.mark.parametrize(
+ "hdr",
+ (
+ "Content-Length: -5", # https://www.rfc-editor.org/rfc/rfc9110.html#name-content-length
+ "Content-Length: +256",
+ "Foo: abc\rdef", # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5
+ "Bar: abc\ndef",
+ "Baz: abc\x00def",
+ "Foo : bar", # https://www.rfc-editor.org/rfc/rfc9112.html#section-5.1-2
+ "Foo\t: bar",
+ ),
+)
+def test_bad_headers(parser: Any, hdr: str) -> None:
+ text = f"POST / HTTP/1.1\r\n{hdr}\r\n\r\n".encode()
+ if isinstance(parser, HttpRequestParserPy):
+ with pytest.raises(aiohttp.http_exceptions.InvalidHeader):
+ parser.feed_data(text)
+ elif hdr != "Foo : bar":
+ with pytest.raises(aiohttp.http_exceptions.BadHttpMessage):
+ parser.feed_data(text)
+ else:
+ pytest.xfail("Regression test for Py parser. May match C behaviour later.")
+
+
+def test_bad_chunked_py(loop: Any, protocol: Any) -> None:
+ """Test that invalid chunked encoding doesn't allow content-length to be used."""
+ parser = HttpRequestParserPy(
+ protocol,
+ loop,
+ 2**16,
+ max_line_size=8190,
+ max_field_size=8190,
+ )
+ text = (
+ b"GET / HTTP/1.1\r\nHost: a\r\nTransfer-Encoding: chunked\r\n\r\n0_2e\r\n\r\n"
+ + b"GET / HTTP/1.1\r\nHost: a\r\nContent-Length: 5\r\n\r\n0\r\n\r\n"
+ )
+ messages, upgrade, tail = parser.feed_data(text)
+ assert isinstance(messages[0][1].exception(), http_exceptions.TransferEncodingError)
+
+
+@pytest.mark.skipif(
+ "HttpRequestParserC" not in dir(aiohttp.http_parser),
+ reason="C based HTTP parser not available",
+)
+def test_bad_chunked_c(loop: Any, protocol: Any) -> None:
+ """C parser behaves differently. Maybe we should align them later."""
+ parser = HttpRequestParserC(
+ protocol,
+ loop,
+ 2**16,
+ max_line_size=8190,
+ max_field_size=8190,
+ )
+ text = (
+ b"GET / HTTP/1.1\r\nHost: a\r\nTransfer-Encoding: chunked\r\n\r\n0_2e\r\n\r\n"
+ + b"GET / HTTP/1.1\r\nHost: a\r\nContent-Length: 5\r\n\r\n0\r\n\r\n"
+ )
+ with pytest.raises(http_exceptions.BadHttpMessage):
+ parser.feed_data(text)
+
+
+def test_whitespace_before_header(parser: Any) -> None:
+ text = b"GET / HTTP/1.1\r\n\tContent-Length: 1\r\n\r\nX"
+ with pytest.raises(http_exceptions.BadHttpMessage):
+ parser.feed_data(text)
+
+
@pytest.mark.parametrize("size", [40960, 8191])
def test_max_header_field_size(parser, size) -> None:
name = b"t" * size
@@ -546,6 +624,11 @@ def test_http_request_parser_bad_version(parser) -> None:
parser.feed_data(b"GET //get HT/11\r\n\r\n")
+def test_http_request_parser_bad_version_number(parser: Any) -> None:
+ with pytest.raises(http_exceptions.BadHttpMessage):
+ parser.feed_data(b"GET /test HTTP/12.3\r\n\r\n")
+
+
@pytest.mark.parametrize("size", [40965, 8191])
def test_http_request_max_status_line(parser, size) -> None:
path = b"t" * (size - 5)
@@ -613,6 +696,11 @@ def test_http_response_parser_bad_version(response) -> None:
response.feed_data(b"HT/11 200 Ok\r\n\r\n")
+def test_http_response_parser_bad_version_number(response) -> None:
+ with pytest.raises(http_exceptions.BadHttpMessage):
+ response.feed_data(b"HTTP/12.3 200 Ok\r\n\r\n")
+
+
def test_http_response_parser_no_reason(response) -> None:
msg = response.feed_data(b"HTTP/1.1 200\r\n\r\n")[0][0][0]
@@ -627,17 +715,20 @@ def test_http_response_parser_bad(response) -> None:
def test_http_response_parser_code_under_100(response) -> None:
- msg = response.feed_data(b"HTTP/1.1 99 test\r\n\r\n")[0][0][0]
- assert msg.code == 99
+ if isinstance(response, HttpResponseParserPy):
+ with pytest.raises(aiohttp.http_exceptions.BadStatusLine):
+ response.feed_data(b"HTTP/1.1 99 test\r\n\r\n")
+ else:
+ pytest.xfail("Regression test for Py parser. May match C behaviour later.")
def test_http_response_parser_code_above_999(response) -> None:
- with pytest.raises(http_exceptions.BadHttpMessage):
+ with pytest.raises(http_exceptions.BadStatusLine):
response.feed_data(b"HTTP/1.1 9999 test\r\n\r\n")
def test_http_response_parser_code_not_int(response) -> None:
- with pytest.raises(http_exceptions.BadHttpMessage):
+ with pytest.raises(http_exceptions.BadStatusLine):
response.feed_data(b"HTTP/1.1 ttt test\r\n\r\n")