fix CVE-2024-37388
(cherry picked from commit e861ad4dc5e50f1a111f3becbadc6278622dca2b)
This commit is contained in:
parent
a1ab747614
commit
891c7fac3c
78
Fix-test_elementtree-with-Expat-2.6.0.patch
Normal file
78
Fix-test_elementtree-with-Expat-2.6.0.patch
Normal file
@ -0,0 +1,78 @@
|
|||||||
|
From e3012a702dea2b03830fe00a5e8f7a429bbc3f42 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Serhiy Storchaka <storchaka@gmail.com>
|
||||||
|
Date: Mon, 22 Apr 2024 16:52:26 +0800
|
||||||
|
Subject: [PATCH] Fix test_elementtree with Expat 2.6.0
|
||||||
|
|
||||||
|
---
|
||||||
|
src/lxml/tests/test_elementtree.py | 48 ++++++++++++++++--------------
|
||||||
|
1 file changed, 25 insertions(+), 23 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/src/lxml/tests/test_elementtree.py b/src/lxml/tests/test_elementtree.py
|
||||||
|
index 96426cb..d9cd47e 100644
|
||||||
|
--- a/src/lxml/tests/test_elementtree.py
|
||||||
|
+++ b/src/lxml/tests/test_elementtree.py
|
||||||
|
@@ -14,6 +14,7 @@ import copy
|
||||||
|
import io
|
||||||
|
import operator
|
||||||
|
import os
|
||||||
|
+import pyexpat
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import textwrap
|
||||||
|
@@ -4396,29 +4397,30 @@ class _XMLPullParserTest(unittest.TestCase):
|
||||||
|
self.assertEqual([(action, elem.tag) for action, elem in events],
|
||||||
|
expected)
|
||||||
|
|
||||||
|
- def test_simple_xml(self):
|
||||||
|
- for chunk_size in (None, 1, 5):
|
||||||
|
- #with self.subTest(chunk_size=chunk_size):
|
||||||
|
- parser = self.etree.XMLPullParser()
|
||||||
|
- self.assert_event_tags(parser, [])
|
||||||
|
- self._feed(parser, "<!-- comment -->\n", chunk_size)
|
||||||
|
- self.assert_event_tags(parser, [])
|
||||||
|
- self._feed(parser,
|
||||||
|
- "<root>\n <element key='value'>text</element",
|
||||||
|
- chunk_size)
|
||||||
|
- self.assert_event_tags(parser, [])
|
||||||
|
- self._feed(parser, ">\n", chunk_size)
|
||||||
|
- self.assert_event_tags(parser, [('end', 'element')])
|
||||||
|
- self._feed(parser, "<element>text</element>tail\n", chunk_size)
|
||||||
|
- self._feed(parser, "<empty-element/>\n", chunk_size)
|
||||||
|
- self.assert_event_tags(parser, [
|
||||||
|
- ('end', 'element'),
|
||||||
|
- ('end', 'empty-element'),
|
||||||
|
- ])
|
||||||
|
- self._feed(parser, "</root>\n", chunk_size)
|
||||||
|
- self.assert_event_tags(parser, [('end', 'root')])
|
||||||
|
- root = self._close_and_return_root(parser)
|
||||||
|
- self.assertEqual(root.tag, 'root')
|
||||||
|
+ def test_simple_xml(self, chunk_size=None):
|
||||||
|
+ parser = self.etree.XMLPullParser()
|
||||||
|
+ self.assert_event_tags(parser, [])
|
||||||
|
+ self._feed(parser, "<!-- comment -->\n", chunk_size)
|
||||||
|
+ self.assert_event_tags(parser, [])
|
||||||
|
+ self._feed(parser,
|
||||||
|
+ "<root>\n <element key='value'>text</element",
|
||||||
|
+ chunk_size)
|
||||||
|
+ self.assert_event_tags(parser, [])
|
||||||
|
+ self._feed(parser, ">\n", chunk_size)
|
||||||
|
+ self.assert_event_tags(parser, [('end', 'element')])
|
||||||
|
+ self._feed(parser, "<element>text</element>tail\n", chunk_size)
|
||||||
|
+ self._feed(parser, "<empty-element/>\n", chunk_size)
|
||||||
|
+ self.assert_event_tags(parser, [
|
||||||
|
+ ('end', 'element'),
|
||||||
|
+ ('end', 'empty-element'),
|
||||||
|
+ ])
|
||||||
|
+ self._feed(parser, "</root>\n", chunk_size)
|
||||||
|
+ self.assert_event_tags(parser, [('end', 'root')])
|
||||||
|
+ root = self._close_and_return_root(parser)
|
||||||
|
+ self.assertEqual(root.tag, 'root')
|
||||||
|
+
|
||||||
|
+ def test_simple_xml_chunk_22(self):
|
||||||
|
+ self.test_simple_xml(chunk_size=22)
|
||||||
|
|
||||||
|
def test_feed_while_iterating(self):
|
||||||
|
parser = self.etree.XMLPullParser()
|
||||||
|
--
|
||||||
|
2.33.0
|
||||||
|
|
||||||
372
backport-CVE-2024-37388.patch
Normal file
372
backport-CVE-2024-37388.patch
Normal file
@ -0,0 +1,372 @@
|
|||||||
|
From b38cebf2f846e92bd63de4488fd3d1c8b568f397 Mon Sep 17 00:00:00 2001
|
||||||
|
From: scoder <stefan_ml@behnel.de>
|
||||||
|
Date: Fri, 29 Dec 2023 14:21:23 +0100
|
||||||
|
Subject: [PATCH] Disable external entity resolution (XXE) by default (GH-391)
|
||||||
|
|
||||||
|
This prevents security risks that would allow loading arbitrary external files.
|
||||||
|
|
||||||
|
Closes https://bugs.launchpad.net/lxml/+bug/1742885
|
||||||
|
Supersedes https://github.com/lxml/lxml/pull/130
|
||||||
|
---
|
||||||
|
doc/FAQ.txt | 12 +++--
|
||||||
|
src/lxml/includes/xmlparser.pxd | 18 +++++++-
|
||||||
|
src/lxml/parser.pxi | 70 ++++++++++++++++++++++++++--
|
||||||
|
src/lxml/tests/test_etree.py | 81 +++++++++++++++++++++++++++++++++
|
||||||
|
4 files changed, 170 insertions(+), 11 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/doc/FAQ.txt b/doc/FAQ.txt
|
||||||
|
index 48f69a6..7f3a524 100644
|
||||||
|
--- a/doc/FAQ.txt
|
||||||
|
+++ b/doc/FAQ.txt
|
||||||
|
@@ -1107,9 +1107,9 @@ useless for the data commonly sent through web services and
|
||||||
|
can simply be disabled, which rules out several types of
|
||||||
|
denial of service attacks at once. This also involves an attack
|
||||||
|
that reads local files from the server, as XML entities can be
|
||||||
|
-defined to expand into their content. Consequently, version
|
||||||
|
-1.2 of the SOAP standard explicitly disallows entity references
|
||||||
|
-in the XML stream.
|
||||||
|
+defined to expand into the content of external resources.
|
||||||
|
+Consequently, version 1.2 of the SOAP standard explicitly
|
||||||
|
+disallows entity references in the XML stream.
|
||||||
|
|
||||||
|
To disable entity expansion, use an XML parser that is configured
|
||||||
|
with the option ``resolve_entities=False``. Then, after (or
|
||||||
|
@@ -1117,7 +1117,11 @@ while) parsing the document, use ``root.iter(etree.Entity)`` to
|
||||||
|
recursively search for entity references. If it contains any,
|
||||||
|
reject the entire input document with a suitable error response.
|
||||||
|
In lxml 3.x, you can also use the new DTD introspection API to
|
||||||
|
-apply your own restrictions on input documents.
|
||||||
|
+apply your own restrictions on input documents. Since version 5.x,
|
||||||
|
+lxml disables the expansion of external entities (XXE) by default.
|
||||||
|
+If you really want to allow loading external files into XML documents
|
||||||
|
+using this functionality, you have to explicitly set
|
||||||
|
+``resolve_entities=True``.
|
||||||
|
|
||||||
|
Another attack to consider is compression bombs. If you allow
|
||||||
|
compressed input into your web service, attackers can try to send
|
||||||
|
diff --git a/src/lxml/includes/xmlparser.pxd b/src/lxml/includes/xmlparser.pxd
|
||||||
|
index 45acfc8..3945495 100644
|
||||||
|
--- a/src/lxml/includes/xmlparser.pxd
|
||||||
|
+++ b/src/lxml/includes/xmlparser.pxd
|
||||||
|
@@ -1,9 +1,9 @@
|
||||||
|
from libc.string cimport const_char
|
||||||
|
|
||||||
|
from lxml.includes.tree cimport (
|
||||||
|
- xmlDoc, xmlNode, xmlDict, xmlDtd, xmlChar, const_xmlChar)
|
||||||
|
+ xmlDoc, xmlNode, xmlEntity, xmlDict, xmlDtd, xmlChar, const_xmlChar)
|
||||||
|
from lxml.includes.tree cimport xmlInputReadCallback, xmlInputCloseCallback
|
||||||
|
-from lxml.includes.xmlerror cimport xmlError, xmlStructuredErrorFunc
|
||||||
|
+from lxml.includes.xmlerror cimport xmlError, xmlStructuredErrorFunc, xmlErrorLevel
|
||||||
|
|
||||||
|
|
||||||
|
cdef extern from "libxml/parser.h":
|
||||||
|
@@ -47,11 +47,14 @@ cdef extern from "libxml/parser.h":
|
||||||
|
|
||||||
|
ctypedef void (*referenceSAXFunc)(void * ctx, const_xmlChar* name)
|
||||||
|
|
||||||
|
+ ctypedef xmlEntity* (*getEntitySAXFunc)(void* ctx, const_xmlChar* name)
|
||||||
|
+
|
||||||
|
cdef int XML_SAX2_MAGIC
|
||||||
|
|
||||||
|
cdef extern from "libxml/tree.h":
|
||||||
|
ctypedef struct xmlParserInput:
|
||||||
|
int line
|
||||||
|
+ int col
|
||||||
|
int length
|
||||||
|
const_xmlChar* base
|
||||||
|
const_xmlChar* cur
|
||||||
|
@@ -76,6 +79,7 @@ cdef extern from "libxml/tree.h":
|
||||||
|
charactersSAXFunc characters
|
||||||
|
cdataBlockSAXFunc cdataBlock
|
||||||
|
referenceSAXFunc reference
|
||||||
|
+ getEntitySAXFunc getEntity
|
||||||
|
commentSAXFunc comment
|
||||||
|
processingInstructionSAXFunc processingInstruction
|
||||||
|
startDocumentSAXFunc startDocument
|
||||||
|
@@ -150,6 +154,8 @@ cdef extern from "libxml/parser.h":
|
||||||
|
int inSubset
|
||||||
|
int charset
|
||||||
|
xmlParserInput* input
|
||||||
|
+ int inputNr
|
||||||
|
+ xmlParserInput** inputTab
|
||||||
|
|
||||||
|
ctypedef enum xmlParserOption:
|
||||||
|
XML_PARSE_RECOVER = 1 # recover on errors
|
||||||
|
@@ -212,6 +218,12 @@ cdef extern from "libxml/parser.h":
|
||||||
|
char* filename, const_char* encoding,
|
||||||
|
int options) nogil
|
||||||
|
|
||||||
|
+ cdef void xmlErrParser(xmlParserCtxt* ctxt, xmlNode* node,
|
||||||
|
+ int domain, int code, xmlErrorLevel level,
|
||||||
|
+ const xmlChar *str1, const xmlChar *str2, const xmlChar *str3,
|
||||||
|
+ int int1, const char *msg, ...)
|
||||||
|
+
|
||||||
|
+
|
||||||
|
# iterparse:
|
||||||
|
|
||||||
|
cdef xmlParserCtxt* xmlCreatePushParserCtxt(xmlSAXHandler* sax,
|
||||||
|
@@ -233,6 +245,8 @@ cdef extern from "libxml/parser.h":
|
||||||
|
cdef xmlExternalEntityLoader xmlGetExternalEntityLoader() nogil
|
||||||
|
cdef void xmlSetExternalEntityLoader(xmlExternalEntityLoader f) nogil
|
||||||
|
|
||||||
|
+ cdef xmlEntity* xmlSAX2GetEntity(void* ctxt, const_xmlChar* name) nogil
|
||||||
|
+
|
||||||
|
# DTDs:
|
||||||
|
|
||||||
|
cdef xmlDtd* xmlParseDTD(const_xmlChar* ExternalID, const_xmlChar* SystemID) nogil
|
||||||
|
diff --git a/src/lxml/parser.pxi b/src/lxml/parser.pxi
|
||||||
|
index 3187a38..2f0ce80 100644
|
||||||
|
--- a/src/lxml/parser.pxi
|
||||||
|
+++ b/src/lxml/parser.pxi
|
||||||
|
@@ -794,6 +794,7 @@ cdef inline int _fixHtmlDictNodeNames(tree.xmlDict* c_dict,
|
||||||
|
c_attr = c_attr.next
|
||||||
|
return 0
|
||||||
|
|
||||||
|
+
|
||||||
|
@cython.internal
|
||||||
|
cdef class _BaseParser:
|
||||||
|
cdef ElementClassLookup _class_lookup
|
||||||
|
@@ -806,6 +807,7 @@ cdef class _BaseParser:
|
||||||
|
cdef bint _remove_pis
|
||||||
|
cdef bint _strip_cdata
|
||||||
|
cdef bint _collect_ids
|
||||||
|
+ cdef bint _resolve_external_entities
|
||||||
|
cdef XMLSchema _schema
|
||||||
|
cdef bytes _filename
|
||||||
|
cdef readonly object target
|
||||||
|
@@ -814,7 +816,7 @@ cdef class _BaseParser:
|
||||||
|
|
||||||
|
def __init__(self, int parse_options, bint for_html, XMLSchema schema,
|
||||||
|
remove_comments, remove_pis, strip_cdata, collect_ids,
|
||||||
|
- target, encoding):
|
||||||
|
+ target, encoding, bint resolve_external_entities=True):
|
||||||
|
cdef tree.xmlCharEncodingHandler* enchandler
|
||||||
|
cdef int c_encoding
|
||||||
|
if not isinstance(self, (XMLParser, HTMLParser)):
|
||||||
|
@@ -827,6 +829,7 @@ cdef class _BaseParser:
|
||||||
|
self._remove_pis = remove_pis
|
||||||
|
self._strip_cdata = strip_cdata
|
||||||
|
self._collect_ids = collect_ids
|
||||||
|
+ self._resolve_external_entities = resolve_external_entities
|
||||||
|
self._schema = schema
|
||||||
|
|
||||||
|
self._resolvers = _ResolverRegistry()
|
||||||
|
@@ -906,6 +909,8 @@ cdef class _BaseParser:
|
||||||
|
if self._strip_cdata:
|
||||||
|
# hard switch-off for CDATA nodes => makes them plain text
|
||||||
|
pctxt.sax.cdataBlock = NULL
|
||||||
|
+ if not self._resolve_external_entities:
|
||||||
|
+ pctxt.sax.getEntity = _getInternalEntityOnly
|
||||||
|
|
||||||
|
cdef int _registerHtmlErrorHandler(self, xmlparser.xmlParserCtxt* c_ctxt) except -1:
|
||||||
|
cdef xmlparser.xmlSAXHandler* sax = c_ctxt.sax
|
||||||
|
@@ -1206,6 +1211,56 @@ cdef class _BaseParser:
|
||||||
|
finally:
|
||||||
|
context.cleanup()
|
||||||
|
|
||||||
|
+cdef tree.xmlEntity* _getInternalEntityOnly(void* ctxt, const_xmlChar* name):
|
||||||
|
+ """
|
||||||
|
+ Callback function to intercept the entity resolution when external entity loading is disabled.
|
||||||
|
+ """
|
||||||
|
+ cdef tree.xmlEntity* entity = xmlparser.xmlSAX2GetEntity(ctxt, name)
|
||||||
|
+ if not entity:
|
||||||
|
+ return NULL
|
||||||
|
+ if entity.etype not in (
|
||||||
|
+ tree.xmlEntityType.XML_EXTERNAL_GENERAL_PARSED_ENTITY,
|
||||||
|
+ tree.xmlEntityType.XML_EXTERNAL_GENERAL_UNPARSED_ENTITY,
|
||||||
|
+ tree.xmlEntityType.XML_EXTERNAL_PARAMETER_ENTITY):
|
||||||
|
+ return entity
|
||||||
|
+
|
||||||
|
+ # Reject all external entities and fail the parsing instead. There is currently
|
||||||
|
+ # no way in libxml2 to just prevent the entity resolution in this case.
|
||||||
|
+ cdef xmlerror.xmlError c_error
|
||||||
|
+ cdef xmlerror.xmlStructuredErrorFunc err_func
|
||||||
|
+ cdef xmlparser.xmlParserInput* parser_input
|
||||||
|
+ cdef void* err_context
|
||||||
|
+
|
||||||
|
+ c_ctxt = <xmlparser.xmlParserCtxt *> ctxt
|
||||||
|
+ err_func = xmlerror.xmlStructuredError
|
||||||
|
+ if err_func:
|
||||||
|
+ parser_input = c_ctxt.input
|
||||||
|
+ # Copied from xmlVErrParser() in libxml2: get current input from stack.
|
||||||
|
+ if parser_input and parser_input.filename is NULL and c_ctxt.inputNr > 1:
|
||||||
|
+ parser_input = c_ctxt.inputTab[c_ctxt.inputNr - 2]
|
||||||
|
+
|
||||||
|
+ c_error = xmlerror.xmlError(
|
||||||
|
+ domain=xmlerror.xmlErrorDomain.XML_FROM_PARSER,
|
||||||
|
+ code=xmlerror.xmlParserErrors.XML_ERR_EXT_ENTITY_STANDALONE,
|
||||||
|
+ level=xmlerror.xmlErrorLevel.XML_ERR_FATAL,
|
||||||
|
+ message=b"External entity resolution is disabled for security reasons "
|
||||||
|
+ b"when resolving '&%s;'. Use 'XMLParser(resolve_entities=True)' "
|
||||||
|
+ b"if you consider it safe to enable it.",
|
||||||
|
+ file=parser_input.filename,
|
||||||
|
+ node=entity,
|
||||||
|
+ str1=<char*> name,
|
||||||
|
+ str2=NULL,
|
||||||
|
+ str3=NULL,
|
||||||
|
+ line=parser_input.line if parser_input else 0,
|
||||||
|
+ int1=0,
|
||||||
|
+ int2=parser_input.col if parser_input else 0,
|
||||||
|
+ )
|
||||||
|
+ err_context = xmlerror.xmlStructuredErrorContext
|
||||||
|
+ err_func(err_context, &c_error)
|
||||||
|
+
|
||||||
|
+ c_ctxt.wellFormed = 0
|
||||||
|
+ # The entity was looked up and does not need to be freed.
|
||||||
|
+ return NULL
|
||||||
|
|
||||||
|
cdef void _initSaxDocument(void* ctxt) with gil:
|
||||||
|
xmlparser.xmlSAX2StartDocument(ctxt)
|
||||||
|
@@ -1508,12 +1563,14 @@ cdef class XMLParser(_FeedParser):
|
||||||
|
- strip_cdata - replace CDATA sections by normal text content (default: True)
|
||||||
|
- compact - save memory for short text content (default: True)
|
||||||
|
- collect_ids - use a hash table of XML IDs for fast access (default: True, always True with DTD validation)
|
||||||
|
- - resolve_entities - replace entities by their text value (default: True)
|
||||||
|
- huge_tree - disable security restrictions and support very deep trees
|
||||||
|
and very long text content (only affects libxml2 2.7+)
|
||||||
|
|
||||||
|
Other keyword arguments:
|
||||||
|
-
|
||||||
|
+ - resolve_entities - replace entities by their text value: False for keeping the
|
||||||
|
+ entity references, True for resolving them, and 'internal' for resolving
|
||||||
|
+ internal definitions only (no external file/URL access).
|
||||||
|
+ The default used to be True and was changed to 'internal' in lxml 5.0.
|
||||||
|
- encoding - override the document encoding
|
||||||
|
- target - a parser target object that will receive the parse events
|
||||||
|
- schema - an XMLSchema to validate against
|
||||||
|
@@ -1525,10 +1582,11 @@ cdef class XMLParser(_FeedParser):
|
||||||
|
def __init__(self, *, encoding=None, attribute_defaults=False,
|
||||||
|
dtd_validation=False, load_dtd=False, no_network=True,
|
||||||
|
ns_clean=False, recover=False, XMLSchema schema=None,
|
||||||
|
- huge_tree=False, remove_blank_text=False, resolve_entities=True,
|
||||||
|
+ huge_tree=False, remove_blank_text=False, resolve_entities='internal',
|
||||||
|
remove_comments=False, remove_pis=False, strip_cdata=True,
|
||||||
|
collect_ids=True, target=None, compact=True):
|
||||||
|
cdef int parse_options
|
||||||
|
+ cdef bint resolve_external = True
|
||||||
|
parse_options = _XML_DEFAULT_PARSE_OPTIONS
|
||||||
|
if load_dtd:
|
||||||
|
parse_options = parse_options | xmlparser.XML_PARSE_DTDLOAD
|
||||||
|
@@ -1553,12 +1611,14 @@ cdef class XMLParser(_FeedParser):
|
||||||
|
parse_options = parse_options ^ xmlparser.XML_PARSE_COMPACT
|
||||||
|
if not resolve_entities:
|
||||||
|
parse_options = parse_options ^ xmlparser.XML_PARSE_NOENT
|
||||||
|
+ elif resolve_entities == 'internal':
|
||||||
|
+ resolve_external = False
|
||||||
|
if not strip_cdata:
|
||||||
|
parse_options = parse_options ^ xmlparser.XML_PARSE_NOCDATA
|
||||||
|
|
||||||
|
_BaseParser.__init__(self, parse_options, 0, schema,
|
||||||
|
remove_comments, remove_pis, strip_cdata,
|
||||||
|
- collect_ids, target, encoding)
|
||||||
|
+ collect_ids, target, encoding, resolve_external)
|
||||||
|
|
||||||
|
|
||||||
|
cdef class XMLPullParser(XMLParser):
|
||||||
|
diff --git a/src/lxml/tests/test_etree.py b/src/lxml/tests/test_etree.py
|
||||||
|
index 14b21f7..bc7548f 100644
|
||||||
|
--- a/src/lxml/tests/test_etree.py
|
||||||
|
+++ b/src/lxml/tests/test_etree.py
|
||||||
|
@@ -12,11 +12,14 @@ from __future__ import absolute_import
|
||||||
|
from collections import OrderedDict
|
||||||
|
import os.path
|
||||||
|
import unittest
|
||||||
|
+import contextlib
|
||||||
|
import copy
|
||||||
|
import sys
|
||||||
|
import re
|
||||||
|
import gc
|
||||||
|
import operator
|
||||||
|
+import shutil
|
||||||
|
+import tempfile
|
||||||
|
import textwrap
|
||||||
|
import zlib
|
||||||
|
import gzip
|
||||||
|
@@ -1675,6 +1678,84 @@ class ETreeOnlyTestCase(HelperTestCase):
|
||||||
|
self.assertEqual(_bytes('<doc>&myentity;</doc>'),
|
||||||
|
tostring(root))
|
||||||
|
|
||||||
|
+ @contextlib.contextmanager
|
||||||
|
+ def _xml_test_file(self, name, content=b'<evil>XML</evil>'):
|
||||||
|
+ temp_dir = tempfile.mkdtemp()
|
||||||
|
+ try:
|
||||||
|
+ xml_file = os.path.join(temp_dir, name)
|
||||||
|
+ with open(xml_file, 'wb') as tmpfile:
|
||||||
|
+ tmpfile.write(content)
|
||||||
|
+ yield xml_file
|
||||||
|
+ finally:
|
||||||
|
+ shutil.rmtree(temp_dir)
|
||||||
|
+
|
||||||
|
+ def test_entity_parse_external(self):
|
||||||
|
+ fromstring = self.etree.fromstring
|
||||||
|
+ tostring = self.etree.tostring
|
||||||
|
+ parser = self.etree.XMLParser(resolve_entities=True)
|
||||||
|
+
|
||||||
|
+ with self._xml_test_file("entity.xml") as entity_file:
|
||||||
|
+ xml = '''
|
||||||
|
+ <!DOCTYPE doc [
|
||||||
|
+ <!ENTITY my_external_entity SYSTEM "%s">
|
||||||
|
+ ]>
|
||||||
|
+ <doc>&my_external_entity;</doc>
|
||||||
|
+ ''' % path2url(entity_file)
|
||||||
|
+ root = fromstring(xml, parser)
|
||||||
|
+
|
||||||
|
+ self.assertEqual(_bytes('<doc><evil>XML</evil></doc>'),
|
||||||
|
+ tostring(root))
|
||||||
|
+ self.assertEqual(root.tag, 'doc')
|
||||||
|
+ self.assertEqual(root[0].tag, 'evil')
|
||||||
|
+ self.assertEqual(root[0].text, 'XML')
|
||||||
|
+ self.assertEqual(root[0].tail, None)
|
||||||
|
+
|
||||||
|
+ def test_entity_parse_external_no_resolve(self):
|
||||||
|
+ fromstring = self.etree.fromstring
|
||||||
|
+ parser = self.etree.XMLParser(resolve_entities=False)
|
||||||
|
+ Entity = self.etree.Entity
|
||||||
|
+
|
||||||
|
+ with self._xml_test_file("entity.xml") as entity_file:
|
||||||
|
+ xml = '''
|
||||||
|
+ <!DOCTYPE doc [
|
||||||
|
+ <!ENTITY my_external_entity SYSTEM "%s">
|
||||||
|
+ ]>
|
||||||
|
+ <doc>&my_external_entity;</doc>
|
||||||
|
+ ''' % path2url(entity_file)
|
||||||
|
+ root = fromstring(xml, parser)
|
||||||
|
+
|
||||||
|
+ self.assertEqual(root[0].tag, Entity)
|
||||||
|
+ self.assertEqual(root[0].text, "&my_external_entity;")
|
||||||
|
+
|
||||||
|
+ def test_entity_parse_no_external_default(self):
|
||||||
|
+ fromstring = self.etree.fromstring
|
||||||
|
+
|
||||||
|
+ with self._xml_test_file("entity.xml") as entity_file:
|
||||||
|
+ xml = '''
|
||||||
|
+ <!DOCTYPE doc [
|
||||||
|
+ <!ENTITY my_failing_external_entity SYSTEM "%s">
|
||||||
|
+ ]>
|
||||||
|
+ <doc>&my_failing_external_entity;</doc>
|
||||||
|
+ ''' % path2url(entity_file)
|
||||||
|
+
|
||||||
|
+ try:
|
||||||
|
+ fromstring(xml)
|
||||||
|
+ except self.etree.XMLSyntaxError as exc:
|
||||||
|
+ exception = exc
|
||||||
|
+ else:
|
||||||
|
+ self.assertTrue(False, "XMLSyntaxError was not raised")
|
||||||
|
+
|
||||||
|
+ self.assertIn("my_failing_external_entity", str(exception))
|
||||||
|
+ self.assertTrue(exception.error_log)
|
||||||
|
+ # Depending on the libxml2 version, we get different errors here,
|
||||||
|
+ # not necessarily the one that lxml produced. But it should fail either way.
|
||||||
|
+ for error in exception.error_log:
|
||||||
|
+ if "my_failing_external_entity" in error.message:
|
||||||
|
+ self.assertEqual(5, error.line)
|
||||||
|
+ break
|
||||||
|
+ else:
|
||||||
|
+ self.assertFalse("entity error not found in parser error log")
|
||||||
|
+
|
||||||
|
def test_entity_restructure(self):
|
||||||
|
xml = _bytes('''<!DOCTYPE root [ <!ENTITY nbsp " "> ]>
|
||||||
|
<root>
|
||||||
|
--
|
||||||
|
2.33.0
|
||||||
|
|
||||||
@ -0,0 +1,220 @@
|
|||||||
|
From 72f5a287a4016ecb405f2e8a4a03ae22a5b0b496 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Stefan Behnel <stefan_ml@behnel.de>
|
||||||
|
Date: Wed, 5 Jul 2023 22:10:45 +0200
|
||||||
|
Subject: [PATCH] Change HTML "prefix" handling in ElementPath to let
|
||||||
|
"element.find('part1:part2')" search for "part1:part2" instead of just
|
||||||
|
"part2" with an unknown prefix. Also adapt the HTML "prefix" parsing test to
|
||||||
|
make it work in libxml2 2.10.4 and later, where HTML "prefixes" are kept as
|
||||||
|
part of the tag name by the parser.
|
||||||
|
|
||||||
|
---
|
||||||
|
src/lxml/_elementpath.py | 22 +++++++++++-----------
|
||||||
|
src/lxml/apihelpers.pxi | 7 +++++++
|
||||||
|
src/lxml/etree.pyx | 8 ++++----
|
||||||
|
src/lxml/includes/tree.pxd | 12 ++++++++++++
|
||||||
|
src/lxml/tests/test_etree.py | 20 ++++++++++++++++----
|
||||||
|
5 files changed, 50 insertions(+), 19 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/src/lxml/_elementpath.py b/src/lxml/_elementpath.py
|
||||||
|
index eabd81c..001b345 100644
|
||||||
|
--- a/src/lxml/_elementpath.py
|
||||||
|
+++ b/src/lxml/_elementpath.py
|
||||||
|
@@ -71,14 +71,14 @@ xpath_tokenizer_re = re.compile(
|
||||||
|
r"\s+"
|
||||||
|
)
|
||||||
|
|
||||||
|
-def xpath_tokenizer(pattern, namespaces=None):
|
||||||
|
+def xpath_tokenizer(pattern, namespaces=None, with_prefixes=True):
|
||||||
|
# ElementTree uses '', lxml used None originally.
|
||||||
|
default_namespace = (namespaces.get(None) or namespaces.get('')) if namespaces else None
|
||||||
|
parsing_attribute = False
|
||||||
|
for token in xpath_tokenizer_re.findall(pattern):
|
||||||
|
ttype, tag = token
|
||||||
|
if tag and tag[0] != "{":
|
||||||
|
- if ":" in tag:
|
||||||
|
+ if ":" in tag and with_prefixes:
|
||||||
|
prefix, uri = tag.split(":", 1)
|
||||||
|
try:
|
||||||
|
if not namespaces:
|
||||||
|
@@ -251,7 +251,7 @@ ops = {
|
||||||
|
_cache = {}
|
||||||
|
|
||||||
|
|
||||||
|
-def _build_path_iterator(path, namespaces):
|
||||||
|
+def _build_path_iterator(path, namespaces, with_prefixes=True):
|
||||||
|
"""compile selector pattern"""
|
||||||
|
if path[-1:] == "/":
|
||||||
|
path += "*" # implicit all (FIXME: keep this?)
|
||||||
|
@@ -279,7 +279,7 @@ def _build_path_iterator(path, namespaces):
|
||||||
|
|
||||||
|
if path[:1] == "/":
|
||||||
|
raise SyntaxError("cannot use absolute path on element")
|
||||||
|
- stream = iter(xpath_tokenizer(path, namespaces))
|
||||||
|
+ stream = iter(xpath_tokenizer(path, namespaces, with_prefixes=with_prefixes))
|
||||||
|
try:
|
||||||
|
_next = stream.next
|
||||||
|
except AttributeError:
|
||||||
|
@@ -308,8 +308,8 @@ def _build_path_iterator(path, namespaces):
|
||||||
|
##
|
||||||
|
# Iterate over the matching nodes
|
||||||
|
|
||||||
|
-def iterfind(elem, path, namespaces=None):
|
||||||
|
- selector = _build_path_iterator(path, namespaces)
|
||||||
|
+def iterfind(elem, path, namespaces=None, with_prefixes=True):
|
||||||
|
+ selector = _build_path_iterator(path, namespaces, with_prefixes=with_prefixes)
|
||||||
|
result = iter((elem,))
|
||||||
|
for select in selector:
|
||||||
|
result = select(result)
|
||||||
|
@@ -319,8 +319,8 @@ def iterfind(elem, path, namespaces=None):
|
||||||
|
##
|
||||||
|
# Find first matching object.
|
||||||
|
|
||||||
|
-def find(elem, path, namespaces=None):
|
||||||
|
- it = iterfind(elem, path, namespaces)
|
||||||
|
+def find(elem, path, namespaces=None, with_prefixes=True):
|
||||||
|
+ it = iterfind(elem, path, namespaces, with_prefixes=with_prefixes)
|
||||||
|
try:
|
||||||
|
return next(it)
|
||||||
|
except StopIteration:
|
||||||
|
@@ -330,15 +330,15 @@ def find(elem, path, namespaces=None):
|
||||||
|
##
|
||||||
|
# Find all matching objects.
|
||||||
|
|
||||||
|
-def findall(elem, path, namespaces=None):
|
||||||
|
+def findall(elem, path, namespaces=None, with_prefixes=True):
|
||||||
|
return list(iterfind(elem, path, namespaces))
|
||||||
|
|
||||||
|
|
||||||
|
##
|
||||||
|
# Find text for first matching object.
|
||||||
|
|
||||||
|
-def findtext(elem, path, default=None, namespaces=None):
|
||||||
|
- el = find(elem, path, namespaces)
|
||||||
|
+def findtext(elem, path, default=None, namespaces=None, with_prefixes=True):
|
||||||
|
+ el = find(elem, path, namespaces, with_prefixes=with_prefixes)
|
||||||
|
if el is None:
|
||||||
|
return default
|
||||||
|
else:
|
||||||
|
diff --git a/src/lxml/apihelpers.pxi b/src/lxml/apihelpers.pxi
|
||||||
|
index 88a031d..effd116 100644
|
||||||
|
--- a/src/lxml/apihelpers.pxi
|
||||||
|
+++ b/src/lxml/apihelpers.pxi
|
||||||
|
@@ -15,6 +15,13 @@ cdef void displayNode(xmlNode* c_node, indent):
|
||||||
|
finally:
|
||||||
|
return # swallow any exceptions
|
||||||
|
|
||||||
|
+cdef inline bint _isHtmlDocument(_Element element) except -1:
|
||||||
|
+ cdef xmlNode* c_node = element._c_node
|
||||||
|
+ return (
|
||||||
|
+ c_node is not NULL and c_node.doc is not NULL and
|
||||||
|
+ c_node.doc.properties & tree.XML_DOC_HTML != 0
|
||||||
|
+ )
|
||||||
|
+
|
||||||
|
cdef inline int _assertValidNode(_Element element) except -1:
|
||||||
|
assert element._c_node is not NULL, u"invalid Element proxy at %s" % id(element)
|
||||||
|
|
||||||
|
diff --git a/src/lxml/etree.pyx b/src/lxml/etree.pyx
|
||||||
|
index 689c330..90753fc 100644
|
||||||
|
--- a/src/lxml/etree.pyx
|
||||||
|
+++ b/src/lxml/etree.pyx
|
||||||
|
@@ -1544,7 +1544,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
|
||||||
|
"""
|
||||||
|
if isinstance(path, QName):
|
||||||
|
path = (<QName>path).text
|
||||||
|
- return _elementpath.find(self, path, namespaces)
|
||||||
|
+ return _elementpath.find(self, path, namespaces, with_prefixes=not _isHtmlDocument(self))
|
||||||
|
|
||||||
|
def findtext(self, path, default=None, namespaces=None):
|
||||||
|
u"""findtext(self, path, default=None, namespaces=None)
|
||||||
|
@@ -1557,7 +1557,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
|
||||||
|
"""
|
||||||
|
if isinstance(path, QName):
|
||||||
|
path = (<QName>path).text
|
||||||
|
- return _elementpath.findtext(self, path, default, namespaces)
|
||||||
|
+ return _elementpath.findtext(self, path, default, namespaces, with_prefixes=not _isHtmlDocument(self))
|
||||||
|
|
||||||
|
def findall(self, path, namespaces=None):
|
||||||
|
u"""findall(self, path, namespaces=None)
|
||||||
|
@@ -1570,7 +1570,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
|
||||||
|
"""
|
||||||
|
if isinstance(path, QName):
|
||||||
|
path = (<QName>path).text
|
||||||
|
- return _elementpath.findall(self, path, namespaces)
|
||||||
|
+ return _elementpath.findall(self, path, namespaces, with_prefixes=not _isHtmlDocument(self))
|
||||||
|
|
||||||
|
def iterfind(self, path, namespaces=None):
|
||||||
|
u"""iterfind(self, path, namespaces=None)
|
||||||
|
@@ -1583,7 +1583,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
|
||||||
|
"""
|
||||||
|
if isinstance(path, QName):
|
||||||
|
path = (<QName>path).text
|
||||||
|
- return _elementpath.iterfind(self, path, namespaces)
|
||||||
|
+ return _elementpath.iterfind(self, path, namespaces, with_prefixes=not _isHtmlDocument(self))
|
||||||
|
|
||||||
|
def xpath(self, _path, *, namespaces=None, extensions=None,
|
||||||
|
smart_strings=True, **_variables):
|
||||||
|
diff --git a/src/lxml/includes/tree.pxd b/src/lxml/includes/tree.pxd
|
||||||
|
index 010af80..d709313 100644
|
||||||
|
--- a/src/lxml/includes/tree.pxd
|
||||||
|
+++ b/src/lxml/includes/tree.pxd
|
||||||
|
@@ -154,6 +154,17 @@ cdef extern from "libxml/tree.h":
|
||||||
|
XML_EXTERNAL_PARAMETER_ENTITY= 5
|
||||||
|
XML_INTERNAL_PREDEFINED_ENTITY= 6
|
||||||
|
|
||||||
|
+ ctypedef enum xmlDocProperties:
|
||||||
|
+ XML_DOC_WELLFORMED = 1 # /* document is XML well formed */
|
||||||
|
+ XML_DOC_NSVALID = 2 # /* document is Namespace valid */
|
||||||
|
+ XML_DOC_OLD10 = 4 # /* parsed with old XML-1.0 parser */
|
||||||
|
+ XML_DOC_DTDVALID = 8 # /* DTD validation was successful */
|
||||||
|
+ XML_DOC_XINCLUDE = 16 # /* XInclude substitution was done */
|
||||||
|
+ XML_DOC_USERBUILT = 32 # /* Document was built using the API
|
||||||
|
+ # and not by parsing an instance */
|
||||||
|
+ XML_DOC_INTERNAL = 64 # /* built for internal processing */
|
||||||
|
+ XML_DOC_HTML = 128 # /* parsed or built HTML document */
|
||||||
|
+
|
||||||
|
ctypedef struct xmlNs:
|
||||||
|
const_xmlChar* href
|
||||||
|
const_xmlChar* prefix
|
||||||
|
@@ -274,6 +285,7 @@ cdef extern from "libxml/tree.h":
|
||||||
|
void* _private
|
||||||
|
xmlDtd* intSubset
|
||||||
|
xmlDtd* extSubset
|
||||||
|
+ int properties
|
||||||
|
|
||||||
|
ctypedef struct xmlAttr:
|
||||||
|
void* _private
|
||||||
|
diff --git a/src/lxml/tests/test_etree.py b/src/lxml/tests/test_etree.py
|
||||||
|
index bde496d..e969f3a 100644
|
||||||
|
--- a/src/lxml/tests/test_etree.py
|
||||||
|
+++ b/src/lxml/tests/test_etree.py
|
||||||
|
@@ -3137,11 +3137,23 @@ class ETreeOnlyTestCase(HelperTestCase):
|
||||||
|
|
||||||
|
def test_html_prefix_nsmap(self):
|
||||||
|
etree = self.etree
|
||||||
|
- el = etree.HTML('<hha:page-description>aa</hha:page-description>').find('.//page-description')
|
||||||
|
- if etree.LIBXML_VERSION < (2, 9, 11):
|
||||||
|
- self.assertEqual({'hha': None}, el.nsmap)
|
||||||
|
+ el = etree.HTML('<hha:page-description>aa</hha:page-description>')
|
||||||
|
+ pd = el[-1]
|
||||||
|
+ while len(pd):
|
||||||
|
+ pd = pd[-1]
|
||||||
|
+
|
||||||
|
+ if etree.LIBXML_VERSION >= (2, 9, 11):
|
||||||
|
+ # "Prefix" is kept as part of the tag name.
|
||||||
|
+ self.assertEqual("hha:page-description", pd.tag)
|
||||||
|
+ self.assertIsNone(el.find('.//page-description'))
|
||||||
|
+ self.assertIsNotNone(el.find('.//hha:page-description')) # no namespaces!
|
||||||
|
+ for e in el.iter():
|
||||||
|
+ self.assertEqual({}, e.nsmap)
|
||||||
|
else:
|
||||||
|
- self.assertEqual({}, el.nsmap)
|
||||||
|
+ # "Prefix" is parsed as XML prefix.
|
||||||
|
+ self.assertEqual("page-description", pd.tag)
|
||||||
|
+ pd = el.find('.//page-description')
|
||||||
|
+ self.assertEqual({'hha': None}, pd.nsmap)
|
||||||
|
|
||||||
|
def test_getchildren(self):
|
||||||
|
Element = self.etree.Element
|
||||||
|
--
|
||||||
|
2.33.0
|
||||||
|
|
||||||
@ -7,7 +7,7 @@ The latest release works with all CPython versions from 2.7 to 3.7.
|
|||||||
|
|
||||||
Name: python-%{modname}
|
Name: python-%{modname}
|
||||||
Version: 4.7.1
|
Version: 4.7.1
|
||||||
Release: 5
|
Release: 6
|
||||||
Summary: XML processing library combining libxml2/libxslt with the ElementTree API
|
Summary: XML processing library combining libxml2/libxslt with the ElementTree API
|
||||||
License: BSD
|
License: BSD
|
||||||
URL: https://files.pythonhosted.org
|
URL: https://files.pythonhosted.org
|
||||||
@ -15,6 +15,9 @@ Source0: https://files.pythonhosted.org/packages/source/l/lxml/lxml-%{ver
|
|||||||
|
|
||||||
Patch6000: backport-CVE-2022-2309.patch
|
Patch6000: backport-CVE-2022-2309.patch
|
||||||
Patch6001: backport-Work-around-libxml2-bug-in-affected-versions.patch
|
Patch6001: backport-Work-around-libxml2-bug-in-affected-versions.patch
|
||||||
|
Patch6002: Fix-test_elementtree-with-Expat-2.6.0.patch
|
||||||
|
Patch6003: backport-CVE-2024-37388.patch
|
||||||
|
Patch6004: backport-Change-HTML-prefix-handling-in-ElementPath-to-let-el.patch
|
||||||
|
|
||||||
BuildRequires: gcc libxml2-devel libxslt-devel
|
BuildRequires: gcc libxml2-devel libxslt-devel
|
||||||
|
|
||||||
@ -55,6 +58,12 @@ make test3
|
|||||||
%doc README.rst src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt
|
%doc README.rst src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Wed Jun 12 2024 zhuofeng <zhuofeng2@huawei.com> - 4.7.1-6
|
||||||
|
- Type:CVE
|
||||||
|
- CVE:CVE-2024-37388
|
||||||
|
- SUG:NA
|
||||||
|
- DESC:fix CVE-2024-37388
|
||||||
|
|
||||||
* Wed Nov 16 2022 zhuofeng <zhuofeng@huawei.com> - 4.7.1-5
|
* Wed Nov 16 2022 zhuofeng <zhuofeng@huawei.com> - 4.7.1-5
|
||||||
- change the Source0
|
- change the Source0
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user