137 lines
4.9 KiB
Diff
137 lines
4.9 KiB
Diff
From 5f6d7a202d833678ffdf9bdfef4d44d411bb6b31 Mon Sep 17 00:00:00 2001
|
|
From: xinsheng <xinsheng3@huawei.com>
|
|
Date: Tue, 3 Sep 2024 14:12:53 +0800
|
|
Subject: [PATCH] gh-123270: Replaced SanitizedNames with a more surgical fix.
|
|
(GH-123354)
|
|
|
|
---
|
|
Lib/test/test_zipfile.py | 72 +++++++++++++++++++
|
|
Lib/zipfile.py | 8 ++-
|
|
...-08-26-13-45-20.gh-issue-123270.gXHvNJ.rst | 3 +
|
|
3 files changed, 81 insertions(+), 2 deletions(-)
|
|
create mode 100644 Misc/NEWS.d/next/Library/2024-08-26-13-45-20.gh-issue-123270.gXHvNJ.rst
|
|
|
|
diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py
|
|
index b4ac17c..05b8b99 100644
|
|
--- a/Lib/test/test_zipfile.py
|
|
+++ b/Lib/test/test_zipfile.py
|
|
@@ -3045,6 +3045,78 @@ class TestPath(unittest.TestCase):
|
|
data = ['/'.join(string.ascii_lowercase + str(n)) for n in range(10000)]
|
|
zipfile.CompleteDirs._implied_dirs(data)
|
|
|
|
+ def test_malformed_paths(self):
|
|
+ """
|
|
+ Path should handle malformed paths gracefully.
|
|
+ Paths with leading slashes are not visible.
|
|
+ Paths with dots are treated like regular files.
|
|
+ """
|
|
+ data = io.BytesIO()
|
|
+ zf = zipfile.ZipFile(data, "w")
|
|
+ zf.writestr("/one-slash.txt", b"content")
|
|
+ zf.writestr("//two-slash.txt", b"content")
|
|
+ zf.writestr("../parent.txt", b"content")
|
|
+ zf.filename = ''
|
|
+ root = zipfile.Path(zf)
|
|
+ assert list(map(str, root.iterdir())) == ['../']
|
|
+ assert root.joinpath('..').joinpath('parent.txt').read_bytes() == b'content'
|
|
+
|
|
+ def test_unsupported_names(self):
|
|
+ """
|
|
+ Path segments with special characters are readable.
|
|
+ On some platforms or file systems, characters like
|
|
+ ``:`` and ``?`` are not allowed, but they are valid
|
|
+ in the zip file.
|
|
+ """
|
|
+ data = io.BytesIO()
|
|
+ zf = zipfile.ZipFile(data, "w")
|
|
+ zf.writestr("path?", b"content")
|
|
+ zf.writestr("V: NMS.flac", b"fLaC...")
|
|
+ zf.filename = ''
|
|
+ root = zipfile.Path(zf)
|
|
+ contents = root.iterdir()
|
|
+ assert next(contents).name == 'path?'
|
|
+ assert next(contents).name == 'V: NMS.flac'
|
|
+ assert root.joinpath('V: NMS.flac').read_bytes() == b"fLaC..."
|
|
+
|
|
+ def test_backslash_not_separator(self):
|
|
+ """
|
|
+ In a zip file, backslashes are not separators.
|
|
+ """
|
|
+ data = io.BytesIO()
|
|
+ zf = zipfile.ZipFile(data, "w")
|
|
+ zf.writestr(DirtyZipInfo.for_name("foo\\bar", zf), b"content")
|
|
+ zf.filename = ''
|
|
+ root = zipfile.Path(zf)
|
|
+ (first,) = root.iterdir()
|
|
+ assert not first.is_dir()
|
|
+ assert first.name == 'foo\\bar'
|
|
+
|
|
+class DirtyZipInfo(zipfile.ZipInfo):
|
|
+ """
|
|
+ Bypass name sanitization.
|
|
+ """
|
|
+
|
|
+ def __init__(self, filename, *args, **kwargs):
|
|
+ super().__init__(filename, *args, **kwargs)
|
|
+ self.filename = filename
|
|
+
|
|
+ @classmethod
|
|
+ def for_name(cls, name, archive):
|
|
+ """
|
|
+ Construct the same way that ZipFile.writestr does.
|
|
+ TODO: extract this functionality and re-use
|
|
+ """
|
|
+ self = cls(filename=name, date_time=time.localtime(time.time())[:6])
|
|
+ self.compress_type = archive.compression
|
|
+ self.compress_level = archive.compresslevel
|
|
+ if self.filename.endswith('/'): # pragma: no cover
|
|
+ self.external_attr = 0o40775 << 16 # drwxrwxr-x
|
|
+ self.external_attr |= 0x10 # MS-DOS directory flag
|
|
+ else:
|
|
+ self.external_attr = 0o600 << 16 # ?rw-------
|
|
+ return self
|
|
+
|
|
|
|
if __name__ == "__main__":
|
|
unittest.main()
|
|
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
|
|
index a07a567..aeb4131 100644
|
|
--- a/Lib/zipfile.py
|
|
+++ b/Lib/zipfile.py
|
|
@@ -2137,7 +2137,7 @@ def _parents(path):
|
|
def _ancestry(path):
|
|
"""
|
|
Given a path with elements separated by
|
|
- posixpath.sep, generate all elements of that path
|
|
+ posixpath.sep, generate all elements of that path.
|
|
|
|
>>> list(_ancestry('b/d'))
|
|
['b/d', 'b']
|
|
@@ -2149,9 +2149,13 @@ def _ancestry(path):
|
|
['b']
|
|
>>> list(_ancestry(''))
|
|
[]
|
|
+
|
|
+ Multiple separators are treated like a single separator.
|
|
+ >>> list(_ancestry('//b//d///f//'))
|
|
+ ['//b//d///f', '//b//d', '//b']
|
|
"""
|
|
path = path.rstrip(posixpath.sep)
|
|
- while path and path != posixpath.sep:
|
|
+ while path.rstrip(posixpath.sep):
|
|
yield path
|
|
path, tail = posixpath.split(path)
|
|
|
|
diff --git a/Misc/NEWS.d/next/Library/2024-08-26-13-45-20.gh-issue-123270.gXHvNJ.rst b/Misc/NEWS.d/next/Library/2024-08-26-13-45-20.gh-issue-123270.gXHvNJ.rst
|
|
new file mode 100644
|
|
index 0000000..ee9fde6
|
|
--- /dev/null
|
|
+++ b/Misc/NEWS.d/next/Library/2024-08-26-13-45-20.gh-issue-123270.gXHvNJ.rst
|
|
@@ -0,0 +1,3 @@
|
|
+Applied a more surgical fix for malformed payloads in :class:`zipfile.Path`
|
|
+causing infinite loops (gh-122905) without breaking contents using
|
|
+legitimate characters.
|
|
--
|
|
2.43.0
|
|
|