303 lines
13 KiB
Diff
303 lines
13 KiB
Diff
From 42deeab5b2efc2930d4eb73416e1dde9cf790dd2 Mon Sep 17 00:00:00 2001
|
|
From: Petr Viktorin <encukou@gmail.com>
|
|
Date: Tue, 22 Aug 2023 20:28:10 +0200
|
|
Subject: [PATCH] [3.9] gh-107845: Fix symlink handling for tarfile.data_filter
|
|
(GH-107846) (#108274)
|
|
MIME-Version: 1.0
|
|
Content-Type: text/plain; charset=UTF-8
|
|
Content-Transfer-Encoding: 8bit
|
|
|
|
(cherry picked from commit acbd3f9c5c5f23e95267714e41236140d84fe962)
|
|
|
|
Co-authored-by: Petr Viktorin <encukou@gmail.com>
|
|
Co-authored-by: Victor Stinner <vstinner@python.org>
|
|
Co-authored-by: Lumír 'Frenzy' Balhar <frenzy.madness@gmail.com>
|
|
---
|
|
Doc/library/tarfile.rst | 5 +
|
|
Lib/tarfile.py | 11 +-
|
|
Lib/test/test_tarfile.py | 144 +++++++++++++++++-
|
|
...-08-10-17-36-22.gh-issue-107845.dABiMJ.rst | 3 +
|
|
4 files changed, 154 insertions(+), 9 deletions(-)
|
|
create mode 100644 Misc/NEWS.d/next/Library/2023-08-10-17-36-22.gh-issue-107845.dABiMJ.rst
|
|
|
|
diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst
|
|
index f1d9bd34536..165088529a8 100644
|
|
--- a/Doc/library/tarfile.rst
|
|
+++ b/Doc/library/tarfile.rst
|
|
@@ -732,6 +732,11 @@ A ``TarInfo`` object has the following public data attributes:
|
|
Name of the target file name, which is only present in :class:`TarInfo` objects
|
|
of type :const:`LNKTYPE` and :const:`SYMTYPE`.
|
|
|
|
+ For symbolic links (``SYMTYPE``), the *linkname* is relative to the directory
|
|
+ that contains the link.
|
|
+ For hard links (``LNKTYPE``), the *linkname* is relative to the root of
|
|
+ the archive.
|
|
+
|
|
|
|
.. attribute:: TarInfo.uid
|
|
:type: int
|
|
diff --git a/Lib/tarfile.py b/Lib/tarfile.py
|
|
index b6ad7dbe2a4..7a6158c2eb9 100755
|
|
--- a/Lib/tarfile.py
|
|
+++ b/Lib/tarfile.py
|
|
@@ -740,7 +740,7 @@ class SpecialFileError(FilterError):
|
|
class AbsoluteLinkError(FilterError):
|
|
def __init__(self, tarinfo):
|
|
self.tarinfo = tarinfo
|
|
- super().__init__(f'{tarinfo.name!r} is a symlink to an absolute path')
|
|
+ super().__init__(f'{tarinfo.name!r} is a link to an absolute path')
|
|
|
|
class LinkOutsideDestinationError(FilterError):
|
|
def __init__(self, tarinfo, path):
|
|
@@ -800,7 +800,14 @@ def _get_filtered_attrs(member, dest_path, for_data=True):
|
|
if member.islnk() or member.issym():
|
|
if os.path.isabs(member.linkname):
|
|
raise AbsoluteLinkError(member)
|
|
- target_path = os.path.realpath(os.path.join(dest_path, member.linkname))
|
|
+ if member.issym():
|
|
+ target_path = os.path.join(dest_path,
|
|
+ os.path.dirname(name),
|
|
+ member.linkname)
|
|
+ else:
|
|
+ target_path = os.path.join(dest_path,
|
|
+ member.linkname)
|
|
+ target_path = os.path.realpath(target_path)
|
|
if os.path.commonpath([target_path, dest_path]) != dest_path:
|
|
raise LinkOutsideDestinationError(member, target_path)
|
|
return new_attrs
|
|
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
|
|
index a66f7efd2d6..3df64c78032 100644
|
|
--- a/Lib/test/test_tarfile.py
|
|
+++ b/Lib/test/test_tarfile.py
|
|
@@ -3169,10 +3169,12 @@ class ArchiveMaker:
|
|
self.bio = None
|
|
|
|
def add(self, name, *, type=None, symlink_to=None, hardlink_to=None,
|
|
- mode=None, **kwargs):
|
|
+ mode=None, size=None, **kwargs):
|
|
"""Add a member to the test archive. Call within `with`."""
|
|
name = str(name)
|
|
tarinfo = tarfile.TarInfo(name).replace(**kwargs)
|
|
+ if size is not None:
|
|
+ tarinfo.size = size
|
|
if mode:
|
|
tarinfo.mode = _filemode_to_int(mode)
|
|
if symlink_to is not None:
|
|
@@ -3236,7 +3238,8 @@ class TestExtractionFilters(unittest.TestCase):
|
|
raise self.raised_exception
|
|
self.assertEqual(self.expected_paths, set())
|
|
|
|
- def expect_file(self, name, type=None, symlink_to=None, mode=None):
|
|
+ def expect_file(self, name, type=None, symlink_to=None, mode=None,
|
|
+ size=None):
|
|
"""Check a single file. See check_context."""
|
|
if self.raised_exception:
|
|
raise self.raised_exception
|
|
@@ -3270,6 +3273,8 @@ class TestExtractionFilters(unittest.TestCase):
|
|
self.assertTrue(path.is_fifo())
|
|
else:
|
|
raise NotImplementedError(type)
|
|
+ if size is not None:
|
|
+ self.assertEqual(path.stat().st_size, size)
|
|
for parent in path.parents:
|
|
self.expected_paths.discard(parent)
|
|
|
|
@@ -3315,8 +3320,15 @@ class TestExtractionFilters(unittest.TestCase):
|
|
# Test interplaying symlinks
|
|
# Inspired by 'dirsymlink2a' in jwilk/traversal-archives
|
|
with ArchiveMaker() as arc:
|
|
+
|
|
+ # `current` links to `.` which is both:
|
|
+ # - the destination directory
|
|
+ # - `current` itself
|
|
arc.add('current', symlink_to='.')
|
|
+
|
|
+ # effectively points to ./../
|
|
arc.add('parent', symlink_to='current/..')
|
|
+
|
|
arc.add('parent/evil')
|
|
|
|
if support.can_symlink():
|
|
@@ -3357,9 +3369,46 @@ class TestExtractionFilters(unittest.TestCase):
|
|
def test_parent_symlink2(self):
|
|
# Test interplaying symlinks
|
|
# Inspired by 'dirsymlink2b' in jwilk/traversal-archives
|
|
+
|
|
+ # Posix and Windows have different pathname resolution:
|
|
+ # either symlink or a '..' component resolve first.
|
|
+ # Let's see which we are on.
|
|
+ if support.can_symlink():
|
|
+ testpath = os.path.join(TEMPDIR, 'resolution_test')
|
|
+ os.mkdir(testpath)
|
|
+
|
|
+ # testpath/current links to `.` which is all of:
|
|
+ # - `testpath`
|
|
+ # - `testpath/current`
|
|
+ # - `testpath/current/current`
|
|
+ # - etc.
|
|
+ os.symlink('.', os.path.join(testpath, 'current'))
|
|
+
|
|
+ # we'll test where `testpath/current/../file` ends up
|
|
+ with open(os.path.join(testpath, 'current', '..', 'file'), 'w'):
|
|
+ pass
|
|
+
|
|
+ if os.path.exists(os.path.join(testpath, 'file')):
|
|
+ # Windows collapses 'current\..' to '.' first, leaving
|
|
+ # 'testpath\file'
|
|
+ dotdot_resolves_early = True
|
|
+ elif os.path.exists(os.path.join(testpath, '..', 'file')):
|
|
+ # Posix resolves 'current' to '.' first, leaving
|
|
+ # 'testpath/../file'
|
|
+ dotdot_resolves_early = False
|
|
+ else:
|
|
+ raise AssertionError('Could not determine link resolution')
|
|
+
|
|
with ArchiveMaker() as arc:
|
|
+
|
|
+ # `current` links to `.` which is both the destination directory
|
|
+ # and `current` itself
|
|
arc.add('current', symlink_to='.')
|
|
+
|
|
+ # `current/parent` is also available as `./parent`,
|
|
+ # and effectively points to `./../`
|
|
arc.add('current/parent', symlink_to='..')
|
|
+
|
|
arc.add('parent/evil')
|
|
|
|
with self.check_context(arc.open(), 'fully_trusted'):
|
|
@@ -3373,6 +3422,7 @@ class TestExtractionFilters(unittest.TestCase):
|
|
|
|
with self.check_context(arc.open(), 'tar'):
|
|
if support.can_symlink():
|
|
+ # Fail when extracting a file outside destination
|
|
self.expect_exception(
|
|
tarfile.OutsideDestinationError,
|
|
"'parent/evil' would be extracted to "
|
|
@@ -3383,10 +3433,24 @@ class TestExtractionFilters(unittest.TestCase):
|
|
self.expect_file('parent/evil')
|
|
|
|
with self.check_context(arc.open(), 'data'):
|
|
- self.expect_exception(
|
|
- tarfile.LinkOutsideDestinationError,
|
|
- """'current/parent' would link to ['"].*['"], """
|
|
- + "which is outside the destination")
|
|
+ if support.can_symlink():
|
|
+ if dotdot_resolves_early:
|
|
+ # Fail when extracting a file outside destination
|
|
+ self.expect_exception(
|
|
+ tarfile.OutsideDestinationError,
|
|
+ "'parent/evil' would be extracted to "
|
|
+ + """['"].*evil['"], which is outside """
|
|
+ + "the destination")
|
|
+ else:
|
|
+ # Fail as soon as we have a symlink outside the destination
|
|
+ self.expect_exception(
|
|
+ tarfile.LinkOutsideDestinationError,
|
|
+ "'current/parent' would link to "
|
|
+ + """['"].*outerdir['"], which is outside """
|
|
+ + "the destination")
|
|
+ else:
|
|
+ self.expect_file('current/')
|
|
+ self.expect_file('parent/evil')
|
|
|
|
def test_absolute_symlink(self):
|
|
# Test symlink to an absolute path
|
|
@@ -3415,11 +3479,29 @@ class TestExtractionFilters(unittest.TestCase):
|
|
with self.check_context(arc.open(), 'data'):
|
|
self.expect_exception(
|
|
tarfile.AbsoluteLinkError,
|
|
- "'parent' is a symlink to an absolute path")
|
|
+ "'parent' is a link to an absolute path")
|
|
+
|
|
+ def test_absolute_hardlink(self):
|
|
+ # Test hardlink to an absolute path
|
|
+ # Inspired by 'dirsymlink' in https://github.com/jwilk/traversal-archives
|
|
+ with ArchiveMaker() as arc:
|
|
+ arc.add('parent', hardlink_to=self.outerdir / 'foo')
|
|
+
|
|
+ with self.check_context(arc.open(), 'fully_trusted'):
|
|
+ self.expect_exception(KeyError, ".*foo. not found")
|
|
+
|
|
+ with self.check_context(arc.open(), 'tar'):
|
|
+ self.expect_exception(KeyError, ".*foo. not found")
|
|
+
|
|
+ with self.check_context(arc.open(), 'data'):
|
|
+ self.expect_exception(
|
|
+ tarfile.AbsoluteLinkError,
|
|
+ "'parent' is a link to an absolute path")
|
|
|
|
def test_sly_relative0(self):
|
|
# Inspired by 'relative0' in jwilk/traversal-archives
|
|
with ArchiveMaker() as arc:
|
|
+ # points to `../../tmp/moo`
|
|
arc.add('../moo', symlink_to='..//tmp/moo')
|
|
|
|
try:
|
|
@@ -3469,6 +3551,54 @@ class TestExtractionFilters(unittest.TestCase):
|
|
+ """['"].*moo['"], which is outside the """
|
|
+ "destination")
|
|
|
|
+ def test_deep_symlink(self):
|
|
+ # Test that symlinks and hardlinks inside a directory
|
|
+ # point to the correct file (`target` of size 3).
|
|
+ # If links aren't supported we get a copy of the file.
|
|
+ with ArchiveMaker() as arc:
|
|
+ arc.add('targetdir/target', size=3)
|
|
+ # a hardlink's linkname is relative to the archive
|
|
+ arc.add('linkdir/hardlink', hardlink_to=os.path.join(
|
|
+ 'targetdir', 'target'))
|
|
+ # a symlink's linkname is relative to the link's directory
|
|
+ arc.add('linkdir/symlink', symlink_to=os.path.join(
|
|
+ '..', 'targetdir', 'target'))
|
|
+
|
|
+ for filter in 'tar', 'data', 'fully_trusted':
|
|
+ with self.check_context(arc.open(), filter):
|
|
+ self.expect_file('targetdir/target', size=3)
|
|
+ self.expect_file('linkdir/hardlink', size=3)
|
|
+ if support.can_symlink():
|
|
+ self.expect_file('linkdir/symlink', size=3,
|
|
+ symlink_to='../targetdir/target')
|
|
+ else:
|
|
+ self.expect_file('linkdir/symlink', size=3)
|
|
+
|
|
+ def test_chains(self):
|
|
+ # Test chaining of symlinks/hardlinks.
|
|
+ # Symlinks are created before the files they point to.
|
|
+ with ArchiveMaker() as arc:
|
|
+ arc.add('linkdir/symlink', symlink_to='hardlink')
|
|
+ arc.add('symlink2', symlink_to=os.path.join(
|
|
+ 'linkdir', 'hardlink2'))
|
|
+ arc.add('targetdir/target', size=3)
|
|
+ arc.add('linkdir/hardlink', hardlink_to='targetdir/target')
|
|
+ arc.add('linkdir/hardlink2', hardlink_to='linkdir/symlink')
|
|
+
|
|
+ for filter in 'tar', 'data', 'fully_trusted':
|
|
+ with self.check_context(arc.open(), filter):
|
|
+ self.expect_file('targetdir/target', size=3)
|
|
+ self.expect_file('linkdir/hardlink', size=3)
|
|
+ self.expect_file('linkdir/hardlink2', size=3)
|
|
+ if support.can_symlink():
|
|
+ self.expect_file('linkdir/symlink', size=3,
|
|
+ symlink_to='hardlink')
|
|
+ self.expect_file('symlink2', size=3,
|
|
+ symlink_to='linkdir/hardlink2')
|
|
+ else:
|
|
+ self.expect_file('linkdir/symlink', size=3)
|
|
+ self.expect_file('symlink2', size=3)
|
|
+
|
|
def test_modes(self):
|
|
# Test how file modes are extracted
|
|
# (Note that the modes are ignored on platforms without working chmod)
|
|
diff --git a/Misc/NEWS.d/next/Library/2023-08-10-17-36-22.gh-issue-107845.dABiMJ.rst b/Misc/NEWS.d/next/Library/2023-08-10-17-36-22.gh-issue-107845.dABiMJ.rst
|
|
new file mode 100644
|
|
index 00000000000..32c1fb93f4a
|
|
--- /dev/null
|
|
+++ b/Misc/NEWS.d/next/Library/2023-08-10-17-36-22.gh-issue-107845.dABiMJ.rst
|
|
@@ -0,0 +1,3 @@
|
|
+:func:`tarfile.data_filter` now takes the location of symlinks into account
|
|
+when determining their target, so it will no longer reject some valid
|
|
+tarballs with ``LinkOutsideDestinationError``.
|
|
--
|
|
2.25.1
|
|
|