Skip to content

Commit

Permalink
Extract tarballs more safely on Python with tarfile filters (PEP 706) (GH-201)
Browse files Browse the repository at this point in the history

Co-authored-by: Vinay Sajip <[email protected]>
  • Loading branch information
encukou and vsajip authored Jun 30, 2023
1 parent 0e9bfc0 commit 37a4fce
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 0 deletions.
13 changes: 13 additions & 0 deletions distlib/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -1249,6 +1249,19 @@ def check_path(path):
for tarinfo in archive.getmembers():
if not isinstance(tarinfo.name, text_type):
tarinfo.name = tarinfo.name.decode('utf-8')

# Limit extraction of dangerous items, if this Python
# allows it easily. If not, just trust the input.
# See: https://docs.python.org/3/library/tarfile.html#extraction-filters
def extraction_filter(member, path):
"""Run tarfile.tar_filter, but raise the expected ValueError"""
# This is only called if the current Python has tarfile filters
try:
return tarfile.tar_filter(member, path)
except tarfile.FilterError as exc:
raise ValueError(str(exc))
archive.extraction_filter = extraction_filter

archive.extractall(dest_dir)

finally:
Expand Down
Binary file added tests/evil.tar.gz
Binary file not shown.
4 changes: 4 additions & 0 deletions tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,10 @@ def test_unarchive(self):
)
bad_archives = ('bad.zip', 'bad.tar', 'bad.tar.gz', 'bad.tar.bz2')

# Test "evil" tarball on 3.12 *or* on Python with PEP-706 backported
if sys.version_info > (3, 12) or hasattr(tarfile, 'data_filter'):
bad_archives += ('evil.tar.gz',)

for name, cls, mode, lister in good_archives:
td = tempfile.mkdtemp()
archive = None
Expand Down

0 comments on commit 37a4fce

Please sign in to comment.