Skip to content

Commit

Permalink
Adding tarfile member sanitization to extractall()
Browse files (browse the repository at this point in the history)
  • Loading branch information
TrellixVulnTeam committed Nov 25, 2022
1 parent 88c6f6d commit f2638f5
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 2 deletions.
21 changes: 20 additions & 1 deletion examples/asr/librispeech/local/prepare_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,26 @@ def _progress(count, block_size, total_size):
"Successfully downloaded %s, size(bytes): %d" % (url, statinfo.st_size)
)
with tarfile.open(tar_filepath, "r") as tar:
tar.extractall(directory)
def is_within_directory(directory, target):
    """Return True when *target* is *directory* itself or lies beneath it.

    Both paths are made absolute and normalized with ``os.path.abspath``
    (so ``..`` segments are collapsed) before being compared. Symbolic
    links are not resolved.

    Args:
        directory: The directory that is expected to contain *target*.
        target: The path to test.

    Returns:
        True if *target* is located inside *directory*, False otherwise.
    """
    abs_directory = os.path.abspath(directory)
    abs_target = os.path.abspath(target)

    try:
        # commonpath works on whole path components. commonprefix compares
        # character by character, so it would wrongly treat "/data/foobar"
        # as being inside "/data/foo".
        common = os.path.commonpath([abs_directory, abs_target])
    except ValueError:
        # Raised when the paths have nothing in common to compare
        # (e.g. different drives on Windows): target cannot be inside.
        return False

    return common == abs_directory

def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
    """Extract *tar* into *path* after rejecting entries that would escape it.

    Every member of the archive is checked (regardless of *members*) before
    anything is written to disk.

    Args:
        tar: An open tar archive object providing ``getmembers()`` and
            ``extractall()``.
        path: Destination directory for the extraction.
        members: Optional subset of members, forwarded to ``extractall``.
        numeric_owner: Forwarded to ``extractall``.

    Raises:
        Exception: If a member's name, or the target of a symbolic or hard
            link member, resolves to a location outside *path*.
    """
    for member in tar.getmembers():
        member_path = os.path.join(path, member.name)
        if not is_within_directory(path, member_path):
            raise Exception("Attempted Path Traversal in Tar File")

        # A link entry whose own name is harmless can still point outside
        # the destination and redirect later writes, so check its target too.
        if member.issym():
            # Symlink targets are relative to the directory holding the link.
            link_target = os.path.join(
                os.path.dirname(member_path), member.linkname
            )
        elif member.islnk():
            # Hard-link targets are named relative to the archive root.
            link_target = os.path.join(path, member.linkname)
        else:
            continue

        if not is_within_directory(path, link_target):
            raise Exception("Attempted Path Traversal in Tar File")

    tar.extractall(path, members, numeric_owner=numeric_owner)


safe_extract(tar, directory)
finally:
gfile.Remove(tar_filepath)

Expand Down
21 changes: 20 additions & 1 deletion examples/tts/ljspeech/local/prepare_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,26 @@ def _progress(count, block_size, total_size):
"Successfully downloaded %s, size(bytes): %d" % (url, statinfo.st_size)
)
with tarfile.open(tar_filepath, "r") as tar:
tar.extractall(directory)
def is_within_directory(directory, target):
    """Return True when *target* is *directory* itself or lies beneath it.

    Both paths are made absolute and normalized with ``os.path.abspath``
    (so ``..`` segments are collapsed) before being compared. Symbolic
    links are not resolved.

    Args:
        directory: The directory that is expected to contain *target*.
        target: The path to test.

    Returns:
        True if *target* is located inside *directory*, False otherwise.
    """
    abs_directory = os.path.abspath(directory)
    abs_target = os.path.abspath(target)

    try:
        # commonpath works on whole path components. commonprefix compares
        # character by character, so it would wrongly treat "/data/foobar"
        # as being inside "/data/foo".
        common = os.path.commonpath([abs_directory, abs_target])
    except ValueError:
        # Raised when the paths have nothing in common to compare
        # (e.g. different drives on Windows): target cannot be inside.
        return False

    return common == abs_directory

def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
    """Extract *tar* into *path* after rejecting entries that would escape it.

    Every member of the archive is checked (regardless of *members*) before
    anything is written to disk.

    Args:
        tar: An open tar archive object providing ``getmembers()`` and
            ``extractall()``.
        path: Destination directory for the extraction.
        members: Optional subset of members, forwarded to ``extractall``.
        numeric_owner: Forwarded to ``extractall``.

    Raises:
        Exception: If a member's name, or the target of a symbolic or hard
            link member, resolves to a location outside *path*.
    """
    for member in tar.getmembers():
        member_path = os.path.join(path, member.name)
        if not is_within_directory(path, member_path):
            raise Exception("Attempted Path Traversal in Tar File")

        # A link entry whose own name is harmless can still point outside
        # the destination and redirect later writes, so check its target too.
        if member.issym():
            # Symlink targets are relative to the directory holding the link.
            link_target = os.path.join(
                os.path.dirname(member_path), member.linkname
            )
        elif member.islnk():
            # Hard-link targets are named relative to the archive root.
            link_target = os.path.join(path, member.linkname)
        else:
            continue

        if not is_within_directory(path, link_target):
            raise Exception("Attempted Path Traversal in Tar File")

    tar.extractall(path, members, numeric_owner=numeric_owner)


safe_extract(tar, directory)
logging.info("Successfully extracted data from LJSpeech-1.1.tar.bz2")
finally:
GFILE.Remove(tar_filepath)
Expand Down

0 comments on commit f2638f5

Please sign in to comment.