From: Lars Gustäbel Date: Fri, 28 Aug 2009 19:23:44 +0000 (+0000) Subject: Issue #6054: Do not normalize stored pathnames. X-Git-Tag: v2.7a1~612 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=f7cda5287de0e33963c9fa09e2143bd2d169bd69;p=python Issue #6054: Do not normalize stored pathnames. No longer use tarfile.normpath() on pathnames. Store pathnames unchanged, i.e. do not remove "./", "../" and "//" occurrences. However, still convert absolute to relative paths. --- diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 893d486ec1..54a26062ed 100644 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -330,11 +330,6 @@ def filemode(mode): perm.append("-") return "".join(perm) -if os.sep != "/": - normpath = lambda path: os.path.normpath(path).replace(os.sep, "/") -else: - normpath = os.path.normpath - class TarError(Exception): """Base exception.""" pass @@ -956,7 +951,7 @@ class TarInfo(object): """Return the TarInfo's attributes as a dictionary. """ info = { - "name": normpath(self.name), + "name": self.name, "mode": self.mode & 07777, "uid": self.uid, "gid": self.gid, @@ -964,7 +959,7 @@ class TarInfo(object): "mtime": self.mtime, "chksum": self.chksum, "type": self.type, - "linkname": normpath(self.linkname) if self.linkname else "", + "linkname": self.linkname, "uname": self.uname, "gname": self.gname, "devmajor": self.devmajor, @@ -1815,10 +1810,9 @@ class TarFile(object): # Absolute paths are turned to relative paths. if arcname is None: arcname = name - arcname = normpath(arcname) drv, arcname = os.path.splitdrive(arcname) - while arcname[0:1] == "/": - arcname = arcname[1:] + arcname = arcname.replace(os.sep, "/") + arcname = arcname.lstrip("/") # Now, fill the TarInfo object with # information specific for the file. @@ -1947,16 +1941,6 @@ class TarFile(object): self._dbg(2, "tarfile: Skipped %r" % name) return - # Special case: The user wants to add the current - # working directory. - if name == ".": - if recursive: - if arcname == ".": - arcname = "" - for f in os.listdir(name): - self.add(f, os.path.join(arcname, f), recursive, exclude) - return - self._dbg(1, name) # Create a TarInfo object from the file. @@ -2123,9 +2107,8 @@ class TarFile(object): # Fetch the TarInfo object for the given name # and build the destination pathname, replacing # forward slashes to platform specific separators. - if targetpath[-1:] == "/": - targetpath = targetpath[:-1] - targetpath = os.path.normpath(targetpath) + targetpath = targetpath.rstrip("/") + targetpath = targetpath.replace("/", os.sep) # Create all upper directories. upperdirs = os.path.dirname(targetpath) @@ -2220,23 +2203,23 @@ class TarFile(object): (platform limitation), we try to make a copy of the referenced file instead of a link. """ - linkpath = tarinfo.linkname try: if tarinfo.issym(): - os.symlink(linkpath, targetpath) + os.symlink(tarinfo.linkname, targetpath) else: # See extract(). os.link(tarinfo._link_target, targetpath) except AttributeError: if tarinfo.issym(): - linkpath = os.path.join(os.path.dirname(tarinfo.name), - linkpath) - linkpath = normpath(linkpath) + linkpath = os.path.dirname(tarinfo.name) + "/" + \ + tarinfo.linkname + else: + linkpath = tarinfo.linkname try: self._extract_member(self.getmember(linkpath), targetpath) except (EnvironmentError, KeyError), e: - linkpath = os.path.normpath(linkpath) + linkpath = linkpath.replace("/", os.sep) try: shutil.copy2(linkpath, targetpath) except EnvironmentError, e: diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index aba218da20..8c9b0ec3dd 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -660,6 +660,76 @@ class WriteTest(WriteTestBase): finally: shutil.rmtree(tempdir) + # Guarantee that stored pathnames are not modified. Don't + # remove ./ or ../ or double slashes. Still make absolute + # pathnames relative. + # For details see bug #6054. + def _test_pathname(self, path, cmp_path=None, dir=False): + # Create a tarfile with an empty member named path + # and compare the stored name with the original. + foo = os.path.join(TEMPDIR, "foo") + if not dir: + open(foo, "w").close() + else: + os.mkdir(foo) + + tar = tarfile.open(tmpname, self.mode) + tar.add(foo, arcname=path) + tar.close() + + tar = tarfile.open(tmpname, "r") + t = tar.next() + tar.close() + + if not dir: + os.remove(foo) + else: + os.rmdir(foo) + + self.assertEqual(t.name, cmp_path or path.replace(os.sep, "/")) + + def test_pathnames(self): + self._test_pathname("foo") + self._test_pathname(os.path.join("foo", ".", "bar")) + self._test_pathname(os.path.join("foo", "..", "bar")) + self._test_pathname(os.path.join(".", "foo")) + self._test_pathname(os.path.join(".", "foo", ".")) + self._test_pathname(os.path.join(".", "foo", ".", "bar")) + self._test_pathname(os.path.join(".", "foo", "..", "bar")) + self._test_pathname(os.path.join(".", "foo", "..", "bar")) + self._test_pathname(os.path.join("..", "foo")) + self._test_pathname(os.path.join("..", "foo", "..")) + self._test_pathname(os.path.join("..", "foo", ".", "bar")) + self._test_pathname(os.path.join("..", "foo", "..", "bar")) + + self._test_pathname("foo" + os.sep + os.sep + "bar") + self._test_pathname("foo" + os.sep + os.sep, "foo", dir=True) + + def test_abs_pathnames(self): + if sys.platform == "win32": + self._test_pathname("C:\\foo", "foo") + else: + self._test_pathname("/foo", "foo") + self._test_pathname("///foo", "foo") + + def test_cwd(self): + # Test adding the current working directory. + cwd = os.getcwd() + os.chdir(TEMPDIR) + try: + open("foo", "w").close() + + tar = tarfile.open(tmpname, self.mode) + tar.add(".") + tar.close() + + tar = tarfile.open(tmpname, "r") + for t in tar: + self.assert_(t.name == "." or t.name.startswith("./")) + tar.close() + finally: + os.chdir(cwd) + class StreamWriteTest(WriteTestBase): diff --git a/Misc/NEWS b/Misc/NEWS index a63182fdea..49a0942063 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -359,6 +359,8 @@ Core and Builtins Library ------- +- Issue #6054: Do not normalize stored pathnames in tarfile. + - Issue #6794: Fix Decimal.compare_total and Decimal.compare_total_mag: NaN payloads are now ordered by integer value rather than lexicographically.