"""
info["magic"] = POSIX_MAGIC
- if len(info["linkname"]) > LENGTH_LINK:
+ if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
raise ValueError("linkname is too long")
- if len(info["name"]) > LENGTH_NAME:
- info["prefix"], info["name"] = self._posix_split_name(info["name"])
+ if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
+ info["prefix"], info["name"] = self._posix_split_name(info["name"], encoding, errors)
return self._create_header(info, USTAR_FORMAT, encoding, errors)
info["magic"] = GNU_MAGIC
buf = b""
- if len(info["linkname"]) > LENGTH_LINK:
+ if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors)
- if len(info["name"]) > LENGTH_NAME:
+ if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors)
return buf + self._create_header(info, GNU_FORMAT, encoding, errors)
"""
return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")
- def _posix_split_name(self, name):
+ def _posix_split_name(self, name, encoding, errors):
"""Split a name longer than 100 chars into a prefix
and a name part.
"""
- prefix = name[:LENGTH_PREFIX + 1]
- while prefix and prefix[-1] != "/":
- prefix = prefix[:-1]
-
- name = name[len(prefix):]
- prefix = prefix[:-1]
-
- if not prefix or len(name) > LENGTH_NAME:
+ components = name.split("/")
+ for i in range(1, len(components)):
+ prefix = "/".join(components[:i])
+ name = "/".join(components[i:])
+ if len(prefix.encode(encoding, errors)) <= LENGTH_PREFIX and \
+ len(name.encode(encoding, errors)) <= LENGTH_NAME:
+ break
+ else:
raise ValueError("name is too long")
+
return prefix, name
@staticmethod
tar.close()
-class UstarUnicodeTest(unittest.TestCase):
-
- format = tarfile.USTAR_FORMAT
+class UnicodeTest:
def test_iso8859_1_filename(self):
self._test_unicode_filename("iso8859-1")
tar.close()
-class GNUUnicodeTest(UstarUnicodeTest):
+class UstarUnicodeTest(UnicodeTest, unittest.TestCase):
+
+ format = tarfile.USTAR_FORMAT
+
+ # Test whether the utf-8 encoded version of a filename exceeds the 100
+ # bytes name field limit (every occurrence of '\xff' will be expanded to 2
+ # bytes).
+ def test_unicode_name1(self):
+ self._test_ustar_name("0123456789" * 10)
+ self._test_ustar_name("0123456789" * 10 + "0", ValueError)
+ self._test_ustar_name("0123456789" * 9 + "01234567\xff")
+ self._test_ustar_name("0123456789" * 9 + "012345678\xff", ValueError)
+
+ def test_unicode_name2(self):
+ self._test_ustar_name("0123456789" * 9 + "012345\xff\xff")
+ self._test_ustar_name("0123456789" * 9 + "0123456\xff\xff", ValueError)
+
+ # Test whether the utf-8 encoded version of a filename exceeds the 155
+ # bytes prefix + '/' + 100 bytes name limit.
+ def test_unicode_longname1(self):
+ self._test_ustar_name("0123456789" * 15 + "01234/" + "0123456789" * 10)
+ self._test_ustar_name("0123456789" * 15 + "0123/4" + "0123456789" * 10, ValueError)
+ self._test_ustar_name("0123456789" * 15 + "012\xff/" + "0123456789" * 10)
+ self._test_ustar_name("0123456789" * 15 + "0123\xff/" + "0123456789" * 10, ValueError)
+
+ def test_unicode_longname2(self):
+ self._test_ustar_name("0123456789" * 15 + "01\xff/2" + "0123456789" * 10, ValueError)
+ self._test_ustar_name("0123456789" * 15 + "01\xff\xff/" + "0123456789" * 10, ValueError)
+
+ def test_unicode_longname3(self):
+ self._test_ustar_name("0123456789" * 15 + "01\xff\xff/2" + "0123456789" * 10, ValueError)
+ self._test_ustar_name("0123456789" * 15 + "01234/" + "0123456789" * 9 + "01234567\xff")
+ self._test_ustar_name("0123456789" * 15 + "01234/" + "0123456789" * 9 + "012345678\xff", ValueError)
+
+ def test_unicode_longname4(self):
+ self._test_ustar_name("0123456789" * 15 + "01234/" + "0123456789" * 9 + "012345\xff\xff")
+ self._test_ustar_name("0123456789" * 15 + "01234/" + "0123456789" * 9 + "0123456\xff\xff", ValueError)
+
+ def _test_ustar_name(self, name, exc=None):
+ with tarfile.open(tmpname, "w", format=self.format, encoding="utf-8") as tar:
+ t = tarfile.TarInfo(name)
+ if exc is None:
+ tar.addfile(t)
+ else:
+ self.assertRaises(exc, tar.addfile, t)
+
+ if exc is None:
+ with tarfile.open(tmpname, "r") as tar:
+ for t in tar:
+ self.assertEqual(name, t.name)
+ break
+
+ # Test the same as above for the 100 bytes link field.
+ def test_unicode_link1(self):
+ self._test_ustar_link("0123456789" * 10)
+ self._test_ustar_link("0123456789" * 10 + "0", ValueError)
+ self._test_ustar_link("0123456789" * 9 + "01234567\xff")
+ self._test_ustar_link("0123456789" * 9 + "012345678\xff", ValueError)
+
+ def test_unicode_link2(self):
+ self._test_ustar_link("0123456789" * 9 + "012345\xff\xff")
+ self._test_ustar_link("0123456789" * 9 + "0123456\xff\xff", ValueError)
+
+ def _test_ustar_link(self, name, exc=None):
+ with tarfile.open(tmpname, "w", format=self.format, encoding="utf-8") as tar:
+ t = tarfile.TarInfo("foo")
+ t.linkname = name
+ if exc is None:
+ tar.addfile(t)
+ else:
+ self.assertRaises(exc, tar.addfile, t)
+
+ if exc is None:
+ with tarfile.open(tmpname, "r") as tar:
+ for t in tar:
+ self.assertEqual(name, t.linkname)
+ break
+
+
+class GNUUnicodeTest(UnicodeTest, unittest.TestCase):
format = tarfile.GNU_FORMAT
self.fail("unable to read bad GNU tar pax header")
-class PAXUnicodeTest(UstarUnicodeTest):
+class PAXUnicodeTest(UnicodeTest, unittest.TestCase):
format = tarfile.PAX_FORMAT