granicus.if.org Git - python/commitdiff
Normalize the encoding names for Latin-1 and UTF-8 to
authorMarc-André Lemburg <mal@egenix.com>
Fri, 25 Feb 2011 15:42:01 +0000 (15:42 +0000)
committerMarc-André Lemburg <mal@egenix.com>
Fri, 25 Feb 2011 15:42:01 +0000 (15:42 +0000)
'latin-1' and 'utf-8'.

These are optimized in the Python Unicode implementation
to result in more direct processing, bypassing the codec
registry.

Also see issue11303.

32 files changed:
Lib/asynchat.py
Lib/distutils/command/bdist_wininst.py
Lib/ftplib.py
Lib/http/client.py
Lib/http/server.py
Lib/multiprocessing/connection.py
Lib/random.py
Lib/smtpd.py
Lib/sqlite3/test/types.py
Lib/sre_parse.py
Lib/tarfile.py
Lib/test/test_argparse.py
Lib/test/test_bigmem.py
Lib/test/test_bytes.py
Lib/test/test_cmd_line.py
Lib/test/test_codecs.py
Lib/test/test_io.py
Lib/test/test_mailbox.py
Lib/test/test_nntplib.py
Lib/test/test_pep3120.py
Lib/test/test_sax.py
Lib/test/test_shelve.py
Lib/test/test_socket.py
Lib/test/test_strlit.py
Lib/test/test_subprocess.py
Lib/test/test_tarfile.py
Lib/test/test_unicode.py
Lib/test/test_urllib.py
Lib/test/test_urllib2.py
Lib/test/test_uuid.py
Lib/test/test_xml_etree.py
Lib/urllib/request.py

index 65585129ceaaa9b40d1292fdee08ac70f1250521..2199d1b3eff37c42da09767d7ee1f7b939c4881f 100644 (file)
@@ -75,7 +75,7 @@ class async_chat (asyncore.dispatcher):
     # sign of an application bug that we don't want to pass silently
 
     use_encoding            = 0
-    encoding                = 'latin1'
+    encoding                = 'latin-1'
 
     def __init__ (self, sock=None, map=None):
         # for string terminator matching
index b2e2fc6dc8ec9d33cde57b9a0baf6b8eb67c56d6..b886055f27724a6628007e760e620b121c17bfaa 100644 (file)
@@ -263,11 +263,11 @@ class bdist_wininst(Command):
         cfgdata = cfgdata + b"\0"
         if self.pre_install_script:
             # We need to normalize newlines, so we open in text mode and
-            # convert back to bytes. "latin1" simply avoids any possible
+            # convert back to bytes. "latin-1" simply avoids any possible
             # failures.
             with open(self.pre_install_script, "r",
-                encoding="latin1") as script:
-                script_data = script.read().encode("latin1")
+                encoding="latin-1") as script:
+                script_data = script.read().encode("latin-1")
             cfgdata = cfgdata + script_data + b"\n\0"
         else:
             # empty pre-install script
index fd5a8633e36d16991743f47199f4e3421647d7f3..8dce9ac9af4f81896b38495af9ba1c5b157aaca1 100644 (file)
@@ -100,7 +100,7 @@ class FTP:
     file = None
     welcome = None
     passiveserver = 1
-    encoding = "latin1"
+    encoding = "latin-1"
 
     # Initialization method (called by class instantiation).
     # Initialize host to localhost, port to standard ftp port
index 36b734929b13ba2123c3ed03a3e20a5e647d8248..8ad7cb65c40132d50a89141fca65a05152e6413f 100644 (file)
@@ -697,7 +697,7 @@ class HTTPConnection:
         self.send(connect_bytes)
         for header, value in self._tunnel_headers.items():
             header_str = "%s: %s\r\n" % (header, value)
-            header_bytes = header_str.encode("latin1")
+            header_bytes = header_str.encode("latin-1")
             self.send(header_bytes)
         self.send(b'\r\n')
 
@@ -937,7 +937,7 @@ class HTTPConnection:
         values = list(values)
         for i, one_value in enumerate(values):
             if hasattr(one_value, 'encode'):
-                values[i] = one_value.encode('latin1')
+                values[i] = one_value.encode('latin-1')
             elif isinstance(one_value, int):
                 values[i] = str(one_value).encode('ascii')
         value = b'\r\n\t'.join(values)
index 543abe0c9f949a46eb1bb5e39ff467a75455dee1..a35fd9d6ee21f511bed1a2d50b6811e464967ba5 100644 (file)
@@ -448,7 +448,7 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
                 message = ''
         if self.request_version != 'HTTP/0.9':
             self.wfile.write(("%s %d %s\r\n" %
-                              (self.protocol_version, code, message)).encode('latin1', 'strict'))
+                              (self.protocol_version, code, message)).encode('latin-1', 'strict'))
 
     def send_header(self, keyword, value):
         """Send a MIME header."""
@@ -456,7 +456,7 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
             if not hasattr(self, '_headers_buffer'):
                 self._headers_buffer = []
             self._headers_buffer.append(
-                ("%s: %s\r\n" % (keyword, value)).encode('latin1', 'strict'))
+                ("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict'))
 
         if keyword.lower() == 'connection':
             if value.lower() == 'close':
index d6c23fb0ecdfe4da0c9b85afd0781696bf13dd28..d6627e5d2d160b6ee0675685db5e2372e4483165 100644 (file)
@@ -434,10 +434,10 @@ class ConnectionWrapper(object):
         return self._loads(s)
 
 def _xml_dumps(obj):
-    return xmlrpclib.dumps((obj,), None, None, None, 1).encode('utf8')
+    return xmlrpclib.dumps((obj,), None, None, None, 1).encode('utf-8')
 
 def _xml_loads(s):
-    (obj,), method = xmlrpclib.loads(s.decode('utf8'))
+    (obj,), method = xmlrpclib.loads(s.decode('utf-8'))
     return obj
 
 class XmlListener(Listener):
index cb49d5634f6cee509e45c01b91ce3cab328bf40e..f29803e8fbf74ee3478c5583559ebf153f51432b 100644 (file)
@@ -114,7 +114,7 @@ class Random(_random.Random):
         if version == 2:
             if isinstance(a, (str, bytes, bytearray)):
                 if isinstance(a, str):
-                    a = a.encode("utf8")
+                    a = a.encode("utf-8")
                 a += _sha512(a).digest()
                 a = int.from_bytes(a, 'big')
 
index 599e79b7eddf91e0f433af2d5ddcaf7f98345b0a..32f45ae768434d4d810728880532c5eb6261699f 100755 (executable)
@@ -275,7 +275,7 @@ class SMTPChannel(asynchat.async_chat):
             return
         elif limit:
             self.num_bytes += len(data)
-        self.received_lines.append(str(data, "utf8"))
+        self.received_lines.append(str(data, "utf-8"))
 
     # Implementation of base class abstract method
     def found_terminator(self):
index 29413e14ec37689f94752b13e11d0ac92674bb8c..d214f3d630676175cd4d69f5721e03cfc95dfed1 100644 (file)
@@ -85,7 +85,7 @@ class DeclTypesTests(unittest.TestCase):
             if isinstance(_val, bytes):
                 # sqlite3 always calls __init__ with a bytes created from a
                 # UTF-8 string when __conform__ was used to store the object.
-                _val = _val.decode('utf8')
+                _val = _val.decode('utf-8')
             self.val = _val
 
         def __cmp__(self, other):
index 13737ca12f0e3ffd8ce362f26a8309057eadf081..ae63c31ebc04e0b7779e8bb061aaa81ca70161da 100644 (file)
@@ -791,7 +791,7 @@ def parse_template(source, pattern):
     else:
         # The tokenizer implicitly decodes bytes objects as latin-1, we must
         # therefore re-encode the final representation.
-        encode = lambda x: x.encode('latin1')
+        encode = lambda x: x.encode('latin-1')
     for c, s in p:
         if c is MARK:
             groupsappend((i, s))
index 0f9d1dade180239aabdd570db363eb6500bfe830..6b663f4d289c344d9aa2e6a78d271c569147839b 100644 (file)
@@ -1084,7 +1084,7 @@ class TarInfo(object):
     def create_pax_global_header(cls, pax_headers):
         """Return the object as a pax global header block sequence.
         """
-        return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf8")
+        return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")
 
     def _posix_split_name(self, name):
         """Split a name longer than 100 chars into a prefix
@@ -1167,7 +1167,7 @@ class TarInfo(object):
         binary = False
         for keyword, value in pax_headers.items():
             try:
-                value.encode("utf8", "strict")
+                value.encode("utf-8", "strict")
             except UnicodeEncodeError:
                 binary = True
                 break
@@ -1178,13 +1178,13 @@ class TarInfo(object):
             records += b"21 hdrcharset=BINARY\n"
 
         for keyword, value in pax_headers.items():
-            keyword = keyword.encode("utf8")
+            keyword = keyword.encode("utf-8")
             if binary:
                 # Try to restore the original byte representation of `value'.
                 # Needless to say, that the encoding must match the string.
                 value = value.encode(encoding, "surrogateescape")
             else:
-                value = value.encode("utf8")
+                value = value.encode("utf-8")
 
             l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
             n = p = 0
@@ -1393,7 +1393,7 @@ class TarInfo(object):
         # the translation to UTF-8 fails.
         match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
         if match is not None:
-            pax_headers["hdrcharset"] = match.group(1).decode("utf8")
+            pax_headers["hdrcharset"] = match.group(1).decode("utf-8")
 
         # For the time being, we don't care about anything other than "BINARY".
         # The only other value that is currently allowed by the standard is
@@ -1402,7 +1402,7 @@ class TarInfo(object):
         if hdrcharset == "BINARY":
             encoding = tarfile.encoding
         else:
-            encoding = "utf8"
+            encoding = "utf-8"
 
         # Parse pax header information. A record looks like that:
         # "%d %s=%s\n" % (length, keyword, value). length is the size
@@ -1419,20 +1419,20 @@ class TarInfo(object):
             length = int(length)
             value = buf[match.end(2) + 1:match.start(1) + length - 1]
 
-            # Normally, we could just use "utf8" as the encoding and "strict"
+            # Normally, we could just use "utf-8" as the encoding and "strict"
             # as the error handler, but we better not take the risk. For
             # example, GNU tar <= 1.23 is known to store filenames it cannot
             # translate to UTF-8 as raw strings (unfortunately without a
             # hdrcharset=BINARY header).
             # We first try the strict standard encoding, and if that fails we
             # fall back on the user's encoding and error handler.
-            keyword = self._decode_pax_field(keyword, "utf8", "utf8",
+            keyword = self._decode_pax_field(keyword, "utf-8", "utf-8",
                     tarfile.errors)
             if keyword in PAX_NAME_FIELDS:
                 value = self._decode_pax_field(value, encoding, tarfile.encoding,
                         tarfile.errors)
             else:
-                value = self._decode_pax_field(value, "utf8", "utf8",
+                value = self._decode_pax_field(value, "utf-8", "utf-8",
                         tarfile.errors)
 
             pax_headers[keyword] = value
index 03c95fade4125e27d49f68e232f878295a292b2b..8d803360ce4fef033de6724085adc42aabd6df0c 100644 (file)
@@ -4328,7 +4328,7 @@ class TestEncoding(TestCase):
     def _test_module_encoding(self, path):
         path, _ = os.path.splitext(path)
         path += ".py"
-        with codecs.open(path, 'r', 'utf8') as f:
+        with codecs.open(path, 'r', 'utf-8') as f:
             f.read()
 
     def test_argparse_module_encoding(self):
index ac6b1096e1637c4f37ff421d8e305a756c7498b3..f9a0a3dceaaecd3d14c49d76fe628ffa07d4a683 100644 (file)
@@ -707,7 +707,7 @@ class StrTest(unittest.TestCase, BaseStrTest):
 class BytesTest(unittest.TestCase, BaseStrTest):
 
     def from_latin1(self, s):
-        return s.encode("latin1")
+        return s.encode("latin-1")
 
     @bigmemtest(minsize=_2G + 2, memuse=1 + character_size)
     def test_decode(self, size):
@@ -718,7 +718,7 @@ class BytesTest(unittest.TestCase, BaseStrTest):
 class BytearrayTest(unittest.TestCase, BaseStrTest):
 
     def from_latin1(self, s):
-        return bytearray(s.encode("latin1"))
+        return bytearray(s.encode("latin-1"))
 
     @bigmemtest(minsize=_2G + 2, memuse=1 + character_size)
     def test_decode(self, size):
index e5c7ccd4ff0aca18418639bd356da4672f88daa7..84867bb64f2c1088c4507f99415304f6d5ecb9f8 100644 (file)
@@ -188,24 +188,26 @@ class BaseBytesTest(unittest.TestCase):
 
     def test_encoding(self):
         sample = "Hello world\n\u1234\u5678\u9abc"
-        for enc in ("utf8", "utf16"):
+        for enc in ("utf-8", "utf-16"):
             b = self.type2test(sample, enc)
             self.assertEqual(b, self.type2test(sample.encode(enc)))
-        self.assertRaises(UnicodeEncodeError, self.type2test, sample, "latin1")
-        b = self.type2test(sample, "latin1", "ignore")
+        self.assertRaises(UnicodeEncodeError, self.type2test, sample, "latin-1")
+        b = self.type2test(sample, "latin-1", "ignore")
         self.assertEqual(b, self.type2test(sample[:-3], "utf-8"))
 
     def test_decode(self):
         sample = "Hello world\n\u1234\u5678\u9abc\def0\def0"
-        for enc in ("utf8", "utf16"):
+        for enc in ("utf-8", "utf-16"):
             b = self.type2test(sample, enc)
             self.assertEqual(b.decode(enc), sample)
         sample = "Hello world\n\x80\x81\xfe\xff"
-        b = self.type2test(sample, "latin1")
-        self.assertRaises(UnicodeDecodeError, b.decode, "utf8")
-        self.assertEqual(b.decode("utf8", "ignore"), "Hello world\n")
-        self.assertEqual(b.decode(errors="ignore", encoding="utf8"),
+        b = self.type2test(sample, "latin-1")
+        self.assertRaises(UnicodeDecodeError, b.decode, "utf-8")
+        self.assertEqual(b.decode("utf-8", "ignore"), "Hello world\n")
+        self.assertEqual(b.decode(errors="ignore", encoding="utf-8"),
                          "Hello world\n")
+        # Default encoding is utf-8
+        self.assertEqual(self.type2test(b'\xe2\x98\x83').decode(), '\u2603')
 
     def test_from_int(self):
         b = self.type2test(0)
index c4e3adfb534d209c136c0569b2d6a7457899aa43..a0a85aeee80900a91b86172347c58afc56b7377c 100644 (file)
@@ -151,7 +151,7 @@ class CmdLineTest(unittest.TestCase):
     @unittest.skipUnless(sys.platform == 'darwin', 'test specific to Mac OS X')
     def test_osx_utf8(self):
         def check_output(text):
-            decoded = text.decode('utf8', 'surrogateescape')
+            decoded = text.decode('utf-8', 'surrogateescape')
             expected = ascii(decoded).encode('ascii') + b'\n'
 
             env = os.environ.copy()
@@ -223,7 +223,7 @@ class CmdLineTest(unittest.TestCase):
         self.assertIn(path2.encode('ascii'), out)
 
     def test_displayhook_unencodable(self):
-        for encoding in ('ascii', 'latin1', 'utf8'):
+        for encoding in ('ascii', 'latin-1', 'utf-8'):
             env = os.environ.copy()
             env['PYTHONIOENCODING'] = encoding
             p = subprocess.Popen(
index d560d7aa4646bcc2a1208a0ec9fb6308611f1eb2..67a5aed01c4a88ad6c63f2c147bfe2bcf3443616 100644 (file)
@@ -1250,7 +1250,7 @@ class EncodedFileTest(unittest.TestCase):
         self.assertEqual(ef.read(), b'\\\xd5\n\x00\x00\xae')
 
         f = io.BytesIO()
-        ef = codecs.EncodedFile(f, 'utf-8', 'latin1')
+        ef = codecs.EncodedFile(f, 'utf-8', 'latin-1')
         ef.write(b'\xc3\xbc')
         self.assertEqual(f.getvalue(), b'\xfc')
 
@@ -1611,7 +1611,7 @@ class SurrogateEscapeTest(unittest.TestCase):
 
     def test_latin1(self):
         # Issue6373
-        self.assertEqual("\udce4\udceb\udcef\udcf6\udcfc".encode("latin1", "surrogateescape"),
+        self.assertEqual("\udce4\udceb\udcef\udcf6\udcfc".encode("latin-1", "surrogateescape"),
                          b"\xe4\xeb\xef\xf6\xfc")
 
 
index 3a2589e36a43d7bb2a99a4c63d68982a8eb7c660..624a68de3f98085bb07d20bae6a3d23843c10acb 100644 (file)
@@ -46,7 +46,7 @@ except ImportError:
 
 def _default_chunk_size():
     """Get the default TextIOWrapper chunk size"""
-    with open(__file__, "r", encoding="latin1") as f:
+    with open(__file__, "r", encoding="latin-1") as f:
         return f._CHUNK_SIZE
 
 
@@ -1684,11 +1684,11 @@ class TextIOWrapperTest(unittest.TestCase):
         r = self.BytesIO(b"\xc3\xa9\n\n")
         b = self.BufferedReader(r, 1000)
         t = self.TextIOWrapper(b)
-        t.__init__(b, encoding="latin1", newline="\r\n")
-        self.assertEqual(t.encoding, "latin1")
+        t.__init__(b, encoding="latin-1", newline="\r\n")
+        self.assertEqual(t.encoding, "latin-1")
         self.assertEqual(t.line_buffering, False)
-        t.__init__(b, encoding="utf8", line_buffering=True)
-        self.assertEqual(t.encoding, "utf8")
+        t.__init__(b, encoding="utf-8", line_buffering=True)
+        self.assertEqual(t.encoding, "utf-8")
         self.assertEqual(t.line_buffering, True)
         self.assertEqual("\xe9\n", t.readline())
         self.assertRaises(TypeError, t.__init__, b, newline=42)
@@ -1738,8 +1738,8 @@ class TextIOWrapperTest(unittest.TestCase):
     def test_encoding(self):
         # Check the encoding attribute is always set, and valid
         b = self.BytesIO()
-        t = self.TextIOWrapper(b, encoding="utf8")
-        self.assertEqual(t.encoding, "utf8")
+        t = self.TextIOWrapper(b, encoding="utf-8")
+        self.assertEqual(t.encoding, "utf-8")
         t = self.TextIOWrapper(b)
         self.assertTrue(t.encoding is not None)
         codecs.lookup(t.encoding)
@@ -1918,7 +1918,7 @@ class TextIOWrapperTest(unittest.TestCase):
 
     def test_basic_io(self):
         for chunksize in (1, 2, 3, 4, 5, 15, 16, 17, 31, 32, 33, 63, 64, 65):
-            for enc in "ascii", "latin1", "utf8" :# , "utf-16-be", "utf-16-le":
+            for enc in "ascii", "latin-1", "utf-8" :# , "utf-16-be", "utf-16-le":
                 f = self.open(support.TESTFN, "w+", encoding=enc)
                 f._CHUNK_SIZE = chunksize
                 self.assertEqual(f.write("abc"), 3)
@@ -1968,7 +1968,7 @@ class TextIOWrapperTest(unittest.TestCase):
         self.assertEqual(rlines, wlines)
 
     def test_telling(self):
-        f = self.open(support.TESTFN, "w+", encoding="utf8")
+        f = self.open(support.TESTFN, "w+", encoding="utf-8")
         p0 = f.tell()
         f.write("\xff\n")
         p1 = f.tell()
index 1e4f88759bd202b18bdb720188cba309cccf9875..03f814a8304783347445cbd2cb441f45602a8383 100644 (file)
@@ -95,14 +95,14 @@ class TestMailbox(TestBase):
             """)
 
     def test_add_invalid_8bit_bytes_header(self):
-        key = self._box.add(self._nonascii_msg.encode('latin1'))
+        key = self._box.add(self._nonascii_msg.encode('latin-1'))
         self.assertEqual(len(self._box), 1)
         self.assertEqual(self._box.get_bytes(key),
-            self._nonascii_msg.encode('latin1'))
+            self._nonascii_msg.encode('latin-1'))
 
     def test_invalid_nonascii_header_as_string(self):
         subj = self._nonascii_msg.splitlines()[1]
-        key = self._box.add(subj.encode('latin1'))
+        key = self._box.add(subj.encode('latin-1'))
         self.assertEqual(self._box.get_string(key),
             'Subject: =?unknown-8bit?b?RmFsaW5hcHThciBo4Xpob3pzeuFsbO104XNz'
             'YWwuIE3hciByZW5kZWx06Ww/?=\n\n')
index 4577ddb9d15109a511d7eee24eaa11541f23ff26..e463e5231d20c03c0bb27097a394e8ec7c5ac2f8 100644 (file)
@@ -813,7 +813,7 @@ class NNTPv1v2TestsMixin:
 
     def _check_article_body(self, lines):
         self.assertEqual(len(lines), 4)
-        self.assertEqual(lines[-1].decode('utf8'), "-- Signed by André.")
+        self.assertEqual(lines[-1].decode('utf-8'), "-- Signed by André.")
         self.assertEqual(lines[-2], b"")
         self.assertEqual(lines[-3], b".Here is a dot-starting line.")
         self.assertEqual(lines[-4], b"This is just a test article.")
index 09fedf0d543ac59df80de4198efe59859b0ecaa7..496f8da97125439af5e51b9a40283201a34b8c7e 100644 (file)
@@ -19,8 +19,8 @@ class PEP3120Test(unittest.TestCase):
         try:
             import test.badsyntax_pep3120
         except SyntaxError as msg:
-            msg = str(msg)
-            self.assertTrue('UTF-8' in msg or 'utf8' in msg)
+            msg = str(msg).lower()
+            self.assertTrue('utf-8' in msg or 'utf8' in msg)
         else:
             self.fail("expected exception didn't occur")
 
index 0f6a1ca96ccdbd223004ccdb9a66053e443f6e72..bddb375eaed173d14646e927093a008d54e23e2c 100644 (file)
@@ -20,8 +20,8 @@ import unittest
 TEST_XMLFILE = findfile("test.xml", subdir="xmltestdata")
 TEST_XMLFILE_OUT = findfile("test.xml.out", subdir="xmltestdata")
 try:
-    TEST_XMLFILE.encode("utf8")
-    TEST_XMLFILE_OUT.encode("utf8")
+    TEST_XMLFILE.encode("utf-8")
+    TEST_XMLFILE_OUT.encode("utf-8")
 except UnicodeEncodeError:
     raise unittest.SkipTest("filename is not encodable to utf8")
 
index c9c006717f12be8644d287402ee475534d926274..13c126566d62c73d04fdeed92d05127e99929764 100644 (file)
@@ -129,8 +129,8 @@ class TestCase(unittest.TestCase):
         shelve.Shelf(d)[key] = [1]
         self.assertIn(key.encode('utf-8'), d)
         # but a different one can be given
-        shelve.Shelf(d, keyencoding='latin1')[key] = [1]
-        self.assertIn(key.encode('latin1'), d)
+        shelve.Shelf(d, keyencoding='latin-1')[key] = [1]
+        self.assertIn(key.encode('latin-1'), d)
         # with all consequences
         s = shelve.Shelf(d, keyencoding='ascii')
         self.assertRaises(UnicodeEncodeError, s.__setitem__, key, [1])
index 23d22a8695fe57af3cfe991b6037a05aa012fe65..9d5d8ca026904d0bedfd966ce164e361b7e2356a 100644 (file)
@@ -44,7 +44,7 @@ def linux_version():
         return 0, 0, 0
 
 HOST = support.HOST
-MSG = 'Michael Gilfix was here\u1234\r\n'.encode('utf8') ## test unicode string and carriage return
+MSG = 'Michael Gilfix was here\u1234\r\n'.encode('utf-8') ## test unicode string and carriage return
 SUPPORTS_IPV6 = socket.has_ipv6 and try_address('::1', family=socket.AF_INET6)
 
 try:
@@ -1065,7 +1065,7 @@ class FileObjectClassTestCase(SocketConnectedTest):
     """
 
     bufsize = -1 # Use default buffer size
-    encoding = 'utf8'
+    encoding = 'utf-8'
     errors = 'strict'
     newline = None
 
@@ -1269,7 +1269,7 @@ class FileObjectInterruptedTestCase(unittest.TestCase):
             data = b''
         else:
             data = ''
-            expecting = expecting.decode('utf8')
+            expecting = expecting.decode('utf-8')
         while len(data) != len(expecting):
             part = fo.read(size)
             if not part:
@@ -1427,7 +1427,7 @@ class UnicodeReadFileObjectClassTestCase(FileObjectClassTestCase):
     """Tests for socket.makefile() in text mode (rather than binary)"""
 
     read_mode = 'r'
-    read_msg = MSG.decode('utf8')
+    read_msg = MSG.decode('utf-8')
     write_mode = 'wb'
     write_msg = MSG
     newline = ''
@@ -1439,7 +1439,7 @@ class UnicodeWriteFileObjectClassTestCase(FileObjectClassTestCase):
     read_mode = 'rb'
     read_msg = MSG
     write_mode = 'w'
-    write_msg = MSG.decode('utf8')
+    write_msg = MSG.decode('utf-8')
     newline = ''
 
 
@@ -1447,9 +1447,9 @@ class UnicodeReadWriteFileObjectClassTestCase(FileObjectClassTestCase):
     """Tests for socket.makefile() in text mode (rather than binary)"""
 
     read_mode = 'r'
-    read_msg = MSG.decode('utf8')
+    read_msg = MSG.decode('utf-8')
     write_mode = 'w'
-    write_msg = MSG.decode('utf8')
+    write_msg = MSG.decode('utf-8')
     newline = ''
 
 
index 23d96f867a85da681b5da88702a2887bac39fead..2bcf4d1ad88b09558cf83ec86350cf90fbb0dc09 100644 (file)
@@ -130,7 +130,7 @@ class TestLiterals(unittest.TestCase):
         self.assertRaises(SyntaxError, self.check_encoding, "utf-8", extra)
 
     def test_file_utf8(self):
-        self.check_encoding("utf8")
+        self.check_encoding("utf-8")
 
     def test_file_iso_8859_1(self):
         self.check_encoding("iso-8859-1")
index 4b58308497976def800adf5c08f9248e683996d4..9e267ebf59e14518325a9e1ea25871b02d4bdcf7 100644 (file)
@@ -1191,7 +1191,7 @@ class POSIXProcessTestCase(BaseTestCase):
         stdout, stderr = p.communicate()
         self.assertEqual(0, p.returncode, "sigchild_ignore.py exited"
                          " non-zero with this error:\n%s" %
-                         stderr.decode('utf8'))
+                         stderr.decode('utf-8'))
 
 
 @unittest.skipUnless(mswindows, "Windows specific tests")
index 68e094d5dbc872199b252f6570a923d93792c5f7..a645bf286334df8428cabcb89f4c4a84a5e8778e 100644 (file)
@@ -1289,7 +1289,7 @@ class UstarUnicodeTest(unittest.TestCase):
         self._test_unicode_filename("utf7")
 
     def test_utf8_filename(self):
-        self._test_unicode_filename("utf8")
+        self._test_unicode_filename("utf-8")
 
     def _test_unicode_filename(self, encoding):
         tar = tarfile.open(tmpname, "w", format=self.format, encoding=encoding, errors="strict")
@@ -1368,7 +1368,7 @@ class GNUUnicodeTest(UstarUnicodeTest):
     def test_bad_pax_header(self):
         # Test for issue #8633. GNU tar <= 1.23 creates raw binary fields
         # without a hdrcharset=BINARY header.
-        for encoding, name in (("utf8", "pax/bad-pax-\udce4\udcf6\udcfc"),
+        for encoding, name in (("utf-8", "pax/bad-pax-\udce4\udcf6\udcfc"),
                 ("iso8859-1", "pax/bad-pax-\xe4\xf6\xfc"),):
             with tarfile.open(tarname, encoding=encoding, errors="surrogateescape") as tar:
                 try:
@@ -1383,7 +1383,7 @@ class PAXUnicodeTest(UstarUnicodeTest):
 
     def test_binary_header(self):
         # Test a POSIX.1-2008 compatible header with a hdrcharset=BINARY field.
-        for encoding, name in (("utf8", "pax/hdrcharset-\udce4\udcf6\udcfc"),
+        for encoding, name in (("utf-8", "pax/hdrcharset-\udce4\udcf6\udcfc"),
                 ("iso8859-1", "pax/hdrcharset-\xe4\xf6\xfc"),):
             with tarfile.open(tarname, encoding=encoding, errors="surrogateescape") as tar:
                 try:
index 9ad9eed6ac97d6350f584dbdc03042466cc5bce7..d97894ce26d70007db076d68c0c66402e911f3a3 100644 (file)
@@ -1182,11 +1182,14 @@ class UnicodeTest(string_tests.CommonTest,
         self.assertEqual('hello'.encode('ascii'), b'hello')
         self.assertEqual('hello'.encode('utf-7'), b'hello')
         self.assertEqual('hello'.encode('utf-8'), b'hello')
-        self.assertEqual('hello'.encode('utf8'), b'hello')
+        self.assertEqual('hello'.encode('utf-8'), b'hello')
         self.assertEqual('hello'.encode('utf-16-le'), b'h\000e\000l\000l\000o\000')
         self.assertEqual('hello'.encode('utf-16-be'), b'\000h\000e\000l\000l\000o')
         self.assertEqual('hello'.encode('latin-1'), b'hello')
 
+        # Default encoding is utf-8
+        self.assertEqual('\u2603'.encode(), b'\xe2\x98\x83')
+
         # Roundtrip safety for BMP (just the first 1024 chars)
         for c in range(1024):
             u = chr(c)
index 3003331416f496be9c207f5ad070eb68924feaac..e148c62b90165f773c96ed7cf0078a93b7f4e2c8 100644 (file)
@@ -251,7 +251,7 @@ class urlretrieve_FileTests(unittest.TestCase):
     def constructLocalFileUrl(self, filePath):
         filePath = os.path.abspath(filePath)
         try:
-            filePath.encode("utf8")
+            filePath.encode("utf-8")
         except UnicodeEncodeError:
             raise unittest.SkipTest("filePath is not encodable to utf8")
         return "file://%s" % urllib.request.pathname2url(filePath)
index 14336705faa66cbf7376046b78eb0ee024d8c736..4ddbe3f94bd90491dae1b7851c6e5eeb3ba12804 100644 (file)
@@ -599,7 +599,7 @@ class OpenerDirectorTests(unittest.TestCase):
 
 def sanepathname2url(path):
     try:
-        path.encode("utf8")
+        path.encode("utf-8")
     except UnicodeEncodeError:
         raise unittest.SkipTest("path is not encodable to utf8")
     urlpath = urllib.request.pathname2url(path)
index 43fa6561023b55a9940cc4a97c5aa0fe40e3bd80..7bc59ed9fdf560a3a45bf13d370532cdf271e884 100644 (file)
@@ -471,14 +471,14 @@ class TestUUID(TestCase):
         if pid == 0:
             os.close(fds[0])
             value = uuid.uuid4()
-            os.write(fds[1], value.hex.encode('latin1'))
+            os.write(fds[1], value.hex.encode('latin-1'))
             os._exit(0)
 
         else:
             os.close(fds[1])
             parent_value = uuid.uuid4().hex
             os.waitpid(pid, 0)
-            child_value = os.read(fds[0], 100).decode('latin1')
+            child_value = os.read(fds[0], 100).decode('latin-1')
 
             self.assertNotEqual(parent_value, child_value)
 
index 22fafa9a2e64ff175b1c5dc649e628810c0c8574..40c22917c07440c464aa76db5d9d54a9c3ec4799 100644 (file)
@@ -22,7 +22,7 @@ from xml.etree import ElementTree as ET
 
 SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata")
 try:
-    SIMPLE_XMLFILE.encode("utf8")
+    SIMPLE_XMLFILE.encode("utf-8")
 except UnicodeEncodeError:
     raise unittest.SkipTest("filename is not encodable to utf8")
 SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata")
@@ -1255,8 +1255,8 @@ def processinginstruction():
 
     >>> ET.tostring(ET.PI('test', '<testing&>'))
     b'<?test <testing&>?>'
-    >>> ET.tostring(ET.PI('test', '<testing&>\xe3'), 'latin1')
-    b"<?xml version='1.0' encoding='latin1'?>\\n<?test <testing&>\\xe3?>"
+    >>> ET.tostring(ET.PI('test', '<testing&>\xe3'), 'latin-1')
+    b"<?xml version='1.0' encoding='latin-1'?>\\n<?test <testing&>\\xe3?>"
     """
 
 #
index dfdbdecb80c8b076b1d34f4ed33b531619a3a281..d583a82fbd7eb55bf242619579345675f479541a 100644 (file)
@@ -1846,7 +1846,7 @@ class URLopener:
         if encoding == 'base64':
             import base64
             # XXX is this encoding/decoding ok?
-            data = base64.decodebytes(data.encode('ascii')).decode('latin1')
+            data = base64.decodebytes(data.encode('ascii')).decode('latin-1')
         else:
             data = unquote(data)
         msg.append('Content-Length: %d' % len(data))