Rename utf8b error handler to surrogateescape.

author Martin v. Löwis <martin@v.loewis.de>
Sun, 10 May 2009 08:15:24 +0000 (08:15 +0000)
committer Martin v. Löwis <martin@v.loewis.de>
Sun, 10 May 2009 08:15:24 +0000 (08:15 +0000)
diff --git a/Doc/library/codecs.rst b/Doc/library/codecs.rst

index b7bd125401dcba9f1b0e0d4479defb41b404ab3a..e3f98efc68664d0e07531441933a7f2ee326ee40 100644 (file)
--- a/Doc/library/codecs.rst
+++ b/Doc/library/codecs.rst
@@ -322,7 +322,7 @@ and implemented by all standard Python codecs:
  | ``'backslashreplace'``  | Replace with backslashed escape sequences     |
  |                         | (only for encoding).                          |
  +-------------------------+-----------------------------------------------+
-| ``'utf8b'``             | Replace byte with surrogate U+DCxx.           |
+| ``'surrogateescape'``   | Replace byte with surrogate U+DCxx.           |
  +-------------------------+-----------------------------------------------+
  
  In addition, the following error handlers are specific to a single codec:
@@ -335,7 +335,7 @@ In addition, the following error handlers are specific to a single codec:
  +-------------------+---------+-------------------------------------------+
  
  .. versionadded:: 3.1
-   The ``'utf8b'`` and ``'surrogatepass'`` error handlers.
+   The ``'surrogateescape'`` and ``'surrogatepass'`` error handlers.
  
  The set of allowed values can be extended via :meth:`register_error`.
  
diff --git a/Doc/library/os.rst b/Doc/library/os.rst

index 83f5ee9dc0e8d02727c0f93aa17e4b0dd721b9a4..221374048cfd077e2a9735466b81980d841b908d 100644 (file)
--- a/Doc/library/os.rst
+++ b/Doc/library/os.rst
@@ -64,8 +64,8 @@ perform this conversion (see :func:`sys.getfilesystemencoding`).
  
  .. versionchanged:: 3.1
     On some systems, conversion using the file system encoding may
-   fail. In this case, Python uses the ``utf8b`` encoding error
-   handler, which means that undecodable bytes are replaced by a
+   fail. In this case, Python uses the ``surrogateescape`` encoding
+   error handler, which means that undecodable bytes are replaced by a
     Unicode character U+DCxx on decoding, and these are again
     translated to the original byte on encoding.
  
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py

index 9ca769910be10d8d89acfad434a15d8c681eada3..4ec7b5865cc590a426f354e90deb1d4eec4bc36b 100644 (file)
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -1521,32 +1521,32 @@ class TypesTest(unittest.TestCase):
          self.assertEquals(codecs.raw_unicode_escape_decode(r"\u1234"), ("\u1234", 6))
          self.assertEquals(codecs.raw_unicode_escape_decode(br"\u1234"), ("\u1234", 6))
  
-class Utf8bTest(unittest.TestCase):
+class SurrogateEscapeTest(unittest.TestCase):
  
      def test_utf8(self):
          # Bad byte
-        self.assertEqual(b"foo\x80bar".decode("utf-8", "utf8b"),
+        self.assertEqual(b"foo\x80bar".decode("utf-8", "surrogateescape"),
                           "foo\udc80bar")
-        self.assertEqual("foo\udc80bar".encode("utf-8", "utf8b"),
+        self.assertEqual("foo\udc80bar".encode("utf-8", "surrogateescape"),
                           b"foo\x80bar")
          # bad-utf-8 encoded surrogate
-        self.assertEqual(b"\xed\xb0\x80".decode("utf-8", "utf8b"),
+        self.assertEqual(b"\xed\xb0\x80".decode("utf-8", "surrogateescape"),
                           "\udced\udcb0\udc80")
-        self.assertEqual("\udced\udcb0\udc80".encode("utf-8", "utf8b"),
+        self.assertEqual("\udced\udcb0\udc80".encode("utf-8", "surrogateescape"),
                           b"\xed\xb0\x80")
  
      def test_ascii(self):
          # bad byte
-        self.assertEqual(b"foo\x80bar".decode("ascii", "utf8b"),
+        self.assertEqual(b"foo\x80bar".decode("ascii", "surrogateescape"),
                           "foo\udc80bar")
-        self.assertEqual("foo\udc80bar".encode("ascii", "utf8b"),
+        self.assertEqual("foo\udc80bar".encode("ascii", "surrogateescape"),
                           b"foo\x80bar")
  
      def test_charmap(self):
          # bad byte: \xa5 is unmapped in iso-8859-3
-        self.assertEqual(b"foo\xa5bar".decode("iso-8859-3", "utf8b"),
+        self.assertEqual(b"foo\xa5bar".decode("iso-8859-3", "surrogateescape"),
                           "foo\udca5bar")
-        self.assertEqual("foo\udca5bar".encode("iso-8859-3", "utf8b"),
+        self.assertEqual("foo\udca5bar".encode("iso-8859-3", "surrogateescape"),
                           b"foo\xa5bar")
  
  
@@ -1576,7 +1576,7 @@ def test_main():
          CharmapTest,
          WithStmtTest,
          TypesTest,
-        Utf8bTest,
+        SurrogateEscapeTest,
      )
  
  
diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py

index 014d874f4d3d82b40e4205297ea0e1acef76ffc4..c680d8d77a371aaa35d7da9faa6154f017c5b00b 100644 (file)
--- a/Lib/test/test_os.py
+++ b/Lib/test/test_os.py
@@ -708,13 +708,13 @@ if sys.platform != 'win32':
              self.fsencoding = sys.getfilesystemencoding()
              sys.setfilesystemencoding("utf-8")
              self.dir = support.TESTFN
-            self.bdir = self.dir.encode("utf-8", "utf8b")
+            self.bdir = self.dir.encode("utf-8", "surrogateescape")
              os.mkdir(self.dir)
              self.unicodefn = []
              for fn in self.filenames:
                  f = open(os.path.join(self.bdir, fn), "w")
                  f.close()
-                self.unicodefn.append(fn.decode("utf-8", "utf8b"))
+                self.unicodefn.append(fn.decode("utf-8", "surrogateescape"))
  
          def tearDown(self):
              shutil.rmtree(self.dir)
diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c

index 164f7e46d1b6b20de4aac096087ed4ba9280ea79..555dc12c69dd5a354683786c2bd68bed35336c4c 100644 (file)
--- a/Modules/_io/fileio.c
+++ b/Modules/_io/fileio.c
@@ -245,7 +245,7 @@ fileio_init(PyObject *oself, PyObject *args, PyObject *kwds)
                                 return -1;
  
                         stringobj = PyUnicode_AsEncodedString(
-                               u, Py_FileSystemDefaultEncoding, "utf8b");
+                               u, Py_FileSystemDefaultEncoding, "surrogateescape");
                         Py_DECREF(u);
                         if (stringobj == NULL)
                                 return -1;
diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c

index 2050d5a1a789d280c4325b5967085c8e19d61822..21dcb4d963845f27e070143420fc6e020b7a18da 100644 (file)
--- a/Modules/posixmodule.c
+++ b/Modules/posixmodule.c
@@ -494,13 +494,13 @@ convertenviron(void)
                 if (p == NULL)
                         continue;
                 k = PyUnicode_Decode(*e, (int)(p-*e),
-                                    Py_FileSystemDefaultEncoding, "utf8b");
+                                    Py_FileSystemDefaultEncoding, "surrogateescape");
                 if (k == NULL) {
                         PyErr_Clear();
                         continue;
                 }
                 v = PyUnicode_Decode(p+1, strlen(p+1),
-                                    Py_FileSystemDefaultEncoding, "utf8b");
+                                    Py_FileSystemDefaultEncoding, "surrogateescape");
                 if (v == NULL) {
                         PyErr_Clear();
                         Py_DECREF(k);
@@ -2167,7 +2167,7 @@ posix_getcwd(int use_bytes)
                 return posix_error();
         if (use_bytes)
                 return PyBytes_FromStringAndSize(buf, strlen(buf));
-       return PyUnicode_Decode(buf, strlen(buf), Py_FileSystemDefaultEncoding,"utf8b");
+       return PyUnicode_Decode(buf, strlen(buf), Py_FileSystemDefaultEncoding,"surrogateescape");
  }
  
  PyDoc_STRVAR(posix_getcwd__doc__,
@@ -2513,7 +2513,7 @@ posix_listdir(PyObject *self, PyObject *args)
  
                         w = PyUnicode_FromEncodedObject(v,
                                         Py_FileSystemDefaultEncoding,
-                                       "utf8b");
+                                       "surrogateescape");
                         Py_DECREF(v);
                         if (w != NULL)
                                 v = w;
@@ -4695,7 +4695,7 @@ posix_readlink(PyObject *self, PyObject *args)
  
                 w = PyUnicode_FromEncodedObject(v,
                                 Py_FileSystemDefaultEncoding,
-                               "utf8b");
+                               "surrogateescape");
                 if (w != NULL) {
                         Py_DECREF(v);
                         v = w;
diff --git a/Modules/python.c b/Modules/python.c

index 4c0a55bb1faabe77c717c6a89eba4f70d71547b1..13c6d5b82a42dd28998566bf92d709ae59967c5e 100644 (file)
--- a/Modules/python.c
+++ b/Modules/python.c
@@ -42,7 +42,7 @@ char2wchar(char* arg)
                         return res;
                 PyMem_Free(res);
         }
-       /* Conversion failed. Fall back to escaping with utf8b. */
+       /* Conversion failed. Fall back to escaping with surrogateescape. */
  #ifdef HAVE_MBRTOWC
         /* Try conversion with mbrtowc (C99), and escape non-decodable bytes. */
         
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c

index 3740892e675af067a3c1a26e16a207a802f037ec..3bd1efd9392f91f72700155439ae8a81f0fb91db 100644 (file)
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1549,7 +1549,7 @@ PyUnicode_FSConverter(PyObject* arg, void* addr)
              return 0;
          output = PyUnicode_AsEncodedObject(arg, 
                                             Py_FileSystemDefaultEncoding,
-                                           "utf8b");
+                                           "surrogateescape");
          Py_DECREF(arg);
          if (!output)
              return 0;
diff --git a/Python/codecs.c b/Python/codecs.c

index cd6b7f0f60c6441cec2b6094d19064e55a743a12..d1915f181d1582c356b05e8d5e157d7e4e01ba49 100644 (file)
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -830,7 +830,7 @@ PyCodec_SurrogatePassErrors(PyObject *exc)
  }
  
  static PyObject *
-PyCodec_UTF8bErrors(PyObject *exc)
+PyCodec_SurrogateEscapeErrors(PyObject *exc)
  {
      PyObject *restuple;
      PyObject *object;
@@ -940,9 +940,9 @@ static PyObject *surrogatepass_errors(PyObject *self, PyObject *exc)
      return PyCodec_SurrogatePassErrors(exc);
  }
  
-static PyObject *utf8b_errors(PyObject *self, PyObject *exc)
+static PyObject *surrogateescape_errors(PyObject *self, PyObject *exc)
  {
-    return PyCodec_UTF8bErrors(exc);
+    return PyCodec_SurrogateEscapeErrors(exc);
  }
  
  static int _PyCodecRegistry_Init(void)
@@ -1001,10 +1001,10 @@ static int _PyCodecRegistry_Init(void)
             }
         },
         {
-           "utf8b",
+           "surrogateescape",
             {
-               "utf8b",
-               utf8b_errors,
+               "surrogateescape",
+               surrogateescape_errors,
                 METH_O
             }
         }
author	Martin v. Löwis <martin@v.loewis.de>
	Sun, 10 May 2009 08:15:24 +0000 (08:15 +0000)
committer	Martin v. Löwis <martin@v.loewis.de>
	Sun, 10 May 2009 08:15:24 +0000 (08:15 +0000)
Doc/library/codecs.rst		patch | blob | history
Doc/library/os.rst		patch | blob | history
Lib/test/test_codecs.py		patch | blob | history
Lib/test/test_os.py		patch | blob | history
Modules/_io/fileio.c		patch | blob | history
Modules/posixmodule.c		patch | blob | history
Modules/python.c		patch | blob | history
Objects/unicodeobject.c		patch | blob | history
Python/codecs.c		patch | blob | history