bpo-37751: Fix codecs.lookup() normalization (GH-15092)

author Jordon Xu <46997731+qigangxu@users.noreply.github.com>

Wed, 21 Aug 2019 13:26:20 +0000 (21:26 +0800)

committer Victor Stinner <vstinner@redhat.com>

Wed, 21 Aug 2019 13:26:20 +0000 (14:26 +0100)
author Jordon Xu <46997731+qigangxu@users.noreply.github.com>
Wed, 21 Aug 2019 13:26:20 +0000 (21:26 +0800)
committer Victor Stinner <vstinner@redhat.com>
Wed, 21 Aug 2019 13:26:20 +0000 (14:26 +0100)
diff --git a/Misc/NEWS.d/next/Core and Builtins/2019-08-20-04-36-37.bpo-37751.CSFzUd.rst b/Misc/NEWS.d/next/Core and Builtins/2019-08-20-04-36-37.bpo-37751.CSFzUd.rst

new file mode 100644 (file)

index 0000000..4da59ff
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2019-08-20-04-36-37.bpo-37751.CSFzUd.rst
@@ -0,0 +1 @@
+Fix :func:`codecs.lookup` to normalize the encoding name the same way as :func:`encodings.normalize_encoding`, except that :func:`codecs.lookup` also converts the name to lower case.
diff --git a/Python/codecs.c b/Python/codecs.c

index 4f38b33e0b76f16ffe057a23c760eac773201e03..08e9b916f201d5d316c7cd4b1b1147d9b542d22a 100644 (file)
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -49,15 +49,16 @@ int PyCodec_Register(PyObject *search_function)
      return -1;
  }
  
-/* Convert a string to a normalized Python string: all characters are
-   converted to lower case, spaces are replaced with underscores. */
+extern int _Py_normalize_encoding(const char *, char *, size_t);
+
+/* Convert a string to a normalized Python string (decoded from UTF-8): all characters are
+   converted to lower case, spaces and hyphens are replaced with underscores. */
  
  static
  PyObject *normalizestring(const char *string)
  {
-    size_t i;
      size_t len = strlen(string);
-    char *p;
+    char *encoding;
      PyObject *v;
  
      if (len > PY_SSIZE_T_MAX) {
@@ -65,20 +66,19 @@ PyObject *normalizestring(const char *string)
          return NULL;
      }
  
-    p = PyMem_Malloc(len + 1);
-    if (p == NULL)
+    encoding = PyMem_Malloc(len + 1);
+    if (encoding == NULL)
          return PyErr_NoMemory();
-    for (i = 0; i < len; i++) {
-        char ch = string[i];
-        if (ch == ' ')
-            ch = '-';
-        else
-            ch = Py_TOLOWER(Py_CHARMASK(ch));
-        p[i] = ch;
+
+    if (!_Py_normalize_encoding(string, encoding, len + 1))
+    {
+        PyErr_SetString(PyExc_RuntimeError, "_Py_normalize_encoding() failed");
+        PyMem_Free(encoding);
+        return NULL;
      }
-    p[i] = '\0';
-    v = PyUnicode_FromString(p);
-    PyMem_Free(p);
+
+    v = PyUnicode_FromString(encoding);
+    PyMem_Free(encoding);
      return v;
  }
author	Jordon Xu <46997731+qigangxu@users.noreply.github.com>
	Wed, 21 Aug 2019 13:26:20 +0000 (21:26 +0800)
committer	Victor Stinner <vstinner@redhat.com>
	Wed, 21 Aug 2019 13:26:20 +0000 (14:26 +0100)
Misc/NEWS.d/next/Core and Builtins/2019-08-20-04-36-37.bpo-37751.CSFzUd.rst	[new file with mode: 0644]	patch \| blob
Python/codecs.c		patch \| blob \| history