granicus.if.org Git - python/commitdiff
Fix and deprecate the unicode_internal codec
author Victor Stinner <victor.stinner@haypocalc.com>
Thu, 10 Nov 2011 19:56:30 +0000 (20:56 +0100)
committer Victor Stinner <victor.stinner@haypocalc.com>
Thu, 10 Nov 2011 19:56:30 +0000 (20:56 +0100)
The unicode_internal codec now uses Py_UNICODE instead of the real internal
representation (PEP 393: Py_UCS1, Py_UCS2 or Py_UCS4) for backward
compatibility.

Doc/library/codecs.rst
Doc/whatsnew/3.3.rst
Modules/_codecsmodule.c
Objects/unicodeobject.c

index 4523c7ff2cebdfd70b41f823b9b930f5621425fb..a9fae95d07b6baa18a0e4302ab1eae41ef71d167 100644 (file)
@@ -1173,6 +1173,8 @@ particular, the following variants typically exist:
 | unicode_internal   |         | Return the internal       |
 |                    |         | representation of the     |
 |                    |         | operand                   |
+|                    |         |                           |
+|                    |         | .. deprecated:: 3.3       |
 +--------------------+---------+---------------------------+
 
 The following codecs provide bytes-to-bytes mappings.
index 911d8d9f7e541ed1e124a1f04f8f9d327d02c490..7f4517ff28fd452694fef3f414a62b939480475b 100644 (file)
@@ -250,6 +250,8 @@ versions.
 
 (:issue:`12100`)
 
+The ``unicode_internal`` codec has been deprecated.
+
 crypt
 -----
 
index 727cf5e77a3b607c7840706a5bdf30550c0c20df..93cb1b702b5a816e7dbb7235957f27fee1055a50 100644 (file)
@@ -675,18 +675,30 @@ unicode_internal_encode(PyObject *self,
     PyObject *obj;
     const char *errors = NULL;
     const char *data;
-    Py_ssize_t size;
+    Py_ssize_t len, size;
 
     if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
                           &obj, &errors))
         return NULL;
 
     if (PyUnicode_Check(obj)) {
+        Py_UNICODE *u;
+
         if (PyUnicode_READY(obj) < 0)
             return NULL;
-        data = PyUnicode_AS_DATA(obj);
-        size = PyUnicode_GET_DATA_SIZE(obj);
-        return codec_tuple(PyBytes_FromStringAndSize(data, size),
+
+        if (PyErr_WarnEx(PyExc_DeprecationWarning,
+                         "unicode_internal codecs has been deprecated",
+                         1))
+            return NULL;
+
+        u = PyUnicode_AsUnicodeAndSize(obj, &len);
+        if (u == NULL)
+            return NULL;
+        if (len > PY_SSIZE_T_MAX / sizeof(Py_UNICODE))
+            return PyErr_NoMemory();
+        size = len * sizeof(Py_UNICODE);
+        return codec_tuple(PyBytes_FromStringAndSize((const char*)u, size),
                            PyUnicode_GET_LENGTH(obj));
     }
     else {
index 61534b48d588763e98fa12bd9497184350b0046a..3f580b5ff606114e5c7df8e19985e1bdd780fd2c 100644 (file)
@@ -6237,6 +6237,11 @@ _PyUnicode_DecodeUnicodeInternal(const char *s,
     PyObject *errorHandler = NULL;
     PyObject *exc = NULL;
 
+    if (PyErr_WarnEx(PyExc_DeprecationWarning,
+                     "unicode_internal codecs has been deprecated",
+                     1))
+        return NULL;
+
     /* XXX overflow detection missing */
     v = PyUnicode_New((size+Py_UNICODE_SIZE-1)/ Py_UNICODE_SIZE, 127);
     if (v == NULL)
@@ -6270,15 +6275,26 @@ _PyUnicode_DecodeUnicodeInternal(const char *s,
                     errors, &errorHandler,
                     "unicode_internal", reason,
                     &starts, &end, &startinpos, &endinpos, &exc, &s,
-                    &v, &outpos)) {
+                    &v, &outpos))
                 goto onError;
-            }
+            continue;
         }
-        else {
-            if (unicode_putchar(&v, &outpos, ch) < 0)
-                goto onError;
-            s += Py_UNICODE_SIZE;
+
+        s += Py_UNICODE_SIZE;
+#ifndef Py_UNICODE_WIDE
+        if (ch >= 0xD800 && ch <= 0xDBFF && s < end)
+        {
+            Py_UCS4 ch2 = *(Py_UNICODE*)s;
+            if (ch2 >= 0xDC00 && ch2 <= 0xDFFF)
+            {
+                ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000;
+                s += Py_UNICODE_SIZE;
+            }
         }
+#endif
+
+        if (unicode_putchar(&v, &outpos, ch) < 0)
+            goto onError;
     }
 
     if (PyUnicode_Resize(&v, outpos) < 0)