Close #13072: Restore code before the PEP 393 for the array module

author Victor Stinner <victor.stinner@gmail.com>

Sun, 5 Aug 2012 22:46:05 +0000 (00:46 +0200)

committer Victor Stinner <victor.stinner@gmail.com>

Sun, 5 Aug 2012 22:46:05 +0000 (00:46 +0200)
author Victor Stinner <victor.stinner@gmail.com>
Sun, 5 Aug 2012 22:46:05 +0000 (00:46 +0200)
committer Victor Stinner <victor.stinner@gmail.com>
Sun, 5 Aug 2012 22:46:05 +0000 (00:46 +0200)
diff --git a/Doc/library/array.rst b/Doc/library/array.rst

index 3e275a262047ff3da4e5e279e4ee9f2bffafceeb..2eb926c5345a21b7099da22bee26ed479a07d5c7 100644 (file)
--- a/Doc/library/array.rst
+++ b/Doc/library/array.rst
@@ -21,7 +21,7 @@ defined:
  +-----------+--------------------+-------------------+-----------------------+-------+
  | ``'B'``   | unsigned char      | int               | 1                     |       |
  +-----------+--------------------+-------------------+-----------------------+-------+
-| ``'u'``   | Py_UCS4            | Unicode character | 4                     |       |
+| ``'u'``   | Py_UNICODE         | Unicode character | 2                     | \(1)  |
  +-----------+--------------------+-------------------+-----------------------+-------+
  | ``'h'``   | signed short       | int               | 2                     |       |
  +-----------+--------------------+-------------------+-----------------------+-------+
@@ -35,9 +35,9 @@ defined:
  +-----------+--------------------+-------------------+-----------------------+-------+
  | ``'L'``   | unsigned long      | int               | 4                     |       |
  +-----------+--------------------+-------------------+-----------------------+-------+
-| ``'q'``   | signed long long   | int               | 8                     | \(1)  |
+| ``'q'``   | signed long long   | int               | 8                     | \(2)  |
  +-----------+--------------------+-------------------+-----------------------+-------+
-| ``'Q'``   | unsigned long long | int               | 8                     | \(1)  |
+| ``'Q'``   | unsigned long long | int               | 8                     | \(2)  |
  +-----------+--------------------+-------------------+-----------------------+-------+
  | ``'f'``   | float              | float             | 4                     |       |
  +-----------+--------------------+-------------------+-----------------------+-------+
@@ -47,6 +47,11 @@ defined:
  Notes:
  
  (1)
+   The ``'u'`` type code corresponds to Python's Unicode character type
+   (:c:type:`Py_UNICODE`, which is :c:type:`wchar_t`). Depending on the
+   platform, it is 16 or 32 bits wide.
+
+(2)
     The ``'q'`` and ``'Q'`` type codes are available only if
     the platform C compiler used to build Python supports C :c:type:`long long`,
     or, on Windows, :c:type:`__int64`.
diff --git a/Modules/arraymodule.c b/Modules/arraymodule.c

index b0921c85a550e0402e7d99b61ab5ab738ce0062d..f0615c995c535dc8aab46f1fda0548add9810d12 100644 (file)
--- a/Modules/arraymodule.c
+++ b/Modules/arraymodule.c
@@ -174,25 +174,24 @@ BB_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v)
  static PyObject *
  u_getitem(arrayobject *ap, Py_ssize_t i)
  {
-    return PyUnicode_FromOrdinal(((Py_UCS4 *) ap->ob_item)[i]);
+    return PyUnicode_FromUnicode(&((Py_UNICODE *) ap->ob_item)[i], 1);
  }
  
  static int
  u_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v)
  {
-    PyObject *p;
+    Py_UNICODE *p;
+    Py_ssize_t len;
  
-    if (!PyArg_Parse(v, "U;array item must be unicode character", &p))
-        return -1;
-    if (PyUnicode_READY(p))
+    if (!PyArg_Parse(v, "u#;array item must be unicode character", &p, &len))
          return -1;
-    if (PyUnicode_GET_LENGTH(p) != 1) {
+    if (len != 1) {
          PyErr_SetString(PyExc_TypeError,
                          "array item must be unicode character");
          return -1;
      }
      if (i >= 0)
-        ((Py_UCS4 *)ap->ob_item)[i] = PyUnicode_READ_CHAR(p, 0);
+        ((Py_UNICODE *)ap->ob_item)[i] = p[0];
      return 0;
  }
  
@@ -444,13 +443,6 @@ d_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v)
      return 0;
  }
  
-#if SIZEOF_INT == 4
-#  define STRUCT_LONG_FORMAT "I"
-#elif SIZEOF_LONG == 4
-#  define STRUCT_LONG_FORMAT "L"
-#else
-#  error "Unable to get struct format for Py_UCS4"
-#endif
  
  /* Description of types.
   *
@@ -460,7 +452,7 @@ d_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v)
  static struct arraydescr descriptors[] = {
      {'b', 1, b_getitem, b_setitem, "b", 1, 1},
      {'B', 1, BB_getitem, BB_setitem, "B", 1, 0},
-    {'u', sizeof(Py_UCS4), u_getitem, u_setitem, STRUCT_LONG_FORMAT, 0, 0},
+    {'u', sizeof(Py_UNICODE), u_getitem, u_setitem, "u", 0, 0},
      {'h', sizeof(short), h_getitem, h_setitem, "h", 1, 1},
      {'H', sizeof(short), HH_getitem, HH_setitem, "H", 1, 0},
      {'i', sizeof(int), i_getitem, i_setitem, "i", 1, 1},
@@ -1519,26 +1511,25 @@ This method is deprecated. Use tobytes instead.");
  static PyObject *
  array_fromunicode(arrayobject *self, PyObject *args)
  {
-    PyObject *ustr;
+    Py_UNICODE *ustr;
      Py_ssize_t n;
+    char typecode;
  
-    if (!PyArg_ParseTuple(args, "U:fromunicode", &ustr))
+    if (!PyArg_ParseTuple(args, "u#:fromunicode", &ustr, &n))
          return NULL;
-    if (self->ob_descr->typecode != 'u') {
+    typecode = self->ob_descr->typecode;
+    if ((typecode != 'u')) {
          PyErr_SetString(PyExc_ValueError,
              "fromunicode() may only be called on "
              "unicode type arrays");
          return NULL;
      }
-    if (PyUnicode_READY(ustr))
-        return NULL;
-    n = PyUnicode_GET_LENGTH(ustr);
      if (n > 0) {
          Py_ssize_t old_size = Py_SIZE(self);
          if (array_resize(self, old_size + n) == -1)
              return NULL;
-        if (!PyUnicode_AsUCS4(ustr, (Py_UCS4 *)self->ob_item + old_size, n, 0))
-            return NULL;
+        memcpy(self->ob_item + old_size * sizeof(Py_UNICODE),
+               ustr, n * sizeof(Py_UNICODE));
      }
  
      Py_INCREF(Py_None);
@@ -1557,14 +1548,14 @@ append Unicode data to an array of some other type.");
  static PyObject *
  array_tounicode(arrayobject *self, PyObject *unused)
  {
-    if (self->ob_descr->typecode != 'u') {
+    char typecode;
+    typecode = self->ob_descr->typecode;
+    if ((typecode != 'u')) {
          PyErr_SetString(PyExc_ValueError,
               "tounicode() may only be called on unicode type arrays");
          return NULL;
      }
-    return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
-                                     (Py_UCS4 *) self->ob_item,
-                                     Py_SIZE(self));
+    return PyUnicode_FromUnicode((Py_UNICODE *) self->ob_item, Py_SIZE(self));
  }
  
  PyDoc_STRVAR(tounicode_doc,
@@ -1671,7 +1662,13 @@ typecode_to_mformat_code(char typecode)
          return UNSIGNED_INT8;
  
      case 'u':
-        return UTF32_LE + is_big_endian;
+        if (sizeof(Py_UNICODE) == 2) {
+            return UTF16_LE + is_big_endian;
+        }
+        if (sizeof(Py_UNICODE) == 4) {
+            return UTF32_LE + is_big_endian;
+        }
+        return UNKNOWN_FORMAT;
  
      case 'f':
          if (sizeof(float) == 4) {
@@ -2419,8 +2416,14 @@ array_buffer_getbuf(arrayobject *self, Py_buffer *view, int flags)
          view->strides = &(view->itemsize);
      view->format = NULL;
      view->internal = NULL;
-    if ((flags & PyBUF_FORMAT) == PyBUF_FORMAT)
+    if ((flags & PyBUF_FORMAT) == PyBUF_FORMAT) {
          view->format = self->ob_descr->formats;
+#ifdef Py_UNICODE_WIDE
+        if (self->ob_descr->typecode == 'u') {
+            view->format = "w";
+        }
+#endif
+    }
  
   finish:
      self->ob_exports++;
@@ -2534,25 +2537,29 @@ array_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
                  Py_DECREF(v);
              }
              else if (initial != NULL && PyUnicode_Check(initial))  {
+                Py_UNICODE *ustr;
                  Py_ssize_t n;
-                if (PyUnicode_READY(initial)) {
+
+                ustr = PyUnicode_AsUnicode(initial);
+                if (ustr == NULL) {
+                    PyErr_NoMemory();
                      Py_DECREF(a);
                      return NULL;
                  }
-                n = PyUnicode_GET_LENGTH(initial);
+
+                n = PyUnicode_GET_DATA_SIZE(initial);
                  if (n > 0) {
                      arrayobject *self = (arrayobject *)a;
-                    Py_UCS4 *item = (Py_UCS4 *)self->ob_item;
-                    item = (Py_UCS4 *)PyMem_Realloc(item, n * sizeof(Py_UCS4));
+                    char *item = self->ob_item;
+                    item = (char *)PyMem_Realloc(item, n);
                      if (item == NULL) {
                          PyErr_NoMemory();
                          Py_DECREF(a);
                          return NULL;
                      }
-                    self->ob_item = (char*)item;
-                    Py_SIZE(self) = n;
-                    if (!PyUnicode_AsUCS4(initial, item, n, 0))
-                        return NULL;
+                    self->ob_item = item;
+                    Py_SIZE(self) = n / sizeof(Py_UNICODE);
+                    memcpy(item, ustr, n);
                      self->allocated = Py_SIZE(self);
                  }
              }
@@ -2593,7 +2600,7 @@ is a single character.  The following type codes are defined:\n\
      Type code   C Type             Minimum size in bytes \n\
      'b'         signed integer     1 \n\
      'B'         unsigned integer   1 \n\
-    'u'         Unicode character  4 \n\
+    'u'         Unicode character  2 (see note) \n\
      'h'         signed integer     2 \n\
      'H'         unsigned integer   2 \n\
      'i'         signed integer     2 \n\
@@ -2605,6 +2612,9 @@ is a single character.  The following type codes are defined:\n\
      'f'         floating point     4 \n\
      'd'         floating point     8 \n\
  \n\
+NOTE: The 'u' typecode corresponds to Python's unicode character. On \n\
+narrow builds this is 2-bytes on wide builds this is 4-bytes.\n\
+\n\
  NOTE: The 'q' and 'Q' type codes are only available if the platform \n\
  C compiler used to build Python supports 'long long', or, on Windows, \n\
  '__int64'.\n\
author	Victor Stinner <victor.stinner@gmail.com>
	Sun, 5 Aug 2012 22:46:05 +0000 (00:46 +0200)
committer	Victor Stinner <victor.stinner@gmail.com>
	Sun, 5 Aug 2012 22:46:05 +0000 (00:46 +0200)
Doc/library/array.rst		patch \| blob \| history
Modules/arraymodule.c		patch \| blob \| history