+-----------+--------------------+-------------------+-----------------------+-------+
| ``'B'`` | unsigned char | int | 1 | |
+-----------+--------------------+-------------------+-----------------------+-------+
-| ``'u'`` | Py_UCS4 | Unicode character | 4 | |
+| ``'u'`` | Py_UNICODE | Unicode character | 2 | \(1) |
+-----------+--------------------+-------------------+-----------------------+-------+
| ``'h'`` | signed short | int | 2 | |
+-----------+--------------------+-------------------+-----------------------+-------+
+-----------+--------------------+-------------------+-----------------------+-------+
| ``'L'`` | unsigned long | int | 4 | |
+-----------+--------------------+-------------------+-----------------------+-------+
-| ``'q'`` | signed long long | int | 8 | \(1) |
+| ``'q'`` | signed long long | int | 8 | \(2) |
+-----------+--------------------+-------------------+-----------------------+-------+
-| ``'Q'`` | unsigned long long | int | 8 | \(1) |
+| ``'Q'`` | unsigned long long | int | 8 | \(2) |
+-----------+--------------------+-------------------+-----------------------+-------+
| ``'f'`` | float | float | 4 | |
+-----------+--------------------+-------------------+-----------------------+-------+
Notes:
(1)
+ The ``'u'`` type code corresponds to Python's Unicode character type
+ (:c:type:`Py_UNICODE`, which is :c:type:`wchar_t`). Depending on the
+ platform, it can be 16 bits or 32 bits wide.
+
+(2)
The ``'q'`` and ``'Q'`` type codes are available only if
the platform C compiler used to build Python supports C :c:type:`long long`,
or, on Windows, :c:type:`__int64`.
/* Return item i of a unicode-typecode array as a new Python Unicode object.
 *
 * Removed line: reads the item as a Py_UCS4 and converts it with
 * PyUnicode_FromOrdinal().
 * Added line: reads the item as a Py_UNICODE and builds a string of
 * length 1 from its address with PyUnicode_FromUnicode().
 *
 * No bounds check on i is performed here.
 */
static PyObject *
u_getitem(arrayobject *ap, Py_ssize_t i)
{
- return PyUnicode_FromOrdinal(((Py_UCS4 *) ap->ob_item)[i]);
+ return PyUnicode_FromUnicode(&((Py_UNICODE *) ap->ob_item)[i], 1);
}
/* Validate v and, when i is non-negative, store it at index i of a
 * unicode-typecode array.
 *
 * Returns 0 on success and -1 on failure. Failure happens when v cannot
 * be parsed as a Unicode string, or when its length is not exactly 1
 * (in that case TypeError is set explicitly below).
 * When i is negative nothing is written; only the validation runs.
 */
static int
u_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v)
{
- PyObject *p;
+ Py_UNICODE *p;
+ Py_ssize_t len;
/* Removed lines parse v as an object (format U) and call
 * PyUnicode_READY() on it; the added line parses with the u# format,
 * which fills in a Py_UNICODE pointer and a length. */
- if (!PyArg_Parse(v, "U;array item must be unicode character", &p))
- return -1;
- if (PyUnicode_READY(p))
+ if (!PyArg_Parse(v, "u#;array item must be unicode character", &p, &len))
return -1;
/* Only single-character strings are accepted as an array item. */
- if (PyUnicode_GET_LENGTH(p) != 1) {
+ if (len != 1) {
PyErr_SetString(PyExc_TypeError,
"array item must be unicode character");
return -1;
}
/* Store the single character; skipped for a negative index. */
if (i >= 0)
- ((Py_UCS4 *)ap->ob_item)[i] = PyUnicode_READ_CHAR(p, 0);
+ ((Py_UNICODE *)ap->ob_item)[i] = p[0];
return 0;
}
return 0;
}
/* Removed by this change: STRUCT_LONG_FORMAT selected a format string for
 * a 4-byte item, I when int is 4 bytes and otherwise L when long is
 * 4 bytes, and stopped the build if neither applied. It was used as the
 * format of the unicode entry in the descriptors table. */
-#if SIZEOF_INT == 4
-# define STRUCT_LONG_FORMAT "I"
-#elif SIZEOF_LONG == 4
-# define STRUCT_LONG_FORMAT "L"
-#else
-# error "Unable to get struct format for Py_UCS4"
-#endif
/* Description of types.
*
static struct arraydescr descriptors[] = {
{'b', 1, b_getitem, b_setitem, "b", 1, 1},
{'B', 1, BB_getitem, BB_setitem, "B", 1, 0},
- {'u', sizeof(Py_UCS4), u_getitem, u_setitem, STRUCT_LONG_FORMAT, 0, 0},
+ {'u', sizeof(Py_UNICODE), u_getitem, u_setitem, "u", 0, 0},
{'h', sizeof(short), h_getitem, h_setitem, "h", 1, 1},
{'H', sizeof(short), HH_getitem, HH_setitem, "H", 1, 0},
{'i', sizeof(int), i_getitem, i_setitem, "i", 1, 1},
static PyObject *
array_fromunicode(arrayobject *self, PyObject *args)
{
- PyObject *ustr;
+ Py_UNICODE *ustr;
Py_ssize_t n;
+ char typecode;
- if (!PyArg_ParseTuple(args, "U:fromunicode", &ustr))
+ if (!PyArg_ParseTuple(args, "u#:fromunicode", &ustr, &n))
return NULL;
- if (self->ob_descr->typecode != 'u') {
+ typecode = self->ob_descr->typecode;
+ if ((typecode != 'u')) {
PyErr_SetString(PyExc_ValueError,
"fromunicode() may only be called on "
"unicode type arrays");
return NULL;
}
- if (PyUnicode_READY(ustr))
- return NULL;
- n = PyUnicode_GET_LENGTH(ustr);
if (n > 0) {
Py_ssize_t old_size = Py_SIZE(self);
if (array_resize(self, old_size + n) == -1)
return NULL;
- if (!PyUnicode_AsUCS4(ustr, (Py_UCS4 *)self->ob_item + old_size, n, 0))
- return NULL;
+ memcpy(self->ob_item + old_size * sizeof(Py_UNICODE),
+ ustr, n * sizeof(Py_UNICODE));
}
Py_INCREF(Py_None);
/* Convert a unicode-typecode array to a Python Unicode string.
 *
 * Sets ValueError and returns NULL when the array typecode is not u.
 * The second parameter is not referenced.
 *
 * Removed lines: build the result with PyUnicode_FromKindAndData(),
 * treating the buffer as Py_SIZE(self) items of type Py_UCS4.
 * Added lines: build the result with PyUnicode_FromUnicode(), treating
 * the buffer as Py_SIZE(self) items of type Py_UNICODE.
 */
static PyObject *
array_tounicode(arrayobject *self, PyObject *unused)
{
- if (self->ob_descr->typecode != 'u') {
+ char typecode;
+ typecode = self->ob_descr->typecode;
+ if ((typecode != 'u')) {
PyErr_SetString(PyExc_ValueError,
"tounicode() may only be called on unicode type arrays");
return NULL;
}
- return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
- (Py_UCS4 *) self->ob_item,
- Py_SIZE(self));
+ return PyUnicode_FromUnicode((Py_UNICODE *) self->ob_item, Py_SIZE(self));
}
PyDoc_STRVAR(tounicode_doc,
return UNSIGNED_INT8;
case 'u':
- return UTF32_LE + is_big_endian;
+ if (sizeof(Py_UNICODE) == 2) {
+ return UTF16_LE + is_big_endian;
+ }
+ if (sizeof(Py_UNICODE) == 4) {
+ return UTF32_LE + is_big_endian;
+ }
+ return UNKNOWN_FORMAT;
case 'f':
if (sizeof(float) == 4) {
view->strides = &(view->itemsize);
view->format = NULL;
view->internal = NULL;
- if ((flags & PyBUF_FORMAT) == PyBUF_FORMAT)
+ if ((flags & PyBUF_FORMAT) == PyBUF_FORMAT) {
view->format = self->ob_descr->formats;
+#ifdef Py_UNICODE_WIDE
+ if (self->ob_descr->typecode == 'u') {
+ view->format = "w";
+ }
+#endif
+ }
finish:
self->ob_exports++;
Py_DECREF(v);
}
else if (initial != NULL && PyUnicode_Check(initial)) {
+ Py_UNICODE *ustr;
Py_ssize_t n;
- if (PyUnicode_READY(initial)) {
+
+ ustr = PyUnicode_AsUnicode(initial);
+ if (ustr == NULL) {
+ PyErr_NoMemory();
Py_DECREF(a);
return NULL;
}
- n = PyUnicode_GET_LENGTH(initial);
+
+ n = PyUnicode_GET_DATA_SIZE(initial);
if (n > 0) {
arrayobject *self = (arrayobject *)a;
- Py_UCS4 *item = (Py_UCS4 *)self->ob_item;
- item = (Py_UCS4 *)PyMem_Realloc(item, n * sizeof(Py_UCS4));
+ char *item = self->ob_item;
+ item = (char *)PyMem_Realloc(item, n);
if (item == NULL) {
PyErr_NoMemory();
Py_DECREF(a);
return NULL;
}
- self->ob_item = (char*)item;
- Py_SIZE(self) = n;
- if (!PyUnicode_AsUCS4(initial, item, n, 0))
- return NULL;
+ self->ob_item = item;
+ Py_SIZE(self) = n / sizeof(Py_UNICODE);
+ memcpy(item, ustr, n);
self->allocated = Py_SIZE(self);
}
}
Type code C Type Minimum size in bytes \n\
'b' signed integer 1 \n\
'B' unsigned integer 1 \n\
- 'u' Unicode character 4 \n\
+ 'u' Unicode character 2 (see note) \n\
'h' signed integer 2 \n\
'H' unsigned integer 2 \n\
'i' signed integer 2 \n\
'f' floating point 4 \n\
'd' floating point 8 \n\
\n\
+NOTE: The 'u' typecode corresponds to Python's unicode character. On \n\
+narrow builds this is 2-bytes on wide builds this is 4-bytes.\n\
+\n\
NOTE: The 'q' and 'Q' type codes are only available if the platform \n\
C compiler used to build Python supports 'long long', or, on Windows, \n\
'__int64'.\n\