onError:
Py_XDECREF(errorHandler);
Py_XDECREF(exc);
- Py_DECREF(unicode);
+ Py_XDECREF(unicode);
return NULL;
}
- return unicode_fromascii(s, size);
+#undef WRITE_MAYBE_FAIL
+
+PyObject * /* Decoded string object; NULL when PyUnicode_New() fails. */
+PyUnicode_DecodeUTF8Stateful(const char *s, /* input bytes to decode */
+ Py_ssize_t size, /* number of bytes available at s */
+ const char *errors, /* not inspected here; forwarded to decode_utf8_errors() */
+ Py_ssize_t *consumed) /* optional out-parameter (may be NULL): set to size on the success paths, 0 for empty input */
+{
+ Py_UCS4 maxchar = 0; /* overwritten by utf8_scanner(); later handed to PyUnicode_New() */
+ Py_ssize_t unicode_size; /* filled in by utf8_scanner(); used as the length for PyUnicode_New() */
+ int has_errors = 0; /* result of the *_utf8_try_decode() call; non-zero selects the error path */
+ PyObject *unicode;
+ int kind;
+ void *data;
+ const char *starts = s; /* keeps the original input start, since s is passed by address to the decoders */
+ const char *e;
+ Py_ssize_t i; /* output index passed by address to the decoders */
+
+ if (size == 0) { /* empty input: return the shared unicode_empty object with an extra reference */
+ if (consumed)
+ *consumed = 0;
+ Py_INCREF(unicode_empty);
+ return unicode_empty;
+ }
+
+ maxchar = utf8_scanner((const unsigned char *)s, size, &unicode_size); /* pre-scan: yields maxchar and unicode_size */
+
+ /* When the string is ASCII only, just use memcpy and return.
+ unicode_size may be != size if there is an incomplete UTF-8
+ sequence at the end of the ASCII block. */
+ if (maxchar < 128 && size == unicode_size) {
+ if (consumed)
+ *consumed = size;
++ return unicode_fromascii((const unsigned char *)s, size); /* NOTE(review): this line carries a doubled '+' marker unlike the other added lines -- confirm it is intentional and not a stray character */
+ }
+
+ unicode = PyUnicode_New(unicode_size, maxchar);
+ if (!unicode)
+ return NULL;
+ kind = PyUnicode_KIND(unicode);
+ data = PyUnicode_DATA(unicode);
+
+ /* Unpack UTF-8 encoded data */
+ i = 0;
+ e = starts + size; /* one past the last input byte */
+ switch (kind) { /* pick the decoder matching the storage kind; data is cast to the matching element type */
+ case PyUnicode_1BYTE_KIND:
+ has_errors = ucs1lib_utf8_try_decode(s, e, (Py_UCS1 *) data, &s, &i);
+ break;
+ case PyUnicode_2BYTE_KIND:
+ has_errors = ucs2lib_utf8_try_decode(s, e, (Py_UCS2 *) data, &s, &i);
+ break;
+ case PyUnicode_4BYTE_KIND:
+ has_errors = ucs4lib_utf8_try_decode(s, e, (Py_UCS4 *) data, &s, &i);
+ break;
+ } /* no default case: has_errors stays 0 if kind matches none of the three */
+ if (!has_errors) {
+ /* Ensure the unicode size calculation was correct */
+ assert(i == unicode_size);
+ assert(s == e); /* the decoder is expected to have advanced s to the end of the input */
+ if (consumed)
+ *consumed = size;
+ return unicode;
+ }
-#undef ASCII_CHAR_MASK
+ /* In case of errors, maxchar and size computation might be incorrect;
+ code below refits and resizes as necessary. */
+ return decode_utf8_errors(starts, size, errors, consumed, s, unicode, i); /* hands over the partially filled object plus the current input position (s) and output index (i) */
+}
#ifdef __APPLE__