end = s + size;
while (s < end) {
- if (end-s < Py_UNICODE_SIZE) {
+ Py_UNICODE uch;
+ Py_UCS4 ch;
++ if (end - s < Py_UNICODE_SIZE) {
+ endinpos = end-starts;
+ reason = "truncated input";
+ goto error;
+ }
- memcpy(p, s, sizeof(Py_UNICODE));
+ /* We copy the raw representation one byte at a time because the
+ pointer may be unaligned (see test_codeccallbacks). */
+ ((char *) &uch)[0] = s[0];
+ ((char *) &uch)[1] = s[1];
+#ifdef Py_UNICODE_WIDE
+ ((char *) &uch)[2] = s[2];
+ ((char *) &uch)[3] = s[3];
+#endif
+ ch = uch;
-
+ #ifdef Py_UNICODE_WIDE
/* We have to sanity check the raw data, otherwise doom looms for
some malformed UCS-4 data. */
- if (
- #ifdef Py_UNICODE_WIDE
- ch > 0x10ffff ||
- #endif
- end-s < Py_UNICODE_SIZE
- )
- {
- startinpos = s - starts;
- if (end-s < Py_UNICODE_SIZE) {
- endinpos = end-starts;
- reason = "truncated input";
- }
- else {
- endinpos = s - starts + Py_UNICODE_SIZE;
- reason = "illegal code point (> 0x10FFFF)";
- }
- if (unicode_decode_call_errorhandler(
- errors, &errorHandler,
- "unicode_internal", reason,
- &starts, &end, &startinpos, &endinpos, &exc, &s,
- &v, &outpos))
- goto onError;
- continue;
- if (*p > unimax || *p < 0) {
++ if (ch > 0x10ffff) {
+ endinpos = s - starts + Py_UNICODE_SIZE;
+ reason = "illegal code point (> 0x10FFFF)";
+ goto error;
}
-
+ #endif
- p++;
s += Py_UNICODE_SIZE;
- if (Py_UNICODE_IS_HIGH_SURROGATE(ch) && s < end)
+#ifndef Py_UNICODE_WIDE
- outpos = p - PyUnicode_AS_UNICODE(v);
++ if (Py_UNICODE_IS_HIGH_SURROGATE(ch) && end - s >= Py_UNICODE_SIZE)
+ {
+ Py_UNICODE uch2;
+ ((char *) &uch2)[0] = s[0];
+ ((char *) &uch2)[1] = s[1];
+ if (Py_UNICODE_IS_LOW_SURROGATE(uch2))
+ {
+ ch = Py_UNICODE_JOIN_SURROGATES(uch, uch2);
+ s += Py_UNICODE_SIZE;
+ }
+ }
+#endif
+
+ if (unicode_putchar(&v, &outpos, ch) < 0)
+ goto onError;
+ continue;
+
+ error:
+ startinpos = s - starts;
- &v, &outpos, &p)) {
+ if (unicode_decode_call_errorhandler(
+ errors, &errorHandler,
+ "unicode_internal", reason,
+ &starts, &end, &startinpos, &endinpos, &exc, &s,
- }
++ &v, &outpos))
+ goto onError;
}
- if (_PyUnicode_Resize(&v, p - PyUnicode_AS_UNICODE(v)) < 0)
+ if (unicode_resize(&v, outpos) < 0)
goto onError;
Py_XDECREF(errorHandler);
Py_XDECREF(exc);