granicus.if.org Git - python/commitdiff
Issue #17043: The unicode-internal decoder no longer reads past the end of
author Serhiy Storchaka <storchaka@gmail.com>
Thu, 7 Feb 2013 14:23:21 +0000 (16:23 +0200)
committer Serhiy Storchaka <storchaka@gmail.com>
Thu, 7 Feb 2013 14:23:21 +0000 (16:23 +0200)
the input buffer.

Misc/NEWS
Objects/unicodeobject.c

index 83f52afda251c5301419328f3b93853b7afd3f91..c715170df24b2b18473dc4e4173492d686eb40b4 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@ What's New in Python 3.2.4
 Core and Builtins
 -----------------
 
+- Issue #17043: The unicode-internal decoder no longer reads past the end of
+  the input buffer.
+
 - Issue #16979: Fix error handling bugs in the unicode-escape-decode decoder.
 
 - Issue #10156: In the interpreter's initialization phase, unicode globals
index 3a288d845b8eaa385580ee6f53b84350caca1afa..cd4e9e9295c854e3792ea5f7e07233e51658d83d 100644 (file)
@@ -4392,37 +4392,34 @@ PyObject *_PyUnicode_DecodeUnicodeInternal(const char *s,
     end = s + size;
 
     while (s < end) {
+        if (end-s < Py_UNICODE_SIZE) {
+            endinpos = end-starts;
+            reason = "truncated input";
+            goto error;
+        }
         memcpy(p, s, sizeof(Py_UNICODE));
+#ifdef Py_UNICODE_WIDE
         /* We have to sanity check the raw data, otherwise doom looms for
            some malformed UCS-4 data. */
-        if (
-#ifdef Py_UNICODE_WIDE
-            *p > unimax || *p < 0 ||
-#endif
-            end-s < Py_UNICODE_SIZE
-            )
-        {
-            startinpos = s - starts;
-            if (end-s < Py_UNICODE_SIZE) {
-                endinpos = end-starts;
-                reason = "truncated input";
-            }
-            else {
-                endinpos = s - starts + Py_UNICODE_SIZE;
-                reason = "illegal code point (> 0x10FFFF)";
-            }
-            outpos = p - PyUnicode_AS_UNICODE(v);
-            if (unicode_decode_call_errorhandler(
-                    errors, &errorHandler,
-                    "unicode_internal", reason,
-                    &starts, &end, &startinpos, &endinpos, &exc, &s,
-                    &v, &outpos, &p)) {
-                goto onError;
-            }
+        if (*p > unimax || *p < 0) {
+            endinpos = s - starts + Py_UNICODE_SIZE;
+            reason = "illegal code point (> 0x10FFFF)";
+            goto error;
         }
-        else {
-            p++;
-            s += Py_UNICODE_SIZE;
+#endif
+        p++;
+        s += Py_UNICODE_SIZE;
+        continue;
+
+  error:
+        startinpos = s - starts;
+        outpos = p - PyUnicode_AS_UNICODE(v);
+        if (unicode_decode_call_errorhandler(
+                errors, &errorHandler,
+                "unicode_internal", reason,
+                &starts, &end, &startinpos, &endinpos, &exc, &s,
+                &v, &outpos, &p)) {
+            goto onError;
         }
     }