From: Victor Stinner <victor.stinner@haypocalc.com>
Date: Sun, 20 Nov 2011 17:56:05 +0000 (+0100)
Subject: _PyUnicode_CheckConsistency() also checks maxchar maximum value,
X-Git-Tag: v3.3.0a1~794
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=77faf69ca1e024cd48c82c882bfcad34be05da63;p=python

_PyUnicode_CheckConsistency() now also checks the maximum value of maxchar,
not only its minimum value.
---

diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index 71404f3ae7..6a31e48836 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -303,19 +303,22 @@ typedef struct {
            - PyUnicode_1BYTE_KIND (1):
 
              * character type = Py_UCS1 (8 bits, unsigned)
-             * if ascii is set, all characters must be in range
-               U+0000-U+007F, otherwise at least one character must be in range
+             * all characters are in the range U+0000-U+00FF (latin1)
+             * if ascii is set, all characters are in the range U+0000-U+007F
+               (ASCII), otherwise at least one character is in the range
                U+0080-U+00FF
 
            - PyUnicode_2BYTE_KIND (2):
 
              * character type = Py_UCS2 (16 bits, unsigned)
-             * at least one character must be in range U+0100-U+FFFF
+             * all characters are in the range U+0000-U+FFFF (BMP)
+             * at least one character is in the range U+0100-U+FFFF
 
            - PyUnicode_4BYTE_KIND (4):
 
              * character type = Py_UCS4 (32 bits, unsigned)
-             * at least one character must be in range U+10000-U+10FFFF
+             * all characters are in the range U+0000-U+10FFFF
+             * at least one character is in the range U+10000-U+10FFFF
          */
         unsigned int kind:3;
         /* Compact is with respect to the allocation scheme. Compact unicode
@@ -323,7 +326,7 @@ typedef struct {
            one block for the PyUnicodeObject struct and another for its data
            buffer. */
         unsigned int compact:1;
-        /* The string only contains characters in range U+0000-U+007F (ASCII)
+        /* The string only contains characters in the range U+0000-U+007F (ASCII)
            and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
            set, use the PyASCIIObject structure. */
         unsigned int ascii:1;
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 18780ea4a4..9c1705d0dd 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -392,15 +392,21 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
                 maxchar = ch;
         }
         if (kind == PyUnicode_1BYTE_KIND) {
-            if (ascii->state.ascii == 0)
+            if (ascii->state.ascii == 0) {
                 assert(maxchar >= 128);
+                assert(maxchar <= 255);
+            }
             else
                 assert(maxchar < 128);
         }
-        else if (kind == PyUnicode_2BYTE_KIND)
+        else if (kind == PyUnicode_2BYTE_KIND) {
             assert(maxchar >= 0x100);
-        else
+            assert(maxchar <= 0xFFFF);
+        }
+        else {
             assert(maxchar >= 0x10000);
+            assert(maxchar <= 0x10FFFF);
+        }
     }
     if (check_content && !unicode_is_singleton(op))
         assert(ascii->hash == -1);