granicus.if.org Git - python/commitdiff
Issue #13441: _PyUnicode_CheckConsistency() dumps the string if the maximum
author Victor Stinner <victor.stinner@haypocalc.com>
Mon, 21 Nov 2011 13:31:41 +0000 (14:31 +0100)
committer Victor Stinner <victor.stinner@haypocalc.com>
Mon, 21 Nov 2011 13:31:41 +0000 (14:31 +0100)
character is bigger than U+10FFFF and locale.localeconv() dumps the string
before decoding it.

Temporary hack to debug issue #13441.

Modules/_localemodule.c
Objects/unicodeobject.c

index 9bba1b39cf7c3355fd6a29b57ac2c6fe664e6aaf..236442f05bc459e1ad2ae0b5e9aa69303017f9c5 100644 (file)
@@ -79,6 +79,23 @@ str2uni(const char* s)
     return res2;
 }
 
+#ifdef Py_DEBUG
+/* Debug helper: print the bytes of a C string to stdout.
+
+   name:  label printed in the header line (the caller passes the field name).
+   value: NUL-terminated byte string to dump; must not be NULL, since it is
+          passed straight to strlen().
+
+   Output format:
+       Decode localeconv() <name>: {0x.. 0x.. ...} (len=<n>)
+   Each byte is converted to unsigned char before printing, so bytes
+   >= 0x80 are shown as two hex digits instead of being sign-extended. */
+void
+dump_str(const char *name, const char *value)
+{
+    size_t i, len = strlen(value);
+    printf("Decode localeconv() %s: {", name);
+    for (i=0; i<len; i++) {
+        unsigned char ch = value[i];
+        /* separate bytes with a single space, but not before the first */
+        if (i)
+            printf(" 0x%02x", ch);
+        else
+            printf("0x%02x", ch);
+    }
+    /* len is a size_t: "%u" expects an unsigned int, which is undefined
+       behaviour when the two types differ in size (e.g. on LP64).  Cast to
+       unsigned long and print with "%lu", which C89 libraries also accept. */
+    printf("} (len=%lu)\n", (unsigned long)len);
+}
+#endif
+
 /* support functions for formatting floating point numbers */
 
 PyDoc_STRVAR(setlocale__doc__,
@@ -184,11 +201,20 @@ PyLocale_localeconv(PyObject* self)
     /* hopefully, the localeconv result survives the C library calls
        involved herein */
 
+#ifdef Py_DEBUG /* debug build: dump the raw bytes of l->s via dump_str() before decoding (temporary hack for issue #13441) */
+#define RESULT_STRING(s)\
+    dump_str(#s, l->s); \
+    x = str2uni(l->s);   \
+    if (!x) goto failed;\
+    PyDict_SetItemString(result, #s, x);\
+    Py_XDECREF(x)
+#else /* non-debug build: same decode-and-store sequence, without the dump */
 #define RESULT_STRING(s)\
     x = str2uni(l->s);   \
     if (!x) goto failed;\
     PyDict_SetItemString(result, #s, x);\
     Py_XDECREF(x)
+#endif /* Py_DEBUG; NOTE(review): both variants expand to several statements and ignore the PyDict_SetItemString() result -- confirm callers only use this as a full statement */
 
 #define RESULT_INT(i)\
     x = PyLong_FromLong(l->i);\
index 6798ef814523d16e23079396a641033b9eb17676..6307a983e51ad3f7d2ecaea27dc65937448eeda3 100644 (file)
@@ -391,6 +391,19 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
             if (ch > maxchar)
                 maxchar = ch;
         }
+        if (maxchar > 0x10FFFF) {
+            printf("Invalid Unicode string! {");
+            for (i=0; i < ascii->length; i++)
+            {
+                Py_UCS4 ch = PyUnicode_READ(kind, data, i);
+                if (i)
+                    printf(", U+%04x", ch);
+                else
+                    printf("U+%04x", ch);
+            }
+            printf("} (len=%u)\n", ascii->length);
+            abort();
+        }
         if (kind == PyUnicode_1BYTE_KIND) {
             if (ascii->state.ascii == 0) {
                 assert(maxchar >= 128);