Check consistency of newly created strings using _PyUnicode_CheckConsistency(str, 1)

author Victor Stinner <victor.stinner@gmail.com>

Fri, 27 Apr 2012 11:55:39 +0000 (13:55 +0200)

committer Victor Stinner <victor.stinner@gmail.com>

Fri, 27 Apr 2012 11:55:39 +0000 (13:55 +0200)
author Victor Stinner <victor.stinner@gmail.com>
Fri, 27 Apr 2012 11:55:39 +0000 (13:55 +0200)
committer Victor Stinner <victor.stinner@gmail.com>
Fri, 27 Apr 2012 11:55:39 +0000 (13:55 +0200)
diff --git a/Modules/_json.c b/Modules/_json.c

index 95c658ca7c5c861e16b12f4ccda3928322e36004..40c2ced5028f6702900419fd7d7e8161bea5a30e 100644 (file)
--- a/Modules/_json.c
+++ b/Modules/_json.c
@@ -246,6 +246,7 @@ ascii_escape_unicode(PyObject *pystr)
          }
      }
      output[chars++] = '"';
+    assert(_PyUnicode_CheckConsistency(rval, 1));
      return rval;
  }
  
diff --git a/Modules/md5module.c b/Modules/md5module.c

index 86f602ebe5ef9895523909004e67515bae3696a1..ee44c4878d191c3be296969199ea35f4e067a629 100644 (file)
--- a/Modules/md5module.c
+++ b/Modules/md5module.c
@@ -397,6 +397,7 @@ MD5_hexdigest(MD5object *self, PyObject *unused)
          c = (digest[i] & 0xf);
          hex_digest[j++] = Py_hexdigits[c];
      }
+    assert(_PyUnicode_CheckConsistency(retval, 1));
      return retval;
  }
  
diff --git a/Modules/sha1module.c b/Modules/sha1module.c

index 30e5c5018a3f4cb6df44f2705f2f27f13dc3bef6..daea887960062b1cfae3e27c8a2bb67093366719 100644 (file)
--- a/Modules/sha1module.c
+++ b/Modules/sha1module.c
@@ -373,6 +373,7 @@ SHA1_hexdigest(SHA1object *self, PyObject *unused)
          c = (digest[i] & 0xf);
          hex_digest[j++] = Py_hexdigits[c];
      }
+    assert(_PyUnicode_CheckConsistency(retval, 1));
      return retval;
  }
  
diff --git a/Modules/sha256module.c b/Modules/sha256module.c

index f1ef3293666a48a87ac3fd150e6ee1100bf407f4..76d91afda37aaea229b66833845bccf9166446ad 100644 (file)
--- a/Modules/sha256module.c
+++ b/Modules/sha256module.c
@@ -466,6 +466,7 @@ SHA256_hexdigest(SHAobject *self, PyObject *unused)
          c = (digest[i] & 0xf);
          hex_digest[j++] = Py_hexdigits[c];
      }
+    assert(_PyUnicode_CheckConsistency(retval, 1));
      return retval;
  }
  
diff --git a/Modules/sha512module.c b/Modules/sha512module.c

index 4f5a1139ee2a248f03d2361222e618e57d947b99..88f8a64d0624d302d75ed98b3a5830624145129f 100644 (file)
--- a/Modules/sha512module.c
+++ b/Modules/sha512module.c
@@ -532,6 +532,7 @@ SHA512_hexdigest(SHAobject *self, PyObject *unused)
          c = (digest[i] & 0xf);
          hex_digest[j++] = Py_hexdigits[c];
      }
+    assert(_PyUnicode_CheckConsistency(retval, 1));
      return retval;
  }
  
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c

index 2e6be431c91e72fbb81a3d76c3c2bb6b218b1e82..b07be26896fcd2c9589ffa2f50a65df7a1cf753d 100644 (file)
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -626,6 +626,7 @@ PyBytes_Repr(PyObject *obj, int smartquotes)
              *p++ = c;
      }
      *p++ = quote;
+    assert(_PyUnicode_CheckConsistency(v, 1));
      return v;
  }
  
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c

index 364de90877c24c34c40c32416713e0b946e4cc50..60b0a1fbbd9faeeca003c63fe4e4400a8dad91c2 100644 (file)
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -967,7 +967,7 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
      PyObject *obj;
      PyCompactUnicodeObject *unicode;
      void *data;
-    int kind_state;
+    enum PyUnicode_Kind kind;
      int is_sharing, is_ascii;
      Py_ssize_t char_size;
      Py_ssize_t struct_size;
@@ -986,17 +986,17 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
      is_sharing = 0;
      struct_size = sizeof(PyCompactUnicodeObject);
      if (maxchar < 128) {
-        kind_state = PyUnicode_1BYTE_KIND;
+        kind = PyUnicode_1BYTE_KIND;
          char_size = 1;
          is_ascii = 1;
          struct_size = sizeof(PyASCIIObject);
      }
      else if (maxchar < 256) {
-        kind_state = PyUnicode_1BYTE_KIND;
+        kind = PyUnicode_1BYTE_KIND;
          char_size = 1;
      }
      else if (maxchar < 65536) {
-        kind_state = PyUnicode_2BYTE_KIND;
+        kind = PyUnicode_2BYTE_KIND;
          char_size = 2;
          if (sizeof(wchar_t) == 2)
              is_sharing = 1;
@@ -1007,7 +1007,7 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
                              "invalid maximum character passed to PyUnicode_New");
              return NULL;
          }
-        kind_state = PyUnicode_4BYTE_KIND;
+        kind = PyUnicode_4BYTE_KIND;
          char_size = 4;
          if (sizeof(wchar_t) == 4)
              is_sharing = 1;
@@ -1041,7 +1041,7 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
      _PyUnicode_LENGTH(unicode) = size;
      _PyUnicode_HASH(unicode) = -1;
      _PyUnicode_STATE(unicode).interned = 0;
-    _PyUnicode_STATE(unicode).kind = kind_state;
+    _PyUnicode_STATE(unicode).kind = kind;
      _PyUnicode_STATE(unicode).compact = 1;
      _PyUnicode_STATE(unicode).ready = 1;
      _PyUnicode_STATE(unicode).ascii = is_ascii;
@@ -1049,19 +1049,19 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
          ((char*)data)[size] = 0;
          _PyUnicode_WSTR(unicode) = NULL;
      }
-    else if (kind_state == PyUnicode_1BYTE_KIND) {
+    else if (kind == PyUnicode_1BYTE_KIND) {
          ((char*)data)[size] = 0;
          _PyUnicode_WSTR(unicode) = NULL;
          _PyUnicode_WSTR_LENGTH(unicode) = 0;
          unicode->utf8 = NULL;
          unicode->utf8_length = 0;
-        }
+    }
      else {
          unicode->utf8 = NULL;
          unicode->utf8_length = 0;
-        if (kind_state == PyUnicode_2BYTE_KIND)
+        if (kind == PyUnicode_2BYTE_KIND)
              ((Py_UCS2*)data)[size] = 0;
-        else /* kind_state == PyUnicode_4BYTE_KIND */
+        else /* kind == PyUnicode_4BYTE_KIND */
              ((Py_UCS4*)data)[size] = 0;
          if (is_sharing) {
              _PyUnicode_WSTR_LENGTH(unicode) = size;
@@ -1072,6 +1072,13 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
              _PyUnicode_WSTR(unicode) = NULL;
          }
      }
+#ifdef Py_DEBUG
+    /* Fill the data with invalid characters to detect bugs earlier.
+       _PyUnicode_CheckConsistency(str, 1) detects invalid characters,
+       at least for ASCII and UCS-4 strings. U+00FF is invalid in ASCII
+       and U+FFFFFFFF is an invalid character in Unicode 6.0. */
+    memset(data, 0xff, size * kind);
+#endif
      assert(_PyUnicode_CheckConsistency((PyObject*)unicode, 0));
      return obj;
  }
diff --git a/Python/codecs.c b/Python/codecs.c

index 607feea81c39345b6a65c38dfaf7bb409af9f8e3..797a45f5a1ce0bd401321d1200edf61bab7fe074 100644 (file)
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -534,6 +534,7 @@ PyObject *PyCodec_ReplaceErrors(PyObject *exc)
          data = PyUnicode_DATA(res);
          for (i = 0; i < len; ++i)
              PyUnicode_WRITE(kind, data, i, '?');
+        assert(_PyUnicode_CheckConsistency(res, 1));
          return Py_BuildValue("(Nn)", res, end);
      }
      else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
@@ -559,6 +560,7 @@ PyObject *PyCodec_ReplaceErrors(PyObject *exc)
          data = PyUnicode_DATA(res);
          for (i=0; i < len; i++)
              PyUnicode_WRITE(kind, data, i, Py_UNICODE_REPLACEMENT_CHARACTER);
+        assert(_PyUnicode_CheckConsistency(res, 1));
          return Py_BuildValue("(Nn)", res, end);
      }
      else {
@@ -652,8 +654,8 @@ PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
              }
              *outp++ = ';';
          }
-        restuple = Py_BuildValue("(On)", res, end);
-        Py_DECREF(res);
+        assert(_PyUnicode_CheckConsistency(res, 1));
+        restuple = Py_BuildValue("(Nn)", res, end);
          Py_DECREF(object);
          return restuple;
      }
@@ -720,8 +722,8 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
              *outp++ = Py_hexdigits[c&0xf];
          }
  
-        restuple = Py_BuildValue("(On)", res, end);
-        Py_DECREF(res);
+        assert(_PyUnicode_CheckConsistency(res, 1));
+        restuple = Py_BuildValue("(Nn)", res, end);
          Py_DECREF(object);
          return restuple;
      }
diff --git a/Python/compile.c b/Python/compile.c

index 79d1d2167028b26064660ed1d6b62cb102e9f794..10e9ad27f56f5d9ff2cba806b024706d1b40380d 100644 (file)
--- a/Python/compile.c
+++ b/Python/compile.c
@@ -263,6 +263,7 @@ _Py_Mangle(PyObject *privateobj, PyObject *ident)
          Py_DECREF(result);
          return NULL;
      }
+    assert(_PyUnicode_CheckConsistency(result, 1));
      return result;
  }
  
diff --git a/Python/import.c b/Python/import.c

index 8cf10e658c28c87877b4e207cae086856f14c03b..103e7de43939f4fadfc6bf116f5ad9d62ae06187 100644 (file)
--- a/Python/import.c
+++ b/Python/import.c
@@ -992,6 +992,7 @@ make_source_pathname(PyObject *path)
                               (j = dot0-right));
      PyUnicode_WRITE(kind, data, i+j,   'p');
      PyUnicode_WRITE(kind, data, i+j+1, 'y');
+    assert(_PyUnicode_CheckConsistency(result, 1));
      return result;
  }
author	Victor Stinner <victor.stinner@gmail.com>
	Fri, 27 Apr 2012 11:55:39 +0000 (13:55 +0200)
committer	Victor Stinner <victor.stinner@gmail.com>
	Fri, 27 Apr 2012 11:55:39 +0000 (13:55 +0200)
Modules/_json.c		patch | blob | history
Modules/md5module.c		patch | blob | history
Modules/sha1module.c		patch | blob | history
Modules/sha256module.c		patch | blob | history
Modules/sha512module.c		patch | blob | history
Objects/bytesobject.c		patch | blob | history
Objects/unicodeobject.c		patch | blob | history
Python/codecs.c		patch | blob | history
Python/compile.c		patch | blob | history
Python/import.c		patch | blob | history