Fix gdb/libpython.py for not ready Unicode strings

author Victor Stinner <victor.stinner@haypocalc.com>

Fri, 4 Nov 2011 19:54:05 +0000 (20:54 +0100)

committer Victor Stinner <victor.stinner@haypocalc.com>

Fri, 4 Nov 2011 19:54:05 +0000 (20:54 +0100)
author Victor Stinner <victor.stinner@haypocalc.com>
Fri, 4 Nov 2011 19:54:05 +0000 (20:54 +0100)
committer Victor Stinner <victor.stinner@haypocalc.com>
Fri, 4 Nov 2011 19:54:05 +0000 (20:54 +0100)
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h

index 8e29f342b3616d8ace66a88ea9d433285e30200f..58c1f55455608a88da2019c5ff6d52d860824b2b 100644 (file)
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -231,22 +231,24 @@ typedef struct {
           * utf8_length = 0 if utf8 is NULL
           * wstr is shared with data and wstr_length=length
             if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
-           or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_4)=4
+           or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
           * wstr_length = 0 if wstr is NULL
           * (data starts just after the structure)
  
         - legacy string, not ready:
  
           * structure = PyUnicodeObject
+         * length = 0 (use wstr_length)
+         * hash = -1
           * kind = PyUnicode_WCHAR_KIND
           * compact = 0
           * ascii = 0
           * ready = 0
+         * interned = SSTATE_NOT_INTERNED
           * wstr is not NULL
           * data.any is NULL
           * utf8 is NULL
           * utf8_length = 0
-         * interned = SSTATE_NOT_INTERNED
  
         - legacy string, ready:
  
@@ -258,7 +260,7 @@ typedef struct {
           * data.any is not NULL
           * utf8 is shared and utf8_length = length with data.any if ascii = 1
           * utf8_length = 0 if utf8 is NULL
-         * wstr is shared and wstr_length = length with data.any
+         * wstr is shared with data.any and wstr_length = length
             if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
             or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_4)=4
           * wstr_length = 0 if wstr is NULL
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c

index fa6563b0a416e52e3f935e1c0f5f6233360c29fa..ef9bb03a2e4644e936ddd9c35f93e6b7e9ebd4b3 100644 (file)
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -328,18 +328,21 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
              assert(ascii->state.ascii == 0);
              assert(ascii->state.ready == 1);
              assert (compact->utf8 != data);
-        } else {
+        }
+        else {
              PyUnicodeObject *unicode = (PyUnicodeObject *)op;
  
              data = unicode->data.any;
              if (kind == PyUnicode_WCHAR_KIND) {
+                assert(ascii->length == 0);
+                assert(ascii->hash == -1);
                  assert(ascii->state.compact == 0);
                  assert(ascii->state.ascii == 0);
                  assert(ascii->state.ready == 0);
+                assert(ascii->state.interned == SSTATE_NOT_INTERNED);
                  assert(ascii->wstr != NULL);
                  assert(data == NULL);
                  assert(compact->utf8 == NULL);
-                assert(ascii->state.interned == SSTATE_NOT_INTERNED);
              }
              else {
                  assert(kind == PyUnicode_1BYTE_KIND
diff --git a/Tools/gdb/libpython.py b/Tools/gdb/libpython.py

index 43a0f20ec140903082fe52113273ff9381a3c6ae..3fea68f8e3902cd25880755f89e2381ac57edd71 100644 (file)
--- a/Tools/gdb/libpython.py
+++ b/Tools/gdb/libpython.py
@@ -1123,9 +1123,6 @@ class PyUnicodeObjectPtr(PyObjectPtr):
          return _type_Py_UNICODE.sizeof
  
      def proxyval(self, visited):
-        # From unicodeobject.h:
-        #     Py_ssize_t length;  /* Length of raw Unicode data in buffer */
-        #     Py_UNICODE *str;    /* Raw Unicode buffer */
          if _is_pep393:
              # Python 3.3 and newer
              may_have_surrogates = False
@@ -1138,8 +1135,6 @@ class PyUnicodeObjectPtr(PyObjectPtr):
                  # string is not ready
                  may_have_surrogates = True
                  field_str = ascii['wstr']
-                if not is_compact_ascii:
-                    field_length = compact('wstr_length')
              else:
                  if is_compact_ascii:
                      field_str = ascii.address + 1
author	Victor Stinner <victor.stinner@haypocalc.com>
	Fri, 4 Nov 2011 19:54:05 +0000 (20:54 +0100)
committer	Victor Stinner <victor.stinner@haypocalc.com>
	Fri, 4 Nov 2011 19:54:05 +0000 (20:54 +0100)
Include/unicodeobject.h		patch | blob | history
Objects/unicodeobject.c		patch | blob | history
Tools/gdb/libpython.py		patch | blob | history