granicus.if.org Git - python/commitdiff
Issue #24870: Add _PyUnicodeWriter_PrepareKind() macro
author: Victor Stinner <victor.stinner@gmail.com>
Mon, 21 Sep 2015 22:58:32 +0000 (00:58 +0200)
committer: Victor Stinner <victor.stinner@gmail.com>
Mon, 21 Sep 2015 22:58:32 +0000 (00:58 +0200)
Add a macro which ensures that the writer has at least the requested kind.

Include/unicodeobject.h
Objects/unicodeobject.c

index 33e8f19af088b4576ebf3c445b9105bbb7f73d5f..d0e014261468625bddbdd10a08263efe01aa7294 100644 (file)
@@ -942,6 +942,23 @@ PyAPI_FUNC(int)
 _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
                                  Py_ssize_t length, Py_UCS4 maxchar);
 
+/* Prepare the buffer to have at least the kind KIND.
+   For example, kind=PyUnicode_2BYTE_KIND ensures that the writer will
+   support characters in range U+0000-U+FFFF.
+
+   Return 0 on success, raise an exception and return -1 on error. */
+#define _PyUnicodeWriter_PrepareKind(WRITER, KIND)                    \
+    (assert((KIND) != PyUnicode_WCHAR_KIND),                          \
+     (KIND) <= (WRITER)->kind                                         \
+     ? 0                                                              \
+     : _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND)))
+
+/* Don't call this function directly, use the _PyUnicodeWriter_PrepareKind()
+   macro instead. */
+PyAPI_FUNC(int)
+_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
+                                     enum PyUnicode_Kind kind);
+
 /* Append a Unicode character.
    Return 0 on success, raise an exception and return -1 on error. */
 PyAPI_FUNC(int)
index f5f2d48e50440c3f230821cd0e29b83aad398322..7c079e0799e4e9a7ab3c04373a4313b741283aa0 100644 (file)
@@ -6722,14 +6722,11 @@ PyUnicode_DecodeASCII(const char *s,
         case _Py_ERROR_REPLACE:
         case _Py_ERROR_SURROGATEESCAPE:
             /* Fast-path: the error handler only writes one character,
-               but we must switch to UCS2 at the first write */
-            if (kind < PyUnicode_2BYTE_KIND) {
-                if (_PyUnicodeWriter_Prepare(&writer, size - writer.pos,
-                                             0xffff) < 0)
-                    return NULL;
-                kind = writer.kind;
-                data = writer.data;
-            }
+               but we may switch to UCS2 at the first write */
+            if (_PyUnicodeWriter_PrepareKind(&writer, PyUnicode_2BYTE_KIND) < 0)
+                goto onError;
+            kind = writer.kind;
+            data = writer.data;
 
             if (error_handler == _Py_ERROR_REPLACE)
                 PyUnicode_WRITE(kind, data, writer.pos, 0xfffd);
@@ -13309,7 +13306,8 @@ _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
     Py_ssize_t newlen;
     PyObject *newbuffer;
 
-    assert(length > 0);
+    /* ensure that the _PyUnicodeWriter_Prepare macro was used */
+    assert(maxchar > writer->maxchar || length > 0);
 
     if (length > PY_SSIZE_T_MAX - writer->pos) {
         PyErr_NoMemory();
@@ -13375,6 +13373,28 @@ _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
 #undef OVERALLOCATE_FACTOR
 }
 
+int
+_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
+                                     enum PyUnicode_Kind kind)
+{
+    Py_UCS4 maxchar;
+
+    /* ensure that the _PyUnicodeWriter_PrepareKind macro was used */
+    assert(writer->kind < kind);
+
+    switch (kind)
+    {
+    case PyUnicode_1BYTE_KIND: maxchar = 0xff; break;
+    case PyUnicode_2BYTE_KIND: maxchar = 0xffff; break;
+    case PyUnicode_4BYTE_KIND: maxchar = 0x10ffff; break;
+    default:
+        assert(0 && "invalid kind");
+        return -1;
+    }
+
+    return _PyUnicodeWriter_PrepareInternal(writer, 0, maxchar);
+}
+
 Py_LOCAL_INLINE(int)
 _PyUnicodeWriter_WriteCharInline(_PyUnicodeWriter *writer, Py_UCS4 ch)
 {