From: Serhiy Storchaka <storchaka@gmail.com>
Date: Sun, 23 Jun 2013 17:21:16 +0000 (+0300)
Subject: Issue #18184: PyUnicode_FromFormat() and PyUnicode_FromFormatV() now raise
X-Git-Tag: v3.4.0a1~416
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=c89533f72fbf15779d33c4533c801ed4c3d0ea18;p=python

Issue #18184: PyUnicode_FromFormat() and PyUnicode_FromFormatV() now raise
OverflowError when an argument of %c format is out of range.
---

c89533f72fbf15779d33c4533c801ed4c3d0ea18
diff --cc Lib/test/test_unicode.py
index 382b463093,0c82560ca7..518d6d6016
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@@ -2044,182 -2022,47 +2044,184 @@@ class UnicodeTest(string_tests.CommonTe
              PyUnicode_FromFormat, b'unicode\xe9=%s', 'ascii')
  
          # test "%c"
 -        self.assertEqual(PyUnicode_FromFormat(b'%c', c_int(0xabcd)), '\uabcd')
 -        self.assertEqual(PyUnicode_FromFormat(b'%c', c_int(0x10ffff)), '\U0010ffff')
 +        check_format('\uabcd',
 +                     b'%c', c_int(0xabcd))
 +        check_format('\U0010ffff',
 +                     b'%c', c_int(0x10ffff))
+         with self.assertRaises(OverflowError):
+             PyUnicode_FromFormat(b'%c', c_int(0x110000))
          # Issue #18183
 -        self.assertEqual(
 -            PyUnicode_FromFormat(b'%c%c', c_int(0x10000), c_int(0x100000)),
 -            '\U00010000\U00100000')
 +        check_format('\U00010000\U00100000',
 +                     b'%c%c', c_int(0x10000), c_int(0x100000))
  
          # test "%"
 -        self.assertEqual(PyUnicode_FromFormat(b'%'), '%')
 -        self.assertEqual(PyUnicode_FromFormat(b'%%'), '%')
 -        self.assertEqual(PyUnicode_FromFormat(b'%%s'), '%s')
 -        self.assertEqual(PyUnicode_FromFormat(b'[%%]'), '[%]')
 -        self.assertEqual(PyUnicode_FromFormat(b'%%%s', b'abc'), '%abc')
 +        check_format('%',
 +                     b'%')
 +        check_format('%',
 +                     b'%%')
 +        check_format('%s',
 +                     b'%%s')
 +        check_format('[%]',
 +                     b'[%%]')
 +        check_format('%abc',
 +                     b'%%%s', b'abc')
 +
 +        # truncated string
 +        check_format('abc',
 +                     b'%.3s', b'abcdef')
 +        check_format('abc[\ufffd',
 +                     b'%.5s', 'abc[\u20ac]'.encode('utf8'))
 +        check_format("'\\u20acABC'",
 +                     b'%A', '\u20acABC')
 +        check_format("'\\u20",
 +                     b'%.5A', '\u20acABCDEF')
 +        check_format("'\u20acABC'",
 +                     b'%R', '\u20acABC')
 +        check_format("'\u20acA",
 +                     b'%.3R', '\u20acABCDEF')
 +        check_format('\u20acAB',
 +                     b'%.3S', '\u20acABCDEF')
 +        check_format('\u20acAB',
 +                     b'%.3U', '\u20acABCDEF')
 +        check_format('\u20acAB',
 +                     b'%.3V', '\u20acABCDEF', None)
 +        check_format('abc[\ufffd',
 +                     b'%.5V', None, 'abc[\u20ac]'.encode('utf8'))
 +
 +        # following tests comes from #7330
 +        # test width modifier and precision modifier with %S
 +        check_format("repr=  abc",
 +                     b'repr=%5S', 'abc')
 +        check_format("repr=ab",
 +                     b'repr=%.2S', 'abc')
 +        check_format("repr=   ab",
 +                     b'repr=%5.2S', 'abc')
 +
 +        # test width modifier and precision modifier with %R
 +        check_format("repr=   'abc'",
 +                     b'repr=%8R', 'abc')
 +        check_format("repr='ab",
 +                     b'repr=%.3R', 'abc')
 +        check_format("repr=  'ab",
 +                     b'repr=%5.3R', 'abc')
 +
 +        # test width modifier and precision modifier with %A
 +        check_format("repr=   'abc'",
 +                     b'repr=%8A', 'abc')
 +        check_format("repr='ab",
 +                     b'repr=%.3A', 'abc')
 +        check_format("repr=  'ab",
 +                     b'repr=%5.3A', 'abc')
 +
 +        # test width modifier and precision modifier with %s
 +        check_format("repr=  abc",
 +                     b'repr=%5s', b'abc')
 +        check_format("repr=ab",
 +                     b'repr=%.2s', b'abc')
 +        check_format("repr=   ab",
 +                     b'repr=%5.2s', b'abc')
 +
 +        # test width modifier and precision modifier with %U
 +        check_format("repr=  abc",
 +                     b'repr=%5U', 'abc')
 +        check_format("repr=ab",
 +                     b'repr=%.2U', 'abc')
 +        check_format("repr=   ab",
 +                     b'repr=%5.2U', 'abc')
 +
 +        # test width modifier and precision modifier with %V
 +        check_format("repr=  abc",
 +                     b'repr=%5V', 'abc', b'123')
 +        check_format("repr=ab",
 +                     b'repr=%.2V', 'abc', b'123')
 +        check_format("repr=   ab",
 +                     b'repr=%5.2V', 'abc', b'123')
 +        check_format("repr=  123",
 +                     b'repr=%5V', None, b'123')
 +        check_format("repr=12",
 +                     b'repr=%.2V', None, b'123')
 +        check_format("repr=   12",
 +                     b'repr=%5.2V', None, b'123')
  
          # test integer formats (%i, %d, %u)
 -        self.assertEqual(PyUnicode_FromFormat(b'%03i', c_int(10)), '010')
 -        self.assertEqual(PyUnicode_FromFormat(b'%0.4i', c_int(10)), '0010')
 -        self.assertEqual(PyUnicode_FromFormat(b'%i', c_int(-123)), '-123')
 -        self.assertEqual(PyUnicode_FromFormat(b'%li', c_long(-123)), '-123')
 -        self.assertEqual(PyUnicode_FromFormat(b'%lli', c_longlong(-123)), '-123')
 -        self.assertEqual(PyUnicode_FromFormat(b'%zi', c_ssize_t(-123)), '-123')
 -
 -        self.assertEqual(PyUnicode_FromFormat(b'%d', c_int(-123)), '-123')
 -        self.assertEqual(PyUnicode_FromFormat(b'%ld', c_long(-123)), '-123')
 -        self.assertEqual(PyUnicode_FromFormat(b'%lld', c_longlong(-123)), '-123')
 -        self.assertEqual(PyUnicode_FromFormat(b'%zd', c_ssize_t(-123)), '-123')
 -
 -        self.assertEqual(PyUnicode_FromFormat(b'%u', c_uint(123)), '123')
 -        self.assertEqual(PyUnicode_FromFormat(b'%lu', c_ulong(123)), '123')
 -        self.assertEqual(PyUnicode_FromFormat(b'%llu', c_ulonglong(123)), '123')
 -        self.assertEqual(PyUnicode_FromFormat(b'%zu', c_size_t(123)), '123')
 +        check_format('010',
 +                     b'%03i', c_int(10))
 +        check_format('0010',
 +                     b'%0.4i', c_int(10))
 +        check_format('-123',
 +                     b'%i', c_int(-123))
 +        check_format('-123',
 +                     b'%li', c_long(-123))
 +        check_format('-123',
 +                     b'%lli', c_longlong(-123))
 +        check_format('-123',
 +                     b'%zi', c_ssize_t(-123))
 +
 +        check_format('-123',
 +                     b'%d', c_int(-123))
 +        check_format('-123',
 +                     b'%ld', c_long(-123))
 +        check_format('-123',
 +                     b'%lld', c_longlong(-123))
 +        check_format('-123',
 +                     b'%zd', c_ssize_t(-123))
 +
 +        check_format('123',
 +                     b'%u', c_uint(123))
 +        check_format('123',
 +                     b'%lu', c_ulong(123))
 +        check_format('123',
 +                     b'%llu', c_ulonglong(123))
 +        check_format('123',
 +                     b'%zu', c_size_t(123))
 +
 +        # test long output
 +        min_longlong = -(2 ** (8 * sizeof(c_longlong) - 1))
 +        max_longlong = -min_longlong - 1
 +        check_format(str(min_longlong),
 +                     b'%lld', c_longlong(min_longlong))
 +        check_format(str(max_longlong),
 +                     b'%lld', c_longlong(max_longlong))
 +        max_ulonglong = 2 ** (8 * sizeof(c_ulonglong)) - 1
 +        check_format(str(max_ulonglong),
 +                     b'%llu', c_ulonglong(max_ulonglong))
 +        PyUnicode_FromFormat(b'%p', c_void_p(-1))
 +
 +        # test padding (width and/or precision)
 +        check_format('123'.rjust(10, '0'),
 +                     b'%010i', c_int(123))
 +        check_format('123'.rjust(100),
 +                     b'%100i', c_int(123))
 +        check_format('123'.rjust(100, '0'),
 +                     b'%.100i', c_int(123))
 +        check_format('123'.rjust(80, '0').rjust(100),
 +                     b'%100.80i', c_int(123))
 +
 +        check_format('123'.rjust(10, '0'),
 +                     b'%010u', c_uint(123))
 +        check_format('123'.rjust(100),
 +                     b'%100u', c_uint(123))
 +        check_format('123'.rjust(100, '0'),
 +                     b'%.100u', c_uint(123))
 +        check_format('123'.rjust(80, '0').rjust(100),
 +                     b'%100.80u', c_uint(123))
 +
 +        check_format('123'.rjust(10, '0'),
 +                     b'%010x', c_int(0x123))
 +        check_format('123'.rjust(100),
 +                     b'%100x', c_int(0x123))
 +        check_format('123'.rjust(100, '0'),
 +                     b'%.100x', c_int(0x123))
 +        check_format('123'.rjust(80, '0').rjust(100),
 +                     b'%100.80x', c_int(0x123))
  
          # test %A
 -        text = PyUnicode_FromFormat(b'%%A:%A', 'abc\xe9\uabcd\U0010ffff')
 -        self.assertEqual(text, r"%A:'abc\xe9\uabcd\U0010ffff'")
 +        check_format(r"%A:'abc\xe9\uabcd\U0010ffff'",
 +                     b'%%A:%A', 'abc\xe9\uabcd\U0010ffff')
  
          # test %V
 -        text = PyUnicode_FromFormat(b'repr=%V', 'abc', b'xyz')
 -        self.assertEqual(text, 'repr=abc')
 +        check_format('repr=abc',
 +                     b'repr=%V', 'abc', b'xyz')
  
          # Test string decode from parameter of %s using utf-8.
          # b'\xe4\xba\xba\xe6\xb0\x91' is utf-8 encoded byte sequence of
diff --cc Objects/unicodeobject.c
index c40e9ece5a,2e40c273a4..5659c71ce8
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@@ -2487,289 -2368,499 +2487,289 @@@ unicode_fromformat_arg(_PyUnicodeWrite
          size_tflag = 1;
          ++f;
      }
 -    if (p_longflag != NULL)
 -        *p_longflag = longflag;
 -    if (p_longlongflag != NULL)
 -        *p_longlongflag = longlongflag;
 -    if (p_size_tflag != NULL)
 -        *p_size_tflag = size_tflag;
 -    return f;
 -}
  
 -/* maximum number of characters required for output of %ld.  21 characters
 -   allows for 64-bit integers (in decimal) and an optional sign. */
 -#define MAX_LONG_CHARS 21
 -/* maximum number of characters required for output of %lld.
 -   We need at most ceil(log10(256)*SIZEOF_LONG_LONG) digits,
 -   plus 1 for the sign.  53/22 is an upper bound for log10(256). */
 -#define MAX_LONG_LONG_CHARS (2 + (SIZEOF_LONG_LONG*53-1) / 22)
 +    if (f[1] == '\0')
 +        writer->overallocate = 0;
  
 -PyObject *
 -PyUnicode_FromFormatV(const char *format, va_list vargs)
 -{
 -    va_list count;
 -    Py_ssize_t callcount = 0;
 -    PyObject **callresults = NULL;
 -    PyObject **callresult = NULL;
 -    Py_ssize_t n = 0;
 -    int width = 0;
 -    int precision = 0;
 -    int zeropad;
 -    const char* f;
 -    PyObject *string;
 -    /* used by sprintf */
 -    char fmt[61]; /* should be enough for %0width.precisionlld */
 -    Py_UCS4 maxchar = 127; /* result is ASCII by default */
 -    Py_UCS4 argmaxchar;
 -    Py_ssize_t numbersize = 0;
 -    char *numberresults = NULL;
 -    char *numberresult = NULL;
 -    Py_ssize_t i;
 -    int kind;
 -    void *data;
 +    switch (*f) {
 +    case 'c':
 +    {
 +        int ordinal = va_arg(*vargs, int);
 +        if (ordinal < 0 || ordinal > MAX_UNICODE) {
-             PyErr_SetString(PyExc_ValueError,
++            PyErr_SetString(PyExc_OverflowError,
 +                            "character argument not in range(0x110000)");
 +            return NULL;
 +        }
 +        if (_PyUnicodeWriter_WriteCharInline(writer, ordinal) < 0)
 +            return NULL;
 +        break;
 +    }
  
 -    Py_VA_COPY(count, vargs);
 -    /* step 1: count the number of %S/%R/%A/%s format specifications
 -     * (we call PyObject_Str()/PyObject_Repr()/PyObject_ASCII()/
 -     * PyUnicode_DecodeUTF8() for these objects once during step 3 and put the
 -     * result in an array)
 -     * also estimate a upper bound for all the number formats in the string,
 -     * numbers will be formatted in step 3 and be kept in a '\0'-separated
 -     * buffer before putting everything together. */
 -    for (f = format; *f; f++) {
 -        if (*f == '%') {
 -            int longlongflag;
 -            /* skip width or width.precision (eg. "1.2" of "%1.2f") */
 -            f = parse_format_flags(f, &width, NULL, NULL, &longlongflag, NULL);
 -            if (*f == 's' || *f=='S' || *f=='R' || *f=='A' || *f=='V')
 -                ++callcount;
 +    case 'i':
 +    case 'd':
 +    case 'u':
 +    case 'x':
 +    {
 +        /* used by sprintf */
 +        char fmt[10]; /* should be enough for "%0lld\0" */
 +        char buffer[MAX_LONG_LONG_CHARS];
 +        Py_ssize_t arglen;
  
 -            else if (*f == 'd' || *f=='u' || *f=='i' || *f=='x' || *f=='p') {
 +        if (*f == 'u') {
 +            makefmt(fmt, longflag, longlongflag, size_tflag, *f);
 +
 +            if (longflag)
 +                len = sprintf(buffer, fmt,
 +                        va_arg(*vargs, unsigned long));
  #ifdef HAVE_LONG_LONG
 -                if (longlongflag) {
 -                    if (width < MAX_LONG_LONG_CHARS)
 -                        width = MAX_LONG_LONG_CHARS;
 -                }
 -                else
 +            else if (longlongflag)
 +                len = sprintf(buffer, fmt,
 +                        va_arg(*vargs, unsigned PY_LONG_LONG));
  #endif
 -                    /* MAX_LONG_CHARS is enough to hold a 64-bit integer,
 -                       including sign.  Decimal takes the most space.  This
 -                       isn't enough for octal.  If a width is specified we
 -                       need more (which we allocate later). */
 -                    if (width < MAX_LONG_CHARS)
 -                        width = MAX_LONG_CHARS;
 -
 -                /* account for the size + '\0' to separate numbers
 -                   inside of the numberresults buffer */
 -                numbersize += (width + 1);
 -            }
 +            else if (size_tflag)
 +                len = sprintf(buffer, fmt,
 +                        va_arg(*vargs, size_t));
 +            else
 +                len = sprintf(buffer, fmt,
 +                        va_arg(*vargs, unsigned int));
 +        }
 +        else if (*f == 'x') {
 +            makefmt(fmt, 0, 0, 0, 'x');
 +            len = sprintf(buffer, fmt, va_arg(*vargs, int));
 +        }
 +        else {
 +            makefmt(fmt, longflag, longlongflag, size_tflag, *f);
 +
 +            if (longflag)
 +                len = sprintf(buffer, fmt,
 +                        va_arg(*vargs, long));
 +#ifdef HAVE_LONG_LONG
 +            else if (longlongflag)
 +                len = sprintf(buffer, fmt,
 +                        va_arg(*vargs, PY_LONG_LONG));
 +#endif
 +            else if (size_tflag)
 +                len = sprintf(buffer, fmt,
 +                        va_arg(*vargs, Py_ssize_t));
 +            else
 +                len = sprintf(buffer, fmt,
 +                        va_arg(*vargs, int));
          }
 -        else if ((unsigned char)*f > 127) {
 -            PyErr_Format(PyExc_ValueError,
 -                "PyUnicode_FromFormatV() expects an ASCII-encoded format "
 -                "string, got a non-ASCII byte: 0x%02x",
 -                (unsigned char)*f);
 +        assert(len >= 0);
 +
 +        if (precision < len)
 +            precision = len;
 +
 +        arglen = Py_MAX(precision, width);
 +        assert(ucs1lib_find_max_char((Py_UCS1*)buffer, (Py_UCS1*)buffer + len) <= 127);
 +        if (_PyUnicodeWriter_Prepare(writer, arglen, 127) == -1)
              return NULL;
 +
 +        if (width > precision) {
 +            Py_UCS4 fillchar;
 +            fill = width - precision;
 +            fillchar = zeropad?'0':' ';
 +            if (PyUnicode_Fill(writer->buffer, writer->pos, fill, fillchar) == -1)
 +                return NULL;
 +            writer->pos += fill;
          }
 +        if (precision > len) {
 +            fill = precision - len;
 +            if (PyUnicode_Fill(writer->buffer, writer->pos, fill, '0') == -1)
 +                return NULL;
 +            writer->pos += fill;
 +        }
 +
 +        unicode_write_cstr(writer->buffer, writer->pos, buffer, len);
 +        writer->pos += len;
 +        break;
      }
 -    /* step 2: allocate memory for the results of
 -     * PyObject_Str()/PyObject_Repr()/PyUnicode_DecodeUTF8() calls */
 -    if (callcount) {
 -        callresults = PyObject_Malloc(sizeof(PyObject *) * callcount);
 -        if (!callresults) {
 -            PyErr_NoMemory();
 +
 +    case 'p':
 +    {
 +        char number[MAX_LONG_LONG_CHARS];
 +
 +        len = sprintf(number, "%p", va_arg(*vargs, void*));
 +        assert(len >= 0);
 +
 +        /* %p is ill-defined:  ensure leading 0x. */
 +        if (number[1] == 'X')
 +            number[1] = 'x';
 +        else if (number[1] != 'x') {
 +            memmove(number + 2, number,
 +                    strlen(number) + 1);
 +            number[0] = '0';
 +            number[1] = 'x';
 +            len += 2;
 +        }
 +
 +        assert(ucs1lib_find_max_char((Py_UCS1*)number, (Py_UCS1*)number + len) <= 127);
 +        if (_PyUnicodeWriter_Prepare(writer, len, 127) == -1)
 +            return NULL;
 +        unicode_write_cstr(writer->buffer, writer->pos, number, len);
 +        writer->pos += len;
 +        break;
 +    }
 +
 +    case 's':
 +    {
 +        /* UTF-8 */
 +        const char *s = va_arg(*vargs, const char*);
 +        if (unicode_fromformat_write_cstr(writer, s, width, precision) < 0)
 +            return NULL;
 +        break;
 +    }
 +
 +    case 'U':
 +    {
 +        PyObject *obj = va_arg(*vargs, PyObject *);
 +        assert(obj && _PyUnicode_CHECK(obj));
 +
 +        if (unicode_fromformat_write_str(writer, obj, width, precision) == -1)
              return NULL;
 +        break;
 +    }
 +
 +    case 'V':
 +    {
 +        PyObject *obj = va_arg(*vargs, PyObject *);
 +        const char *str = va_arg(*vargs, const char *);
 +        if (obj) {
 +            assert(_PyUnicode_CHECK(obj));
 +            if (unicode_fromformat_write_str(writer, obj, width, precision) == -1)
 +                return NULL;
 +        }
 +        else {
 +            assert(str != NULL);
 +            if (unicode_fromformat_write_cstr(writer, str, width, precision) < 0)
 +                return NULL;
          }
 -        callresult = callresults;
 +        break;
      }
 -    /* step 2.5: allocate memory for the results of formating numbers */
 -    if (numbersize) {
 -        numberresults = PyObject_Malloc(numbersize);
 -        if (!numberresults) {
 -            PyErr_NoMemory();
 -            goto fail;
 +
 +    case 'S':
 +    {
 +        PyObject *obj = va_arg(*vargs, PyObject *);
 +        PyObject *str;
 +        assert(obj);
 +        str = PyObject_Str(obj);
 +        if (!str)
 +            return NULL;
 +        if (unicode_fromformat_write_str(writer, str, width, precision) == -1) {
 +            Py_DECREF(str);
 +            return NULL;
 +        }
 +        Py_DECREF(str);
 +        break;
 +    }
 +
 +    case 'R':
 +    {
 +        PyObject *obj = va_arg(*vargs, PyObject *);
 +        PyObject *repr;
 +        assert(obj);
 +        repr = PyObject_Repr(obj);
 +        if (!repr)
 +            return NULL;
 +        if (unicode_fromformat_write_str(writer, repr, width, precision) == -1) {
 +            Py_DECREF(repr);
 +            return NULL;
 +        }
 +        Py_DECREF(repr);
 +        break;
 +    }
 +
 +    case 'A':
 +    {
 +        PyObject *obj = va_arg(*vargs, PyObject *);
 +        PyObject *ascii;
 +        assert(obj);
 +        ascii = PyObject_ASCII(obj);
 +        if (!ascii)
 +            return NULL;
 +        if (unicode_fromformat_write_str(writer, ascii, width, precision) == -1) {
 +            Py_DECREF(ascii);
 +            return NULL;
          }
 -        numberresult = numberresults;
 +        Py_DECREF(ascii);
 +        break;
 +    }
 +
 +    case '%':
 +        if (_PyUnicodeWriter_WriteCharInline(writer, '%') < 0)
 +            return NULL;
 +        break;
 +
 +    default:
 +        /* if we stumble upon an unknown formatting code, copy the rest
 +           of the format string to the output string. (we cannot just
 +           skip the code, since there's no way to know what's in the
 +           argument list) */
 +        len = strlen(p);
 +        if (_PyUnicodeWriter_WriteCstr(writer, p, len) == -1)
 +            return NULL;
 +        f = p+len;
 +        return f;
      }
  
 -    /* step 3: format numbers and figure out how large a buffer we need */
 -    for (f = format; *f; f++) {
 +    f++;
 +    return f;
 +}
 +
 +PyObject *
 +PyUnicode_FromFormatV(const char *format, va_list vargs)
 +{
 +    va_list vargs2;
 +    const char *f;
 +    _PyUnicodeWriter writer;
 +
 +    _PyUnicodeWriter_Init(&writer);
 +    writer.min_length = strlen(format) + 100;
 +    writer.overallocate = 1;
 +
 +    /* va_list may be an array (of 1 item) on some platforms (ex: AMD64).
 +       Copy it to be able to pass a reference to a subfunction. */
 +    Py_VA_COPY(vargs2, vargs);
 +
 +    for (f = format; *f; ) {
          if (*f == '%') {
 -            const char* p;
 -            int longflag;
 -            int longlongflag;
 -            int size_tflag;
 -            int numprinted;
 +            f = unicode_fromformat_arg(&writer, f, &vargs2);
 +            if (f == NULL)
 +                goto fail;
 +        }
 +        else {
 +            const char *p;
 +            Py_ssize_t len;
  
              p = f;
 -            zeropad = (f[1] == '0');
 -            f = parse_format_flags(f, &width, &precision,
 -                                   &longflag, &longlongflag, &size_tflag);
 -            switch (*f) {
 -            case 'c':
 +            do
              {
 -                int ordinal = va_arg(count, int);
 -                if (ordinal < 0 || ordinal > MAX_UNICODE) {
 -                    PyErr_SetString(PyExc_OverflowError,
 -                                    "%c arg not in range(0x110000)");
 -                    goto fail;
 +                if ((unsigned char)*p > 127) {
 +                    PyErr_Format(PyExc_ValueError,
 +                        "PyUnicode_FromFormatV() expects an ASCII-encoded format "
 +                        "string, got a non-ASCII byte: 0x%02x",
 +                        (unsigned char)*p);
 +                    return NULL;
                  }
 -                maxchar = Py_MAX(maxchar, (Py_UCS4)ordinal);
 -                n++;
 -                break;
 +                p++;
              }
 -            case '%':
 -                n++;
 -                break;
 -            case 'i':
 -            case 'd':
 -                makefmt(fmt, longflag, longlongflag, size_tflag, zeropad,
 -                        width, precision, *f);
 -                if (longflag)
 -                    numprinted = sprintf(numberresult, fmt,
 -                                         va_arg(count, long));
 -#ifdef HAVE_LONG_LONG
 -                else if (longlongflag)
 -                    numprinted = sprintf(numberresult, fmt,
 -                                         va_arg(count, PY_LONG_LONG));
 -#endif
 -                else if (size_tflag)
 -                    numprinted = sprintf(numberresult, fmt,
 -                                         va_arg(count, Py_ssize_t));
 -                else
 -                    numprinted = sprintf(numberresult, fmt,
 -                                         va_arg(count, int));
 -                n += numprinted;
 -                /* advance by +1 to skip over the '\0' */
 -                numberresult += (numprinted + 1);
 -                assert(*(numberresult - 1) == '\0');
 -                assert(*(numberresult - 2) != '\0');
 -                assert(numprinted >= 0);
 -                assert(numberresult <= numberresults + numbersize);
 -                break;
 -            case 'u':
 -                makefmt(fmt, longflag, longlongflag, size_tflag, zeropad,
 -                        width, precision, 'u');
 -                if (longflag)
 -                    numprinted = sprintf(numberresult, fmt,
 -                                         va_arg(count, unsigned long));
 -#ifdef HAVE_LONG_LONG
 -                else if (longlongflag)
 -                    numprinted = sprintf(numberresult, fmt,
 -                                         va_arg(count, unsigned PY_LONG_LONG));
 -#endif
 -                else if (size_tflag)
 -                    numprinted = sprintf(numberresult, fmt,
 -                                         va_arg(count, size_t));
 -                else
 -                    numprinted = sprintf(numberresult, fmt,
 -                                         va_arg(count, unsigned int));
 -                n += numprinted;
 -                numberresult += (numprinted + 1);
 -                assert(*(numberresult - 1) == '\0');
 -                assert(*(numberresult - 2) != '\0');
 -                assert(numprinted >= 0);
 -                assert(numberresult <= numberresults + numbersize);
 -                break;
 -            case 'x':
 -                makefmt(fmt, 0, 0, 0, zeropad, width, precision, 'x');
 -                numprinted = sprintf(numberresult, fmt, va_arg(count, int));
 -                n += numprinted;
 -                numberresult += (numprinted + 1);
 -                assert(*(numberresult - 1) == '\0');
 -                assert(*(numberresult - 2) != '\0');
 -                assert(numprinted >= 0);
 -                assert(numberresult <= numberresults + numbersize);
 -                break;
 -            case 'p':
 -                numprinted = sprintf(numberresult, "%p", va_arg(count, void*));
 -                /* %p is ill-defined:  ensure leading 0x. */
 -                if (numberresult[1] == 'X')
 -                    numberresult[1] = 'x';
 -                else if (numberresult[1] != 'x') {
 -                    memmove(numberresult + 2, numberresult,
 -                            strlen(numberresult) + 1);
 -                    numberresult[0] = '0';
 -                    numberresult[1] = 'x';
 -                    numprinted += 2;
 -                }
 -                n += numprinted;
 -                numberresult += (numprinted + 1);
 -                assert(*(numberresult - 1) == '\0');
 -                assert(*(numberresult - 2) != '\0');
 -                assert(numprinted >= 0);
 -                assert(numberresult <= numberresults + numbersize);
 -                break;
 -            case 's':
 -            {
 -                /* UTF-8 */
 -                const char *s = va_arg(count, const char*);
 -                PyObject *str = PyUnicode_DecodeUTF8Stateful(s, strlen(s), "replace", NULL);
 -                if (!str)
 -                    goto fail;
 -                /* since PyUnicode_DecodeUTF8 returns already flexible
 -                   unicode objects, there is no need to call ready on them */
 -                argmaxchar = PyUnicode_MAX_CHAR_VALUE(str);
 -                maxchar = Py_MAX(maxchar, argmaxchar);
 -                n += PyUnicode_GET_LENGTH(str);
 -                /* Remember the str and switch to the next slot */
 -                *callresult++ = str;
 -                break;
 -            }
 -            case 'U':
 -            {
 -                PyObject *obj = va_arg(count, PyObject *);
 -                assert(obj && _PyUnicode_CHECK(obj));
 -                if (PyUnicode_READY(obj) == -1)
 -                    goto fail;
 -                argmaxchar = PyUnicode_MAX_CHAR_VALUE(obj);
 -                maxchar = Py_MAX(maxchar, argmaxchar);
 -                n += PyUnicode_GET_LENGTH(obj);
 -                break;
 -            }
 -            case 'V':
 -            {
 -                PyObject *obj = va_arg(count, PyObject *);
 -                const char *str = va_arg(count, const char *);
 -                PyObject *str_obj;
 -                assert(obj || str);
 -                assert(!obj || _PyUnicode_CHECK(obj));
 -                if (obj) {
 -                    if (PyUnicode_READY(obj) == -1)
 -                        goto fail;
 -                    argmaxchar = PyUnicode_MAX_CHAR_VALUE(obj);
 -                    maxchar = Py_MAX(maxchar, argmaxchar);
 -                    n += PyUnicode_GET_LENGTH(obj);
 -                    *callresult++ = NULL;
 -                }
 -                else {
 -                    str_obj = PyUnicode_DecodeUTF8Stateful(str, strlen(str), "replace", NULL);
 -                    if (!str_obj)
 -                        goto fail;
 -                    if (PyUnicode_READY(str_obj) == -1) {
 -                        Py_DECREF(str_obj);
 -                        goto fail;
 -                    }
 -                    argmaxchar = PyUnicode_MAX_CHAR_VALUE(str_obj);
 -                    maxchar = Py_MAX(maxchar, argmaxchar);
 -                    n += PyUnicode_GET_LENGTH(str_obj);
 -                    *callresult++ = str_obj;
 -                }
 -                break;
 -            }
 -            case 'S':
 -            {
 -                PyObject *obj = va_arg(count, PyObject *);
 -                PyObject *str;
 -                assert(obj);
 -                str = PyObject_Str(obj);
 -                if (!str)
 -                    goto fail;
 -                if (PyUnicode_READY(str) == -1) {
 -                    Py_DECREF(str);
 -                    goto fail;
 -                }
 -                argmaxchar = PyUnicode_MAX_CHAR_VALUE(str);
 -                maxchar = Py_MAX(maxchar, argmaxchar);
 -                n += PyUnicode_GET_LENGTH(str);
 -                /* Remember the str and switch to the next slot */
 -                *callresult++ = str;
 -                break;
 -            }
 -            case 'R':
 -            {
 -                PyObject *obj = va_arg(count, PyObject *);
 -                PyObject *repr;
 -                assert(obj);
 -                repr = PyObject_Repr(obj);
 -                if (!repr)
 -                    goto fail;
 -                if (PyUnicode_READY(repr) == -1) {
 -                    Py_DECREF(repr);
 -                    goto fail;
 -                }
 -                argmaxchar = PyUnicode_MAX_CHAR_VALUE(repr);
 -                maxchar = Py_MAX(maxchar, argmaxchar);
 -                n += PyUnicode_GET_LENGTH(repr);
 -                /* Remember the repr and switch to the next slot */
 -                *callresult++ = repr;
 -                break;
 -            }
 -            case 'A':
 -            {
 -                PyObject *obj = va_arg(count, PyObject *);
 -                PyObject *ascii;
 -                assert(obj);
 -                ascii = PyObject_ASCII(obj);
 -                if (!ascii)
 -                    goto fail;
 -                if (PyUnicode_READY(ascii) == -1) {
 -                    Py_DECREF(ascii);
 -                    goto fail;
 -                }
 -                argmaxchar = PyUnicode_MAX_CHAR_VALUE(ascii);
 -                maxchar = Py_MAX(maxchar, argmaxchar);
 -                n += PyUnicode_GET_LENGTH(ascii);
 -                /* Remember the repr and switch to the next slot */
 -                *callresult++ = ascii;
 -                break;
 -            }
 -            default:
 -                /* if we stumble upon an unknown
 -                   formatting code, copy the rest of
 -                   the format string to the output
 -                   string. (we cannot just skip the
 -                   code, since there's no way to know
 -                   what's in the argument list) */
 -                n += strlen(p);
 -                goto expand;
 -            }
 -        } else
 -            n++;
 -    }
 -  expand:
 -    /* step 4: fill the buffer */
 -    /* Since we've analyzed how much space we need,
 -       we don't have to resize the string.
 -       There can be no errors beyond this point. */
 -    string = PyUnicode_New(n, maxchar);
 -    if (!string)
 -        goto fail;
 -    kind = PyUnicode_KIND(string);
 -    data = PyUnicode_DATA(string);
 -    callresult = callresults;
 -    numberresult = numberresults;
 +            while (*p != '\0' && *p != '%');
 +            len = p - f;
  
 -    for (i = 0, f = format; *f; f++) {
 -        if (*f == '%') {
 -            const char* p;
 +            if (*p == '\0')
 +                writer.overallocate = 0;
 +            if (_PyUnicodeWriter_Prepare(&writer, len, 127) == -1)
 +                goto fail;
 +            unicode_write_cstr(writer.buffer, writer.pos, f, len);
 +            writer.pos += len;
  
 -            p = f;
 -            f = parse_format_flags(f, NULL, NULL, NULL, NULL, NULL);
 -            /* checking for == because the last argument could be a empty
 -               string, which causes i to point to end, the assert at the end of
 -               the loop */
 -            assert(i <= PyUnicode_GET_LENGTH(string));
 -
 -            switch (*f) {
 -            case 'c':
 -            {
 -                const int ordinal = va_arg(vargs, int);
 -                PyUnicode_WRITE(kind, data, i++, ordinal);
 -                break;
 -            }
 -            case 'i':
 -            case 'd':
 -            case 'u':
 -            case 'x':
 -            case 'p':
 -            {
 -                Py_ssize_t len;
 -                /* unused, since we already have the result */
 -                if (*f == 'p')
 -                    (void) va_arg(vargs, void *);
 -                else
 -                    (void) va_arg(vargs, int);
 -                /* extract the result from numberresults and append. */
 -                len = strlen(numberresult);
 -                unicode_write_cstr(string, i, numberresult, len);
 -                /* skip over the separating '\0' */
 -                i += len;
 -                numberresult += len;
 -                assert(*numberresult == '\0');
 -                numberresult++;
 -                assert(numberresult <= numberresults + numbersize);
 -                break;
 -            }
 -            case 's':
 -            {
 -                /* unused, since we already have the result */
 -                Py_ssize_t size;
 -                (void) va_arg(vargs, char *);
 -                size = PyUnicode_GET_LENGTH(*callresult);
 -                assert(PyUnicode_KIND(*callresult) <= PyUnicode_KIND(string));
 -                _PyUnicode_FastCopyCharacters(string, i, *callresult, 0, size);
 -                i += size;
 -                /* We're done with the unicode()/repr() => forget it */
 -                Py_DECREF(*callresult);
 -                /* switch to next unicode()/repr() result */
 -                ++callresult;
 -                break;
 -            }
 -            case 'U':
 -            {
 -                PyObject *obj = va_arg(vargs, PyObject *);
 -                Py_ssize_t size;
 -                assert(PyUnicode_KIND(obj) <= PyUnicode_KIND(string));
 -                size = PyUnicode_GET_LENGTH(obj);
 -                _PyUnicode_FastCopyCharacters(string, i, obj, 0, size);
 -                i += size;
 -                break;
 -            }
 -            case 'V':
 -            {
 -                Py_ssize_t size;
 -                PyObject *obj = va_arg(vargs, PyObject *);
 -                va_arg(vargs, const char *);
 -                if (obj) {
 -                    size = PyUnicode_GET_LENGTH(obj);
 -                    assert(PyUnicode_KIND(obj) <= PyUnicode_KIND(string));
 -                    _PyUnicode_FastCopyCharacters(string, i, obj, 0, size);
 -                    i += size;
 -                } else {
 -                    size = PyUnicode_GET_LENGTH(*callresult);
 -                    assert(PyUnicode_KIND(*callresult) <=
 -                           PyUnicode_KIND(string));
 -                    _PyUnicode_FastCopyCharacters(string, i, *callresult, 0, size);
 -                    i += size;
 -                    Py_DECREF(*callresult);
 -                }
 -                ++callresult;
 -                break;
 -            }
 -            case 'S':
 -            case 'R':
 -            case 'A':
 -            {
 -                Py_ssize_t size = PyUnicode_GET_LENGTH(*callresult);
 -                /* unused, since we already have the result */
 -                (void) va_arg(vargs, PyObject *);
 -                assert(PyUnicode_KIND(*callresult) <= PyUnicode_KIND(string));
 -                _PyUnicode_FastCopyCharacters(string, i, *callresult, 0,  size);
 -                i += size;
 -                /* We're done with the unicode()/repr() => forget it */
 -                Py_DECREF(*callresult);
 -                /* switch to next unicode()/repr() result */
 -                ++callresult;
 -                break;
 -            }
 -            case '%':
 -                PyUnicode_WRITE(kind, data, i++, '%');
 -                break;
 -            default:
 -            {
 -                Py_ssize_t len = strlen(p);
 -                unicode_write_cstr(string, i, p, len);
 -                i += len;
 -                assert(i == PyUnicode_GET_LENGTH(string));
 -                goto end;
 -            }
 -            }
 -        }
 -        else {
 -            assert(i < PyUnicode_GET_LENGTH(string));
 -            PyUnicode_WRITE(kind, data, i++, *f);
 +            f = p;
          }
      }
 -    assert(i == PyUnicode_GET_LENGTH(string));
 +    return _PyUnicodeWriter_Finish(&writer);
  
 -  end:
 -    if (callresults)
 -        PyObject_Free(callresults);
 -    if (numberresults)
 -        PyObject_Free(numberresults);
 -    return unicode_result(string);
    fail:
 -    if (callresults) {
 -        PyObject **callresult2 = callresults;
 -        while (callresult2 < callresult) {
 -            Py_XDECREF(*callresult2);
 -            ++callresult2;
 -        }
 -        PyObject_Free(callresults);
 -    }
 -    if (numberresults)
 -        PyObject_Free(numberresults);
 +    _PyUnicodeWriter_Dealloc(&writer);
      return NULL;
  }