From: Serhiy Storchaka Date: Sun, 23 Jun 2013 17:21:16 +0000 (+0300) Subject: Issue #18184: PyUnicode_FromFormat() and PyUnicode_FromFormatV() now raise X-Git-Tag: v3.4.0a1~416 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=c89533f72fbf15779d33c4533c801ed4c3d0ea18;p=python Issue #18184: PyUnicode_FromFormat() and PyUnicode_FromFormatV() now raise OverflowError when an argument of %c format is out of range. --- c89533f72fbf15779d33c4533c801ed4c3d0ea18 diff --cc Lib/test/test_unicode.py index 382b463093,0c82560ca7..518d6d6016 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@@ -2044,182 -2022,47 +2044,184 @@@ class UnicodeTest(string_tests.CommonTe PyUnicode_FromFormat, b'unicode\xe9=%s', 'ascii') # test "%c" - self.assertEqual(PyUnicode_FromFormat(b'%c', c_int(0xabcd)), '\uabcd') - self.assertEqual(PyUnicode_FromFormat(b'%c', c_int(0x10ffff)), '\U0010ffff') + check_format('\uabcd', + b'%c', c_int(0xabcd)) + check_format('\U0010ffff', + b'%c', c_int(0x10ffff)) + with self.assertRaises(OverflowError): + PyUnicode_FromFormat(b'%c', c_int(0x110000)) # Issue #18183 - self.assertEqual( - PyUnicode_FromFormat(b'%c%c', c_int(0x10000), c_int(0x100000)), - '\U00010000\U00100000') + check_format('\U00010000\U00100000', + b'%c%c', c_int(0x10000), c_int(0x100000)) # test "%" - self.assertEqual(PyUnicode_FromFormat(b'%'), '%') - self.assertEqual(PyUnicode_FromFormat(b'%%'), '%') - self.assertEqual(PyUnicode_FromFormat(b'%%s'), '%s') - self.assertEqual(PyUnicode_FromFormat(b'[%%]'), '[%]') - self.assertEqual(PyUnicode_FromFormat(b'%%%s', b'abc'), '%abc') + check_format('%', + b'%') + check_format('%', + b'%%') + check_format('%s', + b'%%s') + check_format('[%]', + b'[%%]') + check_format('%abc', + b'%%%s', b'abc') + + # truncated string + check_format('abc', + b'%.3s', b'abcdef') + check_format('abc[\ufffd', + b'%.5s', 'abc[\u20ac]'.encode('utf8')) + check_format("'\\u20acABC'", + b'%A', '\u20acABC') + check_format("'\\u20", + b'%.5A', '\u20acABCDEF') + check_format("'\u20acABC'", + b'%R', '\u20acABC') + check_format("'\u20acA", + b'%.3R', '\u20acABCDEF') + check_format('\u20acAB', + b'%.3S', '\u20acABCDEF') + check_format('\u20acAB', + b'%.3U', '\u20acABCDEF') + check_format('\u20acAB', + b'%.3V', '\u20acABCDEF', None) + check_format('abc[\ufffd', + b'%.5V', None, 'abc[\u20ac]'.encode('utf8')) + + # following tests comes from #7330 + # test width modifier and precision modifier with %S + check_format("repr= abc", + b'repr=%5S', 'abc') + check_format("repr=ab", + b'repr=%.2S', 'abc') + check_format("repr= ab", + b'repr=%5.2S', 'abc') + + # test width modifier and precision modifier with %R + check_format("repr= 'abc'", + b'repr=%8R', 'abc') + check_format("repr='ab", + b'repr=%.3R', 'abc') + check_format("repr= 'ab", + b'repr=%5.3R', 'abc') + + # test width modifier and precision modifier with %A + check_format("repr= 'abc'", + b'repr=%8A', 'abc') + check_format("repr='ab", + b'repr=%.3A', 'abc') + check_format("repr= 'ab", + b'repr=%5.3A', 'abc') + + # test width modifier and precision modifier with %s + check_format("repr= abc", + b'repr=%5s', b'abc') + check_format("repr=ab", + b'repr=%.2s', b'abc') + check_format("repr= ab", + b'repr=%5.2s', b'abc') + + # test width modifier and precision modifier with %U + check_format("repr= abc", + b'repr=%5U', 'abc') + check_format("repr=ab", + b'repr=%.2U', 'abc') + check_format("repr= ab", + b'repr=%5.2U', 'abc') + + # test width modifier and precision modifier with %V + check_format("repr= abc", + b'repr=%5V', 'abc', b'123') + check_format("repr=ab", + b'repr=%.2V', 'abc', b'123') + check_format("repr= ab", + b'repr=%5.2V', 'abc', b'123') + check_format("repr= 123", + b'repr=%5V', None, b'123') + check_format("repr=12", + b'repr=%.2V', None, b'123') + check_format("repr= 12", + b'repr=%5.2V', None, b'123') # test integer formats (%i, %d, %u) - self.assertEqual(PyUnicode_FromFormat(b'%03i', c_int(10)), '010') - self.assertEqual(PyUnicode_FromFormat(b'%0.4i', c_int(10)), '0010') - self.assertEqual(PyUnicode_FromFormat(b'%i', c_int(-123)), '-123') - self.assertEqual(PyUnicode_FromFormat(b'%li', c_long(-123)), '-123') - self.assertEqual(PyUnicode_FromFormat(b'%lli', c_longlong(-123)), '-123') - self.assertEqual(PyUnicode_FromFormat(b'%zi', c_ssize_t(-123)), '-123') - - self.assertEqual(PyUnicode_FromFormat(b'%d', c_int(-123)), '-123') - self.assertEqual(PyUnicode_FromFormat(b'%ld', c_long(-123)), '-123') - self.assertEqual(PyUnicode_FromFormat(b'%lld', c_longlong(-123)), '-123') - self.assertEqual(PyUnicode_FromFormat(b'%zd', c_ssize_t(-123)), '-123') - - self.assertEqual(PyUnicode_FromFormat(b'%u', c_uint(123)), '123') - self.assertEqual(PyUnicode_FromFormat(b'%lu', c_ulong(123)), '123') - self.assertEqual(PyUnicode_FromFormat(b'%llu', c_ulonglong(123)), '123') - self.assertEqual(PyUnicode_FromFormat(b'%zu', c_size_t(123)), '123') + check_format('010', + b'%03i', c_int(10)) + check_format('0010', + b'%0.4i', c_int(10)) + check_format('-123', + b'%i', c_int(-123)) + check_format('-123', + b'%li', c_long(-123)) + check_format('-123', + b'%lli', c_longlong(-123)) + check_format('-123', + b'%zi', c_ssize_t(-123)) + + check_format('-123', + b'%d', c_int(-123)) + check_format('-123', + b'%ld', c_long(-123)) + check_format('-123', + b'%lld', c_longlong(-123)) + check_format('-123', + b'%zd', c_ssize_t(-123)) + + check_format('123', + b'%u', c_uint(123)) + check_format('123', + b'%lu', c_ulong(123)) + check_format('123', + b'%llu', c_ulonglong(123)) + check_format('123', + b'%zu', c_size_t(123)) + + # test long output + min_longlong = -(2 ** (8 * sizeof(c_longlong) - 1)) + max_longlong = -min_longlong - 1 + check_format(str(min_longlong), + b'%lld', c_longlong(min_longlong)) + check_format(str(max_longlong), + b'%lld', c_longlong(max_longlong)) + max_ulonglong = 2 ** (8 * sizeof(c_ulonglong)) - 1 + check_format(str(max_ulonglong), + b'%llu', c_ulonglong(max_ulonglong)) + PyUnicode_FromFormat(b'%p', c_void_p(-1)) + + # test padding (width and/or precision) + check_format('123'.rjust(10, '0'), + b'%010i', c_int(123)) + check_format('123'.rjust(100), + b'%100i', c_int(123)) + check_format('123'.rjust(100, '0'), + b'%.100i', c_int(123)) + check_format('123'.rjust(80, '0').rjust(100), + b'%100.80i', c_int(123)) + + check_format('123'.rjust(10, '0'), + b'%010u', c_uint(123)) + check_format('123'.rjust(100), + b'%100u', c_uint(123)) + check_format('123'.rjust(100, '0'), + b'%.100u', c_uint(123)) + check_format('123'.rjust(80, '0').rjust(100), + b'%100.80u', c_uint(123)) + + check_format('123'.rjust(10, '0'), + b'%010x', c_int(0x123)) + check_format('123'.rjust(100), + b'%100x', c_int(0x123)) + check_format('123'.rjust(100, '0'), + b'%.100x', c_int(0x123)) + check_format('123'.rjust(80, '0').rjust(100), + b'%100.80x', c_int(0x123)) # test %A - text = PyUnicode_FromFormat(b'%%A:%A', 'abc\xe9\uabcd\U0010ffff') - self.assertEqual(text, r"%A:'abc\xe9\uabcd\U0010ffff'") + check_format(r"%A:'abc\xe9\uabcd\U0010ffff'", + b'%%A:%A', 'abc\xe9\uabcd\U0010ffff') # test %V - text = PyUnicode_FromFormat(b'repr=%V', 'abc', b'xyz') - self.assertEqual(text, 'repr=abc') + check_format('repr=abc', + b'repr=%V', 'abc', b'xyz') # Test string decode from parameter of %s using utf-8. # b'\xe4\xba\xba\xe6\xb0\x91' is utf-8 encoded byte sequence of diff --cc Objects/unicodeobject.c index c40e9ece5a,2e40c273a4..5659c71ce8 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@@ -2487,289 -2368,499 +2487,289 @@@ unicode_fromformat_arg(_PyUnicodeWrite size_tflag = 1; ++f; } - if (p_longflag != NULL) - *p_longflag = longflag; - if (p_longlongflag != NULL) - *p_longlongflag = longlongflag; - if (p_size_tflag != NULL) - *p_size_tflag = size_tflag; - return f; -} -/* maximum number of characters required for output of %ld. 21 characters - allows for 64-bit integers (in decimal) and an optional sign. */ -#define MAX_LONG_CHARS 21 -/* maximum number of characters required for output of %lld. - We need at most ceil(log10(256)*SIZEOF_LONG_LONG) digits, - plus 1 for the sign. 53/22 is an upper bound for log10(256). */ -#define MAX_LONG_LONG_CHARS (2 + (SIZEOF_LONG_LONG*53-1) / 22) + if (f[1] == '\0') + writer->overallocate = 0; -PyObject * -PyUnicode_FromFormatV(const char *format, va_list vargs) -{ - va_list count; - Py_ssize_t callcount = 0; - PyObject **callresults = NULL; - PyObject **callresult = NULL; - Py_ssize_t n = 0; - int width = 0; - int precision = 0; - int zeropad; - const char* f; - PyObject *string; - /* used by sprintf */ - char fmt[61]; /* should be enough for %0width.precisionlld */ - Py_UCS4 maxchar = 127; /* result is ASCII by default */ - Py_UCS4 argmaxchar; - Py_ssize_t numbersize = 0; - char *numberresults = NULL; - char *numberresult = NULL; - Py_ssize_t i; - int kind; - void *data; + switch (*f) { + case 'c': + { + int ordinal = va_arg(*vargs, int); + if (ordinal < 0 || ordinal > MAX_UNICODE) { - PyErr_SetString(PyExc_ValueError, ++ PyErr_SetString(PyExc_OverflowError, + "character argument not in range(0x110000)"); + return NULL; + } + if (_PyUnicodeWriter_WriteCharInline(writer, ordinal) < 0) + return NULL; + break; + } - Py_VA_COPY(count, vargs); - /* step 1: count the number of %S/%R/%A/%s format specifications - * (we call PyObject_Str()/PyObject_Repr()/PyObject_ASCII()/ - * PyUnicode_DecodeUTF8() for these objects once during step 3 and put the - * result in an array) - * also estimate a upper bound for all the number formats in the string, - * numbers will be formatted in step 3 and be kept in a '\0'-separated - * buffer before putting everything together. */ - for (f = format; *f; f++) { - if (*f == '%') { - int longlongflag; - /* skip width or width.precision (eg. "1.2" of "%1.2f") */ - f = parse_format_flags(f, &width, NULL, NULL, &longlongflag, NULL); - if (*f == 's' || *f=='S' || *f=='R' || *f=='A' || *f=='V') - ++callcount; + case 'i': + case 'd': + case 'u': + case 'x': + { + /* used by sprintf */ + char fmt[10]; /* should be enough for "%0lld\0" */ + char buffer[MAX_LONG_LONG_CHARS]; + Py_ssize_t arglen; - else if (*f == 'd' || *f=='u' || *f=='i' || *f=='x' || *f=='p') { + if (*f == 'u') { + makefmt(fmt, longflag, longlongflag, size_tflag, *f); + + if (longflag) + len = sprintf(buffer, fmt, + va_arg(*vargs, unsigned long)); #ifdef HAVE_LONG_LONG - if (longlongflag) { - if (width < MAX_LONG_LONG_CHARS) - width = MAX_LONG_LONG_CHARS; - } - else + else if (longlongflag) + len = sprintf(buffer, fmt, + va_arg(*vargs, unsigned PY_LONG_LONG)); #endif - /* MAX_LONG_CHARS is enough to hold a 64-bit integer, - including sign. Decimal takes the most space. This - isn't enough for octal. If a width is specified we - need more (which we allocate later). */ - if (width < MAX_LONG_CHARS) - width = MAX_LONG_CHARS; - - /* account for the size + '\0' to separate numbers - inside of the numberresults buffer */ - numbersize += (width + 1); - } + else if (size_tflag) + len = sprintf(buffer, fmt, + va_arg(*vargs, size_t)); + else + len = sprintf(buffer, fmt, + va_arg(*vargs, unsigned int)); + } + else if (*f == 'x') { + makefmt(fmt, 0, 0, 0, 'x'); + len = sprintf(buffer, fmt, va_arg(*vargs, int)); + } + else { + makefmt(fmt, longflag, longlongflag, size_tflag, *f); + + if (longflag) + len = sprintf(buffer, fmt, + va_arg(*vargs, long)); +#ifdef HAVE_LONG_LONG + else if (longlongflag) + len = sprintf(buffer, fmt, + va_arg(*vargs, PY_LONG_LONG)); +#endif + else if (size_tflag) + len = sprintf(buffer, fmt, + va_arg(*vargs, Py_ssize_t)); + else + len = sprintf(buffer, fmt, + va_arg(*vargs, int)); } - else if ((unsigned char)*f > 127) { - PyErr_Format(PyExc_ValueError, - "PyUnicode_FromFormatV() expects an ASCII-encoded format " - "string, got a non-ASCII byte: 0x%02x", - (unsigned char)*f); + assert(len >= 0); + + if (precision < len) + precision = len; + + arglen = Py_MAX(precision, width); + assert(ucs1lib_find_max_char((Py_UCS1*)buffer, (Py_UCS1*)buffer + len) <= 127); + if (_PyUnicodeWriter_Prepare(writer, arglen, 127) == -1) return NULL; + + if (width > precision) { + Py_UCS4 fillchar; + fill = width - precision; + fillchar = zeropad?'0':' '; + if (PyUnicode_Fill(writer->buffer, writer->pos, fill, fillchar) == -1) + return NULL; + writer->pos += fill; } + if (precision > len) { + fill = precision - len; + if (PyUnicode_Fill(writer->buffer, writer->pos, fill, '0') == -1) + return NULL; + writer->pos += fill; + } + + unicode_write_cstr(writer->buffer, writer->pos, buffer, len); + writer->pos += len; + break; } - /* step 2: allocate memory for the results of - * PyObject_Str()/PyObject_Repr()/PyUnicode_DecodeUTF8() calls */ - if (callcount) { - callresults = PyObject_Malloc(sizeof(PyObject *) * callcount); - if (!callresults) { - PyErr_NoMemory(); + + case 'p': + { + char number[MAX_LONG_LONG_CHARS]; + + len = sprintf(number, "%p", va_arg(*vargs, void*)); + assert(len >= 0); + + /* %p is ill-defined: ensure leading 0x. */ + if (number[1] == 'X') + number[1] = 'x'; + else if (number[1] != 'x') { + memmove(number + 2, number, + strlen(number) + 1); + number[0] = '0'; + number[1] = 'x'; + len += 2; + } + + assert(ucs1lib_find_max_char((Py_UCS1*)number, (Py_UCS1*)number + len) <= 127); + if (_PyUnicodeWriter_Prepare(writer, len, 127) == -1) + return NULL; + unicode_write_cstr(writer->buffer, writer->pos, number, len); + writer->pos += len; + break; + } + + case 's': + { + /* UTF-8 */ + const char *s = va_arg(*vargs, const char*); + if (unicode_fromformat_write_cstr(writer, s, width, precision) < 0) + return NULL; + break; + } + + case 'U': + { + PyObject *obj = va_arg(*vargs, PyObject *); + assert(obj && _PyUnicode_CHECK(obj)); + + if (unicode_fromformat_write_str(writer, obj, width, precision) == -1) return NULL; + break; + } + + case 'V': + { + PyObject *obj = va_arg(*vargs, PyObject *); + const char *str = va_arg(*vargs, const char *); + if (obj) { + assert(_PyUnicode_CHECK(obj)); + if (unicode_fromformat_write_str(writer, obj, width, precision) == -1) + return NULL; + } + else { + assert(str != NULL); + if (unicode_fromformat_write_cstr(writer, str, width, precision) < 0) + return NULL; } - callresult = callresults; + break; } - /* step 2.5: allocate memory for the results of formating numbers */ - if (numbersize) { - numberresults = PyObject_Malloc(numbersize); - if (!numberresults) { - PyErr_NoMemory(); - goto fail; + + case 'S': + { + PyObject *obj = va_arg(*vargs, PyObject *); + PyObject *str; + assert(obj); + str = PyObject_Str(obj); + if (!str) + return NULL; + if (unicode_fromformat_write_str(writer, str, width, precision) == -1) { + Py_DECREF(str); + return NULL; + } + Py_DECREF(str); + break; + } + + case 'R': + { + PyObject *obj = va_arg(*vargs, PyObject *); + PyObject *repr; + assert(obj); + repr = PyObject_Repr(obj); + if (!repr) + return NULL; + if (unicode_fromformat_write_str(writer, repr, width, precision) == -1) { + Py_DECREF(repr); + return NULL; + } + Py_DECREF(repr); + break; + } + + case 'A': + { + PyObject *obj = va_arg(*vargs, PyObject *); + PyObject *ascii; + assert(obj); + ascii = PyObject_ASCII(obj); + if (!ascii) + return NULL; + if (unicode_fromformat_write_str(writer, ascii, width, precision) == -1) { + Py_DECREF(ascii); + return NULL; } - numberresult = numberresults; + Py_DECREF(ascii); + break; + } + + case '%': + if (_PyUnicodeWriter_WriteCharInline(writer, '%') < 0) + return NULL; + break; + + default: + /* if we stumble upon an unknown formatting code, copy the rest + of the format string to the output string. (we cannot just + skip the code, since there's no way to know what's in the + argument list) */ + len = strlen(p); + if (_PyUnicodeWriter_WriteCstr(writer, p, len) == -1) + return NULL; + f = p+len; + return f; } - /* step 3: format numbers and figure out how large a buffer we need */ - for (f = format; *f; f++) { + f++; + return f; +} + +PyObject * +PyUnicode_FromFormatV(const char *format, va_list vargs) +{ + va_list vargs2; + const char *f; + _PyUnicodeWriter writer; + + _PyUnicodeWriter_Init(&writer); + writer.min_length = strlen(format) + 100; + writer.overallocate = 1; + + /* va_list may be an array (of 1 item) on some platforms (ex: AMD64). + Copy it to be able to pass a reference to a subfunction. */ + Py_VA_COPY(vargs2, vargs); + + for (f = format; *f; ) { if (*f == '%') { - const char* p; - int longflag; - int longlongflag; - int size_tflag; - int numprinted; + f = unicode_fromformat_arg(&writer, f, &vargs2); + if (f == NULL) + goto fail; + } + else { + const char *p; + Py_ssize_t len; p = f; - zeropad = (f[1] == '0'); - f = parse_format_flags(f, &width, &precision, - &longflag, &longlongflag, &size_tflag); - switch (*f) { - case 'c': + do { - int ordinal = va_arg(count, int); - if (ordinal < 0 || ordinal > MAX_UNICODE) { - PyErr_SetString(PyExc_OverflowError, - "%c arg not in range(0x110000)"); - goto fail; + if ((unsigned char)*p > 127) { + PyErr_Format(PyExc_ValueError, + "PyUnicode_FromFormatV() expects an ASCII-encoded format " + "string, got a non-ASCII byte: 0x%02x", + (unsigned char)*p); + return NULL; } - maxchar = Py_MAX(maxchar, (Py_UCS4)ordinal); - n++; - break; + p++; } - case '%': - n++; - break; - case 'i': - case 'd': - makefmt(fmt, longflag, longlongflag, size_tflag, zeropad, - width, precision, *f); - if (longflag) - numprinted = sprintf(numberresult, fmt, - va_arg(count, long)); -#ifdef HAVE_LONG_LONG - else if (longlongflag) - numprinted = sprintf(numberresult, fmt, - va_arg(count, PY_LONG_LONG)); -#endif - else if (size_tflag) - numprinted = sprintf(numberresult, fmt, - va_arg(count, Py_ssize_t)); - else - numprinted = sprintf(numberresult, fmt, - va_arg(count, int)); - n += numprinted; - /* advance by +1 to skip over the '\0' */ - numberresult += (numprinted + 1); - assert(*(numberresult - 1) == '\0'); - assert(*(numberresult - 2) != '\0'); - assert(numprinted >= 0); - assert(numberresult <= numberresults + numbersize); - break; - case 'u': - makefmt(fmt, longflag, longlongflag, size_tflag, zeropad, - width, precision, 'u'); - if (longflag) - numprinted = sprintf(numberresult, fmt, - va_arg(count, unsigned long)); -#ifdef HAVE_LONG_LONG - else if (longlongflag) - numprinted = sprintf(numberresult, fmt, - va_arg(count, unsigned PY_LONG_LONG)); -#endif - else if (size_tflag) - numprinted = sprintf(numberresult, fmt, - va_arg(count, size_t)); - else - numprinted = sprintf(numberresult, fmt, - va_arg(count, unsigned int)); - n += numprinted; - numberresult += (numprinted + 1); - assert(*(numberresult - 1) == '\0'); - assert(*(numberresult - 2) != '\0'); - assert(numprinted >= 0); - assert(numberresult <= numberresults + numbersize); - break; - case 'x': - makefmt(fmt, 0, 0, 0, zeropad, width, precision, 'x'); - numprinted = sprintf(numberresult, fmt, va_arg(count, int)); - n += numprinted; - numberresult += (numprinted + 1); - assert(*(numberresult - 1) == '\0'); - assert(*(numberresult - 2) != '\0'); - assert(numprinted >= 0); - assert(numberresult <= numberresults + numbersize); - break; - case 'p': - numprinted = sprintf(numberresult, "%p", va_arg(count, void*)); - /* %p is ill-defined: ensure leading 0x. */ - if (numberresult[1] == 'X') - numberresult[1] = 'x'; - else if (numberresult[1] != 'x') { - memmove(numberresult + 2, numberresult, - strlen(numberresult) + 1); - numberresult[0] = '0'; - numberresult[1] = 'x'; - numprinted += 2; - } - n += numprinted; - numberresult += (numprinted + 1); - assert(*(numberresult - 1) == '\0'); - assert(*(numberresult - 2) != '\0'); - assert(numprinted >= 0); - assert(numberresult <= numberresults + numbersize); - break; - case 's': - { - /* UTF-8 */ - const char *s = va_arg(count, const char*); - PyObject *str = PyUnicode_DecodeUTF8Stateful(s, strlen(s), "replace", NULL); - if (!str) - goto fail; - /* since PyUnicode_DecodeUTF8 returns already flexible - unicode objects, there is no need to call ready on them */ - argmaxchar = PyUnicode_MAX_CHAR_VALUE(str); - maxchar = Py_MAX(maxchar, argmaxchar); - n += PyUnicode_GET_LENGTH(str); - /* Remember the str and switch to the next slot */ - *callresult++ = str; - break; - } - case 'U': - { - PyObject *obj = va_arg(count, PyObject *); - assert(obj && _PyUnicode_CHECK(obj)); - if (PyUnicode_READY(obj) == -1) - goto fail; - argmaxchar = PyUnicode_MAX_CHAR_VALUE(obj); - maxchar = Py_MAX(maxchar, argmaxchar); - n += PyUnicode_GET_LENGTH(obj); - break; - } - case 'V': - { - PyObject *obj = va_arg(count, PyObject *); - const char *str = va_arg(count, const char *); - PyObject *str_obj; - assert(obj || str); - assert(!obj || _PyUnicode_CHECK(obj)); - if (obj) { - if (PyUnicode_READY(obj) == -1) - goto fail; - argmaxchar = PyUnicode_MAX_CHAR_VALUE(obj); - maxchar = Py_MAX(maxchar, argmaxchar); - n += PyUnicode_GET_LENGTH(obj); - *callresult++ = NULL; - } - else { - str_obj = PyUnicode_DecodeUTF8Stateful(str, strlen(str), "replace", NULL); - if (!str_obj) - goto fail; - if (PyUnicode_READY(str_obj) == -1) { - Py_DECREF(str_obj); - goto fail; - } - argmaxchar = PyUnicode_MAX_CHAR_VALUE(str_obj); - maxchar = Py_MAX(maxchar, argmaxchar); - n += PyUnicode_GET_LENGTH(str_obj); - *callresult++ = str_obj; - } - break; - } - case 'S': - { - PyObject *obj = va_arg(count, PyObject *); - PyObject *str; - assert(obj); - str = PyObject_Str(obj); - if (!str) - goto fail; - if (PyUnicode_READY(str) == -1) { - Py_DECREF(str); - goto fail; - } - argmaxchar = PyUnicode_MAX_CHAR_VALUE(str); - maxchar = Py_MAX(maxchar, argmaxchar); - n += PyUnicode_GET_LENGTH(str); - /* Remember the str and switch to the next slot */ - *callresult++ = str; - break; - } - case 'R': - { - PyObject *obj = va_arg(count, PyObject *); - PyObject *repr; - assert(obj); - repr = PyObject_Repr(obj); - if (!repr) - goto fail; - if (PyUnicode_READY(repr) == -1) { - Py_DECREF(repr); - goto fail; - } - argmaxchar = PyUnicode_MAX_CHAR_VALUE(repr); - maxchar = Py_MAX(maxchar, argmaxchar); - n += PyUnicode_GET_LENGTH(repr); - /* Remember the repr and switch to the next slot */ - *callresult++ = repr; - break; - } - case 'A': - { - PyObject *obj = va_arg(count, PyObject *); - PyObject *ascii; - assert(obj); - ascii = PyObject_ASCII(obj); - if (!ascii) - goto fail; - if (PyUnicode_READY(ascii) == -1) { - Py_DECREF(ascii); - goto fail; - } - argmaxchar = PyUnicode_MAX_CHAR_VALUE(ascii); - maxchar = Py_MAX(maxchar, argmaxchar); - n += PyUnicode_GET_LENGTH(ascii); - /* Remember the repr and switch to the next slot */ - *callresult++ = ascii; - break; - } - default: - /* if we stumble upon an unknown - formatting code, copy the rest of - the format string to the output - string. (we cannot just skip the - code, since there's no way to know - what's in the argument list) */ - n += strlen(p); - goto expand; - } - } else - n++; - } - expand: - /* step 4: fill the buffer */ - /* Since we've analyzed how much space we need, - we don't have to resize the string. - There can be no errors beyond this point. */ - string = PyUnicode_New(n, maxchar); - if (!string) - goto fail; - kind = PyUnicode_KIND(string); - data = PyUnicode_DATA(string); - callresult = callresults; - numberresult = numberresults; + while (*p != '\0' && *p != '%'); + len = p - f; - for (i = 0, f = format; *f; f++) { - if (*f == '%') { - const char* p; + if (*p == '\0') + writer.overallocate = 0; + if (_PyUnicodeWriter_Prepare(&writer, len, 127) == -1) + goto fail; + unicode_write_cstr(writer.buffer, writer.pos, f, len); + writer.pos += len; - p = f; - f = parse_format_flags(f, NULL, NULL, NULL, NULL, NULL); - /* checking for == because the last argument could be a empty - string, which causes i to point to end, the assert at the end of - the loop */ - assert(i <= PyUnicode_GET_LENGTH(string)); - - switch (*f) { - case 'c': - { - const int ordinal = va_arg(vargs, int); - PyUnicode_WRITE(kind, data, i++, ordinal); - break; - } - case 'i': - case 'd': - case 'u': - case 'x': - case 'p': - { - Py_ssize_t len; - /* unused, since we already have the result */ - if (*f == 'p') - (void) va_arg(vargs, void *); - else - (void) va_arg(vargs, int); - /* extract the result from numberresults and append. */ - len = strlen(numberresult); - unicode_write_cstr(string, i, numberresult, len); - /* skip over the separating '\0' */ - i += len; - numberresult += len; - assert(*numberresult == '\0'); - numberresult++; - assert(numberresult <= numberresults + numbersize); - break; - } - case 's': - { - /* unused, since we already have the result */ - Py_ssize_t size; - (void) va_arg(vargs, char *); - size = PyUnicode_GET_LENGTH(*callresult); - assert(PyUnicode_KIND(*callresult) <= PyUnicode_KIND(string)); - _PyUnicode_FastCopyCharacters(string, i, *callresult, 0, size); - i += size; - /* We're done with the unicode()/repr() => forget it */ - Py_DECREF(*callresult); - /* switch to next unicode()/repr() result */ - ++callresult; - break; - } - case 'U': - { - PyObject *obj = va_arg(vargs, PyObject *); - Py_ssize_t size; - assert(PyUnicode_KIND(obj) <= PyUnicode_KIND(string)); - size = PyUnicode_GET_LENGTH(obj); - _PyUnicode_FastCopyCharacters(string, i, obj, 0, size); - i += size; - break; - } - case 'V': - { - Py_ssize_t size; - PyObject *obj = va_arg(vargs, PyObject *); - va_arg(vargs, const char *); - if (obj) { - size = PyUnicode_GET_LENGTH(obj); - assert(PyUnicode_KIND(obj) <= PyUnicode_KIND(string)); - _PyUnicode_FastCopyCharacters(string, i, obj, 0, size); - i += size; - } else { - size = PyUnicode_GET_LENGTH(*callresult); - assert(PyUnicode_KIND(*callresult) <= - PyUnicode_KIND(string)); - _PyUnicode_FastCopyCharacters(string, i, *callresult, 0, size); - i += size; - Py_DECREF(*callresult); - } - ++callresult; - break; - } - case 'S': - case 'R': - case 'A': - { - Py_ssize_t size = PyUnicode_GET_LENGTH(*callresult); - /* unused, since we already have the result */ - (void) va_arg(vargs, PyObject *); - assert(PyUnicode_KIND(*callresult) <= PyUnicode_KIND(string)); - _PyUnicode_FastCopyCharacters(string, i, *callresult, 0, size); - i += size; - /* We're done with the unicode()/repr() => forget it */ - Py_DECREF(*callresult); - /* switch to next unicode()/repr() result */ - ++callresult; - break; - } - case '%': - PyUnicode_WRITE(kind, data, i++, '%'); - break; - default: - { - Py_ssize_t len = strlen(p); - unicode_write_cstr(string, i, p, len); - i += len; - assert(i == PyUnicode_GET_LENGTH(string)); - goto end; - } - } - } - else { - assert(i < PyUnicode_GET_LENGTH(string)); - PyUnicode_WRITE(kind, data, i++, *f); + f = p; } } - assert(i == PyUnicode_GET_LENGTH(string)); + return _PyUnicodeWriter_Finish(&writer); - end: - if (callresults) - PyObject_Free(callresults); - if (numberresults) - PyObject_Free(numberresults); - return unicode_result(string); fail: - if (callresults) { - PyObject **callresult2 = callresults; - while (callresult2 < callresult) { - Py_XDECREF(*callresult2); - ++callresult2; - } - PyObject_Free(callresults); - } - if (numberresults) - PyObject_Free(numberresults); + _PyUnicodeWriter_Dealloc(&writer); return NULL; }