PyUnicode_FromFormat, b'unicode\xe9=%s', 'ascii')
# test "%c"
- self.assertEqual(PyUnicode_FromFormat(b'%c', c_int(0xabcd)), '\uabcd')
- self.assertEqual(PyUnicode_FromFormat(b'%c', c_int(0x10ffff)), '\U0010ffff')
+ check_format('\uabcd',
+ b'%c', c_int(0xabcd))
+ check_format('\U0010ffff',
+ b'%c', c_int(0x10ffff))
+ with self.assertRaises(OverflowError):
+ PyUnicode_FromFormat(b'%c', c_int(0x110000))
# Issue #18183
- self.assertEqual(
- PyUnicode_FromFormat(b'%c%c', c_int(0x10000), c_int(0x100000)),
- '\U00010000\U00100000')
+ check_format('\U00010000\U00100000',
+ b'%c%c', c_int(0x10000), c_int(0x100000))
# test "%"
- self.assertEqual(PyUnicode_FromFormat(b'%'), '%')
- self.assertEqual(PyUnicode_FromFormat(b'%%'), '%')
- self.assertEqual(PyUnicode_FromFormat(b'%%s'), '%s')
- self.assertEqual(PyUnicode_FromFormat(b'[%%]'), '[%]')
- self.assertEqual(PyUnicode_FromFormat(b'%%%s', b'abc'), '%abc')
+ check_format('%',
+ b'%')
+ check_format('%',
+ b'%%')
+ check_format('%s',
+ b'%%s')
+ check_format('[%]',
+ b'[%%]')
+ check_format('%abc',
+ b'%%%s', b'abc')
+
+ # truncated string
+ check_format('abc',
+ b'%.3s', b'abcdef')
+ check_format('abc[\ufffd',
+ b'%.5s', 'abc[\u20ac]'.encode('utf8'))
+ check_format("'\\u20acABC'",
+ b'%A', '\u20acABC')
+ check_format("'\\u20",
+ b'%.5A', '\u20acABCDEF')
+ check_format("'\u20acABC'",
+ b'%R', '\u20acABC')
+ check_format("'\u20acA",
+ b'%.3R', '\u20acABCDEF')
+ check_format('\u20acAB',
+ b'%.3S', '\u20acABCDEF')
+ check_format('\u20acAB',
+ b'%.3U', '\u20acABCDEF')
+ check_format('\u20acAB',
+ b'%.3V', '\u20acABCDEF', None)
+ check_format('abc[\ufffd',
+ b'%.5V', None, 'abc[\u20ac]'.encode('utf8'))
+
+ # following tests come from #7330
+ # test width modifier and precision modifier with %S
+ check_format("repr= abc",
+ b'repr=%5S', 'abc')
+ check_format("repr=ab",
+ b'repr=%.2S', 'abc')
+ check_format("repr= ab",
+ b'repr=%5.2S', 'abc')
+
+ # test width modifier and precision modifier with %R
+ check_format("repr= 'abc'",
+ b'repr=%8R', 'abc')
+ check_format("repr='ab",
+ b'repr=%.3R', 'abc')
+ check_format("repr= 'ab",
+ b'repr=%5.3R', 'abc')
+
+ # test width modifier and precision modifier with %A
+ check_format("repr= 'abc'",
+ b'repr=%8A', 'abc')
+ check_format("repr='ab",
+ b'repr=%.3A', 'abc')
+ check_format("repr= 'ab",
+ b'repr=%5.3A', 'abc')
+
+ # test width modifier and precision modifier with %s
+ check_format("repr= abc",
+ b'repr=%5s', b'abc')
+ check_format("repr=ab",
+ b'repr=%.2s', b'abc')
+ check_format("repr= ab",
+ b'repr=%5.2s', b'abc')
+
+ # test width modifier and precision modifier with %U
+ check_format("repr= abc",
+ b'repr=%5U', 'abc')
+ check_format("repr=ab",
+ b'repr=%.2U', 'abc')
+ check_format("repr= ab",
+ b'repr=%5.2U', 'abc')
+
+ # test width modifier and precision modifier with %V
+ check_format("repr= abc",
+ b'repr=%5V', 'abc', b'123')
+ check_format("repr=ab",
+ b'repr=%.2V', 'abc', b'123')
+ check_format("repr= ab",
+ b'repr=%5.2V', 'abc', b'123')
+ check_format("repr= 123",
+ b'repr=%5V', None, b'123')
+ check_format("repr=12",
+ b'repr=%.2V', None, b'123')
+ check_format("repr= 12",
+ b'repr=%5.2V', None, b'123')
# test integer formats (%i, %d, %u)
- self.assertEqual(PyUnicode_FromFormat(b'%03i', c_int(10)), '010')
- self.assertEqual(PyUnicode_FromFormat(b'%0.4i', c_int(10)), '0010')
- self.assertEqual(PyUnicode_FromFormat(b'%i', c_int(-123)), '-123')
- self.assertEqual(PyUnicode_FromFormat(b'%li', c_long(-123)), '-123')
- self.assertEqual(PyUnicode_FromFormat(b'%lli', c_longlong(-123)), '-123')
- self.assertEqual(PyUnicode_FromFormat(b'%zi', c_ssize_t(-123)), '-123')
-
- self.assertEqual(PyUnicode_FromFormat(b'%d', c_int(-123)), '-123')
- self.assertEqual(PyUnicode_FromFormat(b'%ld', c_long(-123)), '-123')
- self.assertEqual(PyUnicode_FromFormat(b'%lld', c_longlong(-123)), '-123')
- self.assertEqual(PyUnicode_FromFormat(b'%zd', c_ssize_t(-123)), '-123')
-
- self.assertEqual(PyUnicode_FromFormat(b'%u', c_uint(123)), '123')
- self.assertEqual(PyUnicode_FromFormat(b'%lu', c_ulong(123)), '123')
- self.assertEqual(PyUnicode_FromFormat(b'%llu', c_ulonglong(123)), '123')
- self.assertEqual(PyUnicode_FromFormat(b'%zu', c_size_t(123)), '123')
+ check_format('010',
+ b'%03i', c_int(10))
+ check_format('0010',
+ b'%0.4i', c_int(10))
+ check_format('-123',
+ b'%i', c_int(-123))
+ check_format('-123',
+ b'%li', c_long(-123))
+ check_format('-123',
+ b'%lli', c_longlong(-123))
+ check_format('-123',
+ b'%zi', c_ssize_t(-123))
+
+ check_format('-123',
+ b'%d', c_int(-123))
+ check_format('-123',
+ b'%ld', c_long(-123))
+ check_format('-123',
+ b'%lld', c_longlong(-123))
+ check_format('-123',
+ b'%zd', c_ssize_t(-123))
+
+ check_format('123',
+ b'%u', c_uint(123))
+ check_format('123',
+ b'%lu', c_ulong(123))
+ check_format('123',
+ b'%llu', c_ulonglong(123))
+ check_format('123',
+ b'%zu', c_size_t(123))
+
+ # test long output
+ min_longlong = -(2 ** (8 * sizeof(c_longlong) - 1))
+ max_longlong = -min_longlong - 1
+ check_format(str(min_longlong),
+ b'%lld', c_longlong(min_longlong))
+ check_format(str(max_longlong),
+ b'%lld', c_longlong(max_longlong))
+ max_ulonglong = 2 ** (8 * sizeof(c_ulonglong)) - 1
+ check_format(str(max_ulonglong),
+ b'%llu', c_ulonglong(max_ulonglong))
+ PyUnicode_FromFormat(b'%p', c_void_p(-1))
+
+ # test padding (width and/or precision)
+ check_format('123'.rjust(10, '0'),
+ b'%010i', c_int(123))
+ check_format('123'.rjust(100),
+ b'%100i', c_int(123))
+ check_format('123'.rjust(100, '0'),
+ b'%.100i', c_int(123))
+ check_format('123'.rjust(80, '0').rjust(100),
+ b'%100.80i', c_int(123))
+
+ check_format('123'.rjust(10, '0'),
+ b'%010u', c_uint(123))
+ check_format('123'.rjust(100),
+ b'%100u', c_uint(123))
+ check_format('123'.rjust(100, '0'),
+ b'%.100u', c_uint(123))
+ check_format('123'.rjust(80, '0').rjust(100),
+ b'%100.80u', c_uint(123))
+
+ check_format('123'.rjust(10, '0'),
+ b'%010x', c_int(0x123))
+ check_format('123'.rjust(100),
+ b'%100x', c_int(0x123))
+ check_format('123'.rjust(100, '0'),
+ b'%.100x', c_int(0x123))
+ check_format('123'.rjust(80, '0').rjust(100),
+ b'%100.80x', c_int(0x123))
# test %A
- text = PyUnicode_FromFormat(b'%%A:%A', 'abc\xe9\uabcd\U0010ffff')
- self.assertEqual(text, r"%A:'abc\xe9\uabcd\U0010ffff'")
+ check_format(r"%A:'abc\xe9\uabcd\U0010ffff'",
+ b'%%A:%A', 'abc\xe9\uabcd\U0010ffff')
# test %V
- text = PyUnicode_FromFormat(b'repr=%V', 'abc', b'xyz')
- self.assertEqual(text, 'repr=abc')
+ check_format('repr=abc',
+ b'repr=%V', 'abc', b'xyz')
# Test string decode from parameter of %s using utf-8.
# b'\xe4\xba\xba\xe6\xb0\x91' is utf-8 encoded byte sequence of
size_tflag = 1;
++f;
}
- if (p_longflag != NULL)
- *p_longflag = longflag;
- if (p_longlongflag != NULL)
- *p_longlongflag = longlongflag;
- if (p_size_tflag != NULL)
- *p_size_tflag = size_tflag;
- return f;
-}
-/* maximum number of characters required for output of %ld. 21 characters
- allows for 64-bit integers (in decimal) and an optional sign. */
-#define MAX_LONG_CHARS 21
-/* maximum number of characters required for output of %lld.
- We need at most ceil(log10(256)*SIZEOF_LONG_LONG) digits,
- plus 1 for the sign. 53/22 is an upper bound for log10(256). */
-#define MAX_LONG_LONG_CHARS (2 + (SIZEOF_LONG_LONG*53-1) / 22)
+ if (f[1] == '\0')
+ writer->overallocate = 0;
-PyObject *
-PyUnicode_FromFormatV(const char *format, va_list vargs)
-{
- va_list count;
- Py_ssize_t callcount = 0;
- PyObject **callresults = NULL;
- PyObject **callresult = NULL;
- Py_ssize_t n = 0;
- int width = 0;
- int precision = 0;
- int zeropad;
- const char* f;
- PyObject *string;
- /* used by sprintf */
- char fmt[61]; /* should be enough for %0width.precisionlld */
- Py_UCS4 maxchar = 127; /* result is ASCII by default */
- Py_UCS4 argmaxchar;
- Py_ssize_t numbersize = 0;
- char *numberresults = NULL;
- char *numberresult = NULL;
- Py_ssize_t i;
- int kind;
- void *data;
+ switch (*f) {
+ case 'c':
+ {
+ int ordinal = va_arg(*vargs, int);
+ if (ordinal < 0 || ordinal > MAX_UNICODE) {
- PyErr_SetString(PyExc_ValueError,
++ PyErr_SetString(PyExc_OverflowError,
+ "character argument not in range(0x110000)");
+ return NULL;
+ }
+ if (_PyUnicodeWriter_WriteCharInline(writer, ordinal) < 0)
+ return NULL;
+ break;
+ }
- Py_VA_COPY(count, vargs);
- /* step 1: count the number of %S/%R/%A/%s format specifications
- * (we call PyObject_Str()/PyObject_Repr()/PyObject_ASCII()/
- * PyUnicode_DecodeUTF8() for these objects once during step 3 and put the
- * result in an array)
- * also estimate a upper bound for all the number formats in the string,
- * numbers will be formatted in step 3 and be kept in a '\0'-separated
- * buffer before putting everything together. */
- for (f = format; *f; f++) {
- if (*f == '%') {
- int longlongflag;
- /* skip width or width.precision (eg. "1.2" of "%1.2f") */
- f = parse_format_flags(f, &width, NULL, NULL, &longlongflag, NULL);
- if (*f == 's' || *f=='S' || *f=='R' || *f=='A' || *f=='V')
- ++callcount;
+ case 'i':
+ case 'd':
+ case 'u':
+ case 'x':
+ {
+ /* used by sprintf */
+ char fmt[10]; /* should be enough for "%0lld\0" */
+ char buffer[MAX_LONG_LONG_CHARS];
+ Py_ssize_t arglen;
- else if (*f == 'd' || *f=='u' || *f=='i' || *f=='x' || *f=='p') {
+ if (*f == 'u') {
+ makefmt(fmt, longflag, longlongflag, size_tflag, *f);
+
+ if (longflag)
+ len = sprintf(buffer, fmt,
+ va_arg(*vargs, unsigned long));
#ifdef HAVE_LONG_LONG
- if (longlongflag) {
- if (width < MAX_LONG_LONG_CHARS)
- width = MAX_LONG_LONG_CHARS;
- }
- else
+ else if (longlongflag)
+ len = sprintf(buffer, fmt,
+ va_arg(*vargs, unsigned PY_LONG_LONG));
#endif
- /* MAX_LONG_CHARS is enough to hold a 64-bit integer,
- including sign. Decimal takes the most space. This
- isn't enough for octal. If a width is specified we
- need more (which we allocate later). */
- if (width < MAX_LONG_CHARS)
- width = MAX_LONG_CHARS;
-
- /* account for the size + '\0' to separate numbers
- inside of the numberresults buffer */
- numbersize += (width + 1);
- }
+ else if (size_tflag)
+ len = sprintf(buffer, fmt,
+ va_arg(*vargs, size_t));
+ else
+ len = sprintf(buffer, fmt,
+ va_arg(*vargs, unsigned int));
+ }
+ else if (*f == 'x') {
+ makefmt(fmt, 0, 0, 0, 'x');
+ len = sprintf(buffer, fmt, va_arg(*vargs, int));
+ }
+ else {
+ makefmt(fmt, longflag, longlongflag, size_tflag, *f);
+
+ if (longflag)
+ len = sprintf(buffer, fmt,
+ va_arg(*vargs, long));
+#ifdef HAVE_LONG_LONG
+ else if (longlongflag)
+ len = sprintf(buffer, fmt,
+ va_arg(*vargs, PY_LONG_LONG));
+#endif
+ else if (size_tflag)
+ len = sprintf(buffer, fmt,
+ va_arg(*vargs, Py_ssize_t));
+ else
+ len = sprintf(buffer, fmt,
+ va_arg(*vargs, int));
}
- else if ((unsigned char)*f > 127) {
- PyErr_Format(PyExc_ValueError,
- "PyUnicode_FromFormatV() expects an ASCII-encoded format "
- "string, got a non-ASCII byte: 0x%02x",
- (unsigned char)*f);
+ assert(len >= 0);
+
+ if (precision < len)
+ precision = len;
+
+ arglen = Py_MAX(precision, width);
+ assert(ucs1lib_find_max_char((Py_UCS1*)buffer, (Py_UCS1*)buffer + len) <= 127);
+ if (_PyUnicodeWriter_Prepare(writer, arglen, 127) == -1)
return NULL;
+
+ if (width > precision) {
+ Py_UCS4 fillchar;
+ fill = width - precision;
+ fillchar = zeropad?'0':' ';
+ if (PyUnicode_Fill(writer->buffer, writer->pos, fill, fillchar) == -1)
+ return NULL;
+ writer->pos += fill;
}
+ if (precision > len) {
+ fill = precision - len;
+ if (PyUnicode_Fill(writer->buffer, writer->pos, fill, '0') == -1)
+ return NULL;
+ writer->pos += fill;
+ }
+
+ unicode_write_cstr(writer->buffer, writer->pos, buffer, len);
+ writer->pos += len;
+ break;
}
- /* step 2: allocate memory for the results of
- * PyObject_Str()/PyObject_Repr()/PyUnicode_DecodeUTF8() calls */
- if (callcount) {
- callresults = PyObject_Malloc(sizeof(PyObject *) * callcount);
- if (!callresults) {
- PyErr_NoMemory();
+
+ case 'p':
+ {
+ char number[MAX_LONG_LONG_CHARS];
+
+ len = sprintf(number, "%p", va_arg(*vargs, void*));
+ assert(len >= 0);
+
+ /* %p is ill-defined: ensure leading 0x. */
+ if (number[1] == 'X')
+ number[1] = 'x';
+ else if (number[1] != 'x') {
+ memmove(number + 2, number,
+ strlen(number) + 1);
+ number[0] = '0';
+ number[1] = 'x';
+ len += 2;
+ }
+
+ assert(ucs1lib_find_max_char((Py_UCS1*)number, (Py_UCS1*)number + len) <= 127);
+ if (_PyUnicodeWriter_Prepare(writer, len, 127) == -1)
+ return NULL;
+ unicode_write_cstr(writer->buffer, writer->pos, number, len);
+ writer->pos += len;
+ break;
+ }
+
+ case 's':
+ {
+ /* UTF-8 */
+ const char *s = va_arg(*vargs, const char*);
+ if (unicode_fromformat_write_cstr(writer, s, width, precision) < 0)
+ return NULL;
+ break;
+ }
+
+ case 'U':
+ {
+ PyObject *obj = va_arg(*vargs, PyObject *);
+ assert(obj && _PyUnicode_CHECK(obj));
+
+ if (unicode_fromformat_write_str(writer, obj, width, precision) == -1)
return NULL;
+ break;
+ }
+
+ case 'V':
+ {
+ PyObject *obj = va_arg(*vargs, PyObject *);
+ const char *str = va_arg(*vargs, const char *);
+ if (obj) {
+ assert(_PyUnicode_CHECK(obj));
+ if (unicode_fromformat_write_str(writer, obj, width, precision) == -1)
+ return NULL;
+ }
+ else {
+ assert(str != NULL);
+ if (unicode_fromformat_write_cstr(writer, str, width, precision) < 0)
+ return NULL;
}
- callresult = callresults;
+ break;
}
- /* step 2.5: allocate memory for the results of formating numbers */
- if (numbersize) {
- numberresults = PyObject_Malloc(numbersize);
- if (!numberresults) {
- PyErr_NoMemory();
- goto fail;
+
+ case 'S':
+ {
+ PyObject *obj = va_arg(*vargs, PyObject *);
+ PyObject *str;
+ assert(obj);
+ str = PyObject_Str(obj);
+ if (!str)
+ return NULL;
+ if (unicode_fromformat_write_str(writer, str, width, precision) == -1) {
+ Py_DECREF(str);
+ return NULL;
+ }
+ Py_DECREF(str);
+ break;
+ }
+
+ case 'R':
+ {
+ PyObject *obj = va_arg(*vargs, PyObject *);
+ PyObject *repr;
+ assert(obj);
+ repr = PyObject_Repr(obj);
+ if (!repr)
+ return NULL;
+ if (unicode_fromformat_write_str(writer, repr, width, precision) == -1) {
+ Py_DECREF(repr);
+ return NULL;
+ }
+ Py_DECREF(repr);
+ break;
+ }
+
+ case 'A':
+ {
+ PyObject *obj = va_arg(*vargs, PyObject *);
+ PyObject *ascii;
+ assert(obj);
+ ascii = PyObject_ASCII(obj);
+ if (!ascii)
+ return NULL;
+ if (unicode_fromformat_write_str(writer, ascii, width, precision) == -1) {
+ Py_DECREF(ascii);
+ return NULL;
}
- numberresult = numberresults;
+ Py_DECREF(ascii);
+ break;
+ }
+
+ case '%':
+ if (_PyUnicodeWriter_WriteCharInline(writer, '%') < 0)
+ return NULL;
+ break;
+
+ default:
+ /* if we stumble upon an unknown formatting code, copy the rest
+ of the format string to the output string. (we cannot just
+ skip the code, since there's no way to know what's in the
+ argument list) */
+ len = strlen(p);
+ if (_PyUnicodeWriter_WriteCstr(writer, p, len) == -1)
+ return NULL;
+ f = p+len;
+ return f;
}
- /* step 3: format numbers and figure out how large a buffer we need */
- for (f = format; *f; f++) {
+ f++;
+ return f;
+}
+
+/* Build a str object from a printf-style, ASCII-only format string and a
+ va_list of arguments.
+
+ The result is accumulated in a _PyUnicodeWriter. Each '%' directive is
+ handed to unicode_fromformat_arg(), which consumes its arguments from a
+ private copy of vargs; each run of literal characters between directives
+ is checked to be pure ASCII and then copied to the writer in one step.
+
+ Returns the value of _PyUnicodeWriter_Finish() on success. On failure
+ the writer is deallocated and NULL is returned; a byte > 127 in the
+ literal part of the format raises ValueError. */
+PyObject *
+PyUnicode_FromFormatV(const char *format, va_list vargs)
+{
+ va_list vargs2;
+ const char *f;
+ _PyUnicodeWriter writer;
+
+ _PyUnicodeWriter_Init(&writer);
+ writer.min_length = strlen(format) + 100;
+ writer.overallocate = 1;
+
+ /* va_list may be an array (of 1 item) on some platforms (ex: AMD64).
+ Copy it to be able to pass a reference to a subfunction. */
+ Py_VA_COPY(vargs2, vargs);
+
+ for (f = format; *f; ) {
if (*f == '%') {
- const char* p;
- int longflag;
- int longlongflag;
- int size_tflag;
- int numprinted;
+ f = unicode_fromformat_arg(&writer, f, &vargs2);
+ if (f == NULL)
+ goto fail;
+ }
+ else {
+ const char *p;
+ Py_ssize_t len;
p = f;
- zeropad = (f[1] == '0');
- f = parse_format_flags(f, &width, &precision,
- &longflag, &longlongflag, &size_tflag);
- switch (*f) {
- case 'c':
+ do
{
- int ordinal = va_arg(count, int);
- if (ordinal < 0 || ordinal > MAX_UNICODE) {
- PyErr_SetString(PyExc_OverflowError,
- "%c arg not in range(0x110000)");
- goto fail;
+ if ((unsigned char)*p > 127) {
+ PyErr_Format(PyExc_ValueError,
+ "PyUnicode_FromFormatV() expects an ASCII-encoded format "
+ "string, got a non-ASCII byte: 0x%02x",
+ (unsigned char)*p);
+ /* go through the common cleanup so that the writer buffer
+ allocated by earlier writes is released */
+ goto fail;
}
- maxchar = Py_MAX(maxchar, (Py_UCS4)ordinal);
- n++;
- break;
+ p++;
}
- case '%':
- n++;
- break;
- case 'i':
- case 'd':
- makefmt(fmt, longflag, longlongflag, size_tflag, zeropad,
- width, precision, *f);
- if (longflag)
- numprinted = sprintf(numberresult, fmt,
- va_arg(count, long));
-#ifdef HAVE_LONG_LONG
- else if (longlongflag)
- numprinted = sprintf(numberresult, fmt,
- va_arg(count, PY_LONG_LONG));
-#endif
- else if (size_tflag)
- numprinted = sprintf(numberresult, fmt,
- va_arg(count, Py_ssize_t));
- else
- numprinted = sprintf(numberresult, fmt,
- va_arg(count, int));
- n += numprinted;
- /* advance by +1 to skip over the '\0' */
- numberresult += (numprinted + 1);
- assert(*(numberresult - 1) == '\0');
- assert(*(numberresult - 2) != '\0');
- assert(numprinted >= 0);
- assert(numberresult <= numberresults + numbersize);
- break;
- case 'u':
- makefmt(fmt, longflag, longlongflag, size_tflag, zeropad,
- width, precision, 'u');
- if (longflag)
- numprinted = sprintf(numberresult, fmt,
- va_arg(count, unsigned long));
-#ifdef HAVE_LONG_LONG
- else if (longlongflag)
- numprinted = sprintf(numberresult, fmt,
- va_arg(count, unsigned PY_LONG_LONG));
-#endif
- else if (size_tflag)
- numprinted = sprintf(numberresult, fmt,
- va_arg(count, size_t));
- else
- numprinted = sprintf(numberresult, fmt,
- va_arg(count, unsigned int));
- n += numprinted;
- numberresult += (numprinted + 1);
- assert(*(numberresult - 1) == '\0');
- assert(*(numberresult - 2) != '\0');
- assert(numprinted >= 0);
- assert(numberresult <= numberresults + numbersize);
- break;
- case 'x':
- makefmt(fmt, 0, 0, 0, zeropad, width, precision, 'x');
- numprinted = sprintf(numberresult, fmt, va_arg(count, int));
- n += numprinted;
- numberresult += (numprinted + 1);
- assert(*(numberresult - 1) == '\0');
- assert(*(numberresult - 2) != '\0');
- assert(numprinted >= 0);
- assert(numberresult <= numberresults + numbersize);
- break;
- case 'p':
- numprinted = sprintf(numberresult, "%p", va_arg(count, void*));
- /* %p is ill-defined: ensure leading 0x. */
- if (numberresult[1] == 'X')
- numberresult[1] = 'x';
- else if (numberresult[1] != 'x') {
- memmove(numberresult + 2, numberresult,
- strlen(numberresult) + 1);
- numberresult[0] = '0';
- numberresult[1] = 'x';
- numprinted += 2;
- }
- n += numprinted;
- numberresult += (numprinted + 1);
- assert(*(numberresult - 1) == '\0');
- assert(*(numberresult - 2) != '\0');
- assert(numprinted >= 0);
- assert(numberresult <= numberresults + numbersize);
- break;
- case 's':
- {
- /* UTF-8 */
- const char *s = va_arg(count, const char*);
- PyObject *str = PyUnicode_DecodeUTF8Stateful(s, strlen(s), "replace", NULL);
- if (!str)
- goto fail;
- /* since PyUnicode_DecodeUTF8 returns already flexible
- unicode objects, there is no need to call ready on them */
- argmaxchar = PyUnicode_MAX_CHAR_VALUE(str);
- maxchar = Py_MAX(maxchar, argmaxchar);
- n += PyUnicode_GET_LENGTH(str);
- /* Remember the str and switch to the next slot */
- *callresult++ = str;
- break;
- }
- case 'U':
- {
- PyObject *obj = va_arg(count, PyObject *);
- assert(obj && _PyUnicode_CHECK(obj));
- if (PyUnicode_READY(obj) == -1)
- goto fail;
- argmaxchar = PyUnicode_MAX_CHAR_VALUE(obj);
- maxchar = Py_MAX(maxchar, argmaxchar);
- n += PyUnicode_GET_LENGTH(obj);
- break;
- }
- case 'V':
- {
- PyObject *obj = va_arg(count, PyObject *);
- const char *str = va_arg(count, const char *);
- PyObject *str_obj;
- assert(obj || str);
- assert(!obj || _PyUnicode_CHECK(obj));
- if (obj) {
- if (PyUnicode_READY(obj) == -1)
- goto fail;
- argmaxchar = PyUnicode_MAX_CHAR_VALUE(obj);
- maxchar = Py_MAX(maxchar, argmaxchar);
- n += PyUnicode_GET_LENGTH(obj);
- *callresult++ = NULL;
- }
- else {
- str_obj = PyUnicode_DecodeUTF8Stateful(str, strlen(str), "replace", NULL);
- if (!str_obj)
- goto fail;
- if (PyUnicode_READY(str_obj) == -1) {
- Py_DECREF(str_obj);
- goto fail;
- }
- argmaxchar = PyUnicode_MAX_CHAR_VALUE(str_obj);
- maxchar = Py_MAX(maxchar, argmaxchar);
- n += PyUnicode_GET_LENGTH(str_obj);
- *callresult++ = str_obj;
- }
- break;
- }
- case 'S':
- {
- PyObject *obj = va_arg(count, PyObject *);
- PyObject *str;
- assert(obj);
- str = PyObject_Str(obj);
- if (!str)
- goto fail;
- if (PyUnicode_READY(str) == -1) {
- Py_DECREF(str);
- goto fail;
- }
- argmaxchar = PyUnicode_MAX_CHAR_VALUE(str);
- maxchar = Py_MAX(maxchar, argmaxchar);
- n += PyUnicode_GET_LENGTH(str);
- /* Remember the str and switch to the next slot */
- *callresult++ = str;
- break;
- }
- case 'R':
- {
- PyObject *obj = va_arg(count, PyObject *);
- PyObject *repr;
- assert(obj);
- repr = PyObject_Repr(obj);
- if (!repr)
- goto fail;
- if (PyUnicode_READY(repr) == -1) {
- Py_DECREF(repr);
- goto fail;
- }
- argmaxchar = PyUnicode_MAX_CHAR_VALUE(repr);
- maxchar = Py_MAX(maxchar, argmaxchar);
- n += PyUnicode_GET_LENGTH(repr);
- /* Remember the repr and switch to the next slot */
- *callresult++ = repr;
- break;
- }
- case 'A':
- {
- PyObject *obj = va_arg(count, PyObject *);
- PyObject *ascii;
- assert(obj);
- ascii = PyObject_ASCII(obj);
- if (!ascii)
- goto fail;
- if (PyUnicode_READY(ascii) == -1) {
- Py_DECREF(ascii);
- goto fail;
- }
- argmaxchar = PyUnicode_MAX_CHAR_VALUE(ascii);
- maxchar = Py_MAX(maxchar, argmaxchar);
- n += PyUnicode_GET_LENGTH(ascii);
- /* Remember the repr and switch to the next slot */
- *callresult++ = ascii;
- break;
- }
- default:
- /* if we stumble upon an unknown
- formatting code, copy the rest of
- the format string to the output
- string. (we cannot just skip the
- code, since there's no way to know
- what's in the argument list) */
- n += strlen(p);
- goto expand;
- }
- } else
- n++;
- }
- expand:
- /* step 4: fill the buffer */
- /* Since we've analyzed how much space we need,
- we don't have to resize the string.
- There can be no errors beyond this point. */
- string = PyUnicode_New(n, maxchar);
- if (!string)
- goto fail;
- kind = PyUnicode_KIND(string);
- data = PyUnicode_DATA(string);
- callresult = callresults;
- numberresult = numberresults;
+ while (*p != '\0' && *p != '%');
+ len = p - f;
- for (i = 0, f = format; *f; f++) {
- if (*f == '%') {
- const char* p;
+ /* last chunk of the format: no further writes will follow, so
+ stop over-allocating the buffer */
+ if (*p == '\0')
+ writer.overallocate = 0;
+ if (_PyUnicodeWriter_Prepare(&writer, len, 127) == -1)
+ goto fail;
+ unicode_write_cstr(writer.buffer, writer.pos, f, len);
+ writer.pos += len;
- p = f;
- f = parse_format_flags(f, NULL, NULL, NULL, NULL, NULL);
- /* checking for == because the last argument could be a empty
- string, which causes i to point to end, the assert at the end of
- the loop */
- assert(i <= PyUnicode_GET_LENGTH(string));
-
- switch (*f) {
- case 'c':
- {
- const int ordinal = va_arg(vargs, int);
- PyUnicode_WRITE(kind, data, i++, ordinal);
- break;
- }
- case 'i':
- case 'd':
- case 'u':
- case 'x':
- case 'p':
- {
- Py_ssize_t len;
- /* unused, since we already have the result */
- if (*f == 'p')
- (void) va_arg(vargs, void *);
- else
- (void) va_arg(vargs, int);
- /* extract the result from numberresults and append. */
- len = strlen(numberresult);
- unicode_write_cstr(string, i, numberresult, len);
- /* skip over the separating '\0' */
- i += len;
- numberresult += len;
- assert(*numberresult == '\0');
- numberresult++;
- assert(numberresult <= numberresults + numbersize);
- break;
- }
- case 's':
- {
- /* unused, since we already have the result */
- Py_ssize_t size;
- (void) va_arg(vargs, char *);
- size = PyUnicode_GET_LENGTH(*callresult);
- assert(PyUnicode_KIND(*callresult) <= PyUnicode_KIND(string));
- _PyUnicode_FastCopyCharacters(string, i, *callresult, 0, size);
- i += size;
- /* We're done with the unicode()/repr() => forget it */
- Py_DECREF(*callresult);
- /* switch to next unicode()/repr() result */
- ++callresult;
- break;
- }
- case 'U':
- {
- PyObject *obj = va_arg(vargs, PyObject *);
- Py_ssize_t size;
- assert(PyUnicode_KIND(obj) <= PyUnicode_KIND(string));
- size = PyUnicode_GET_LENGTH(obj);
- _PyUnicode_FastCopyCharacters(string, i, obj, 0, size);
- i += size;
- break;
- }
- case 'V':
- {
- Py_ssize_t size;
- PyObject *obj = va_arg(vargs, PyObject *);
- va_arg(vargs, const char *);
- if (obj) {
- size = PyUnicode_GET_LENGTH(obj);
- assert(PyUnicode_KIND(obj) <= PyUnicode_KIND(string));
- _PyUnicode_FastCopyCharacters(string, i, obj, 0, size);
- i += size;
- } else {
- size = PyUnicode_GET_LENGTH(*callresult);
- assert(PyUnicode_KIND(*callresult) <=
- PyUnicode_KIND(string));
- _PyUnicode_FastCopyCharacters(string, i, *callresult, 0, size);
- i += size;
- Py_DECREF(*callresult);
- }
- ++callresult;
- break;
- }
- case 'S':
- case 'R':
- case 'A':
- {
- Py_ssize_t size = PyUnicode_GET_LENGTH(*callresult);
- /* unused, since we already have the result */
- (void) va_arg(vargs, PyObject *);
- assert(PyUnicode_KIND(*callresult) <= PyUnicode_KIND(string));
- _PyUnicode_FastCopyCharacters(string, i, *callresult, 0, size);
- i += size;
- /* We're done with the unicode()/repr() => forget it */
- Py_DECREF(*callresult);
- /* switch to next unicode()/repr() result */
- ++callresult;
- break;
- }
- case '%':
- PyUnicode_WRITE(kind, data, i++, '%');
- break;
- default:
- {
- Py_ssize_t len = strlen(p);
- unicode_write_cstr(string, i, p, len);
- i += len;
- assert(i == PyUnicode_GET_LENGTH(string));
- goto end;
- }
- }
- }
- else {
- assert(i < PyUnicode_GET_LENGTH(string));
- PyUnicode_WRITE(kind, data, i++, *f);
+ f = p;
}
}
- assert(i == PyUnicode_GET_LENGTH(string));
+ /* every Py_VA_COPY must be paired with va_end */
+ va_end(vargs2);
+ return _PyUnicodeWriter_Finish(&writer);
- end:
- if (callresults)
- PyObject_Free(callresults);
- if (numberresults)
- PyObject_Free(numberresults);
- return unicode_result(string);
fail:
- if (callresults) {
- PyObject **callresult2 = callresults;
- while (callresult2 < callresult) {
- Py_XDECREF(*callresult2);
- ++callresult2;
- }
- PyObject_Free(callresults);
- }
- if (numberresults)
- PyObject_Free(numberresults);
+ va_end(vargs2);
+ _PyUnicodeWriter_Dealloc(&writer);
return NULL;
}