From: Benjamin Peterson Date: Mon, 10 Jun 2013 16:24:01 +0000 (-0700) Subject: merge 3.3 (#18183) X-Git-Tag: v3.4.0a1~536 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=3164f5d56570d6d6eef9dfde5e85347805eb910a;p=python merge 3.3 (#18183) --- 3164f5d56570d6d6eef9dfde5e85347805eb910a diff --cc Objects/unicodeobject.c index 4c3ecd6f76,1c48197b3c..c40e9ece5a --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@@ -10074,8 -10055,8 +10069,8 @@@ replace(PyObject *self, PyObject *str1 maxchar_str2 = PyUnicode_MAX_CHAR_VALUE(str2); /* Replacing str1 with str2 may cause a maxchar reduction in the result string. */ - mayshrink = (maxchar_str2 < maxchar); + mayshrink = (maxchar_str2 < maxchar_str1) && (maxchar == maxchar_str1); - maxchar = MAX_MAXCHAR(maxchar, maxchar_str2); + maxchar = Py_MAX(maxchar, maxchar_str2); if (len1 == len2) { /* same length */ @@@ -12993,17 -12823,14 +12988,17 @@@ _PyUnicodeWriter_PrepareInternal(_PyUni } newlen = writer->pos + length; - maxchar = MAX_MAXCHAR(maxchar, writer->min_char); ++ maxchar = Py_MAX(maxchar, writer->min_char); + if (writer->buffer == NULL) { - if (writer->overallocate) { + assert(!writer->readonly); + if (writer->overallocate && newlen <= (PY_SSIZE_T_MAX - newlen / 4)) { /* overallocate 25% to limit the number of resize */ - if (newlen <= (PY_SSIZE_T_MAX - newlen / 4)) - newlen += newlen / 4; - if (newlen < writer->min_length) - newlen = writer->min_length; + newlen += newlen / 4; } + if (newlen < writer->min_length) + newlen = writer->min_length; + writer->buffer = PyUnicode_New(newlen, maxchar); if (writer->buffer == NULL) return -1; @@@ -13762,571 -13409,520 +13757,571 @@@ formatchar(PyObject *v return (Py_UCS4) -1; } -PyObject * -PyUnicode_Format(PyObject *format, PyObject *args) -{ - Py_ssize_t fmtcnt, fmtpos, arglen, argidx; - int args_owned = 0; - PyObject *dict = NULL; - PyObject *temp = NULL; - PyObject *second = NULL; - PyObject *uformat; - void *fmt; - enum PyUnicode_Kind kind, fmtkind; - _PyUnicodeWriter writer; - Py_ssize_t sublen; - Py_UCS4 maxchar; +/* Parse options of an argument: flags, width, precision. + Handle also "%(name)" syntax. - if (format == NULL || args == NULL) { - PyErr_BadInternalCall(); - return NULL; - } - uformat = PyUnicode_FromObject(format); - if (uformat == NULL) - return NULL; - if (PyUnicode_READY(uformat) == -1) { - Py_DECREF(uformat); - return NULL; - } + Return 0 if the argument has been formatted into arg->str. + Return 1 if the argument has been written into ctx->writer, + Raise an exception and return -1 on error. */ +static int +unicode_format_arg_parse(struct unicode_formatter_t *ctx, + struct unicode_format_arg_t *arg) +{ +#define FORMAT_READ(ctx) \ + PyUnicode_READ((ctx)->fmtkind, (ctx)->fmtdata, (ctx)->fmtpos) - fmt = PyUnicode_DATA(uformat); - fmtkind = PyUnicode_KIND(uformat); - fmtcnt = PyUnicode_GET_LENGTH(uformat); - fmtpos = 0; + PyObject *v; - _PyUnicodeWriter_Init(&writer, fmtcnt + 100); + if (arg->ch == '(') { + /* Get argument value from a dictionary. Example: "%(name)s". */ + Py_ssize_t keystart; + Py_ssize_t keylen; + PyObject *key; + int pcount = 1; - if (PyTuple_Check(args)) { - arglen = PyTuple_Size(args); - argidx = 0; - } - else { - arglen = -1; - argidx = -2; + if (ctx->dict == NULL) { + PyErr_SetString(PyExc_TypeError, + "format requires a mapping"); + return -1; + } + ++ctx->fmtpos; + --ctx->fmtcnt; + keystart = ctx->fmtpos; + /* Skip over balanced parentheses */ + while (pcount > 0 && --ctx->fmtcnt >= 0) { + arg->ch = FORMAT_READ(ctx); + if (arg->ch == ')') + --pcount; + else if (arg->ch == '(') + ++pcount; + ctx->fmtpos++; + } + keylen = ctx->fmtpos - keystart - 1; + if (ctx->fmtcnt < 0 || pcount > 0) { + PyErr_SetString(PyExc_ValueError, + "incomplete format key"); + return -1; + } + key = PyUnicode_Substring(ctx->fmtstr, + keystart, keystart + keylen); + if (key == NULL) + return -1; + if (ctx->args_owned) { + Py_DECREF(ctx->args); + ctx->args_owned = 0; + } + ctx->args = PyObject_GetItem(ctx->dict, key); + Py_DECREF(key); + if (ctx->args == NULL) + return -1; + ctx->args_owned = 1; + ctx->arglen = -1; + ctx->argidx = -2; + } + + /* Parse flags. Example: "%+i" => flags=F_SIGN. */ + while (--ctx->fmtcnt >= 0) { + arg->ch = FORMAT_READ(ctx); + ctx->fmtpos++; + switch (arg->ch) { + case '-': arg->flags |= F_LJUST; continue; + case '+': arg->flags |= F_SIGN; continue; + case ' ': arg->flags |= F_BLANK; continue; + case '#': arg->flags |= F_ALT; continue; + case '0': arg->flags |= F_ZERO; continue; + } + break; } - if (PyMapping_Check(args) && !PyTuple_Check(args) && !PyUnicode_Check(args)) - dict = args; - while (--fmtcnt >= 0) { - if (PyUnicode_READ(fmtkind, fmt, fmtpos) != '%') { - Py_ssize_t nonfmtpos; - nonfmtpos = fmtpos++; - while (fmtcnt >= 0 && - PyUnicode_READ(fmtkind, fmt, fmtpos) != '%') { - fmtpos++; - fmtcnt--; + /* Parse width. Example: "%10s" => width=10 */ + if (arg->ch == '*') { + v = unicode_format_getnextarg(ctx); + if (v == NULL) + return -1; + if (!PyLong_Check(v)) { + PyErr_SetString(PyExc_TypeError, + "* wants int"); + return -1; + } + arg->width = PyLong_AsSsize_t(v); + if (arg->width == -1 && PyErr_Occurred()) + return -1; + if (arg->width < 0) { + arg->flags |= F_LJUST; + arg->width = -arg->width; + } + if (--ctx->fmtcnt >= 0) { + arg->ch = FORMAT_READ(ctx); + ctx->fmtpos++; + } + } + else if (arg->ch >= '0' && arg->ch <= '9') { + arg->width = arg->ch - '0'; + while (--ctx->fmtcnt >= 0) { + arg->ch = FORMAT_READ(ctx); + ctx->fmtpos++; + if (arg->ch < '0' || arg->ch > '9') + break; + /* Since arg->ch is unsigned, the RHS would end up as unsigned, + mixing signed and unsigned comparison. Since arg->ch is between + '0' and '9', casting to int is safe. */ + if (arg->width > (PY_SSIZE_T_MAX - ((int)arg->ch - '0')) / 10) { + PyErr_SetString(PyExc_ValueError, + "width too big"); + return -1; } - if (fmtcnt < 0) - fmtpos--; - sublen = fmtpos - nonfmtpos; - maxchar = _PyUnicode_FindMaxChar(uformat, - nonfmtpos, nonfmtpos + sublen); - if (_PyUnicodeWriter_Prepare(&writer, sublen, maxchar) == -1) - goto onError; + arg->width = arg->width*10 + (arg->ch - '0'); + } + } - _PyUnicode_FastCopyCharacters(writer.buffer, writer.pos, - uformat, nonfmtpos, sublen); - writer.pos += sublen; + /* Parse precision. Example: "%.3f" => prec=3 */ + if (arg->ch == '.') { + arg->prec = 0; + if (--ctx->fmtcnt >= 0) { + arg->ch = FORMAT_READ(ctx); + ctx->fmtpos++; } - else { - /* Got a format specifier */ - int flags = 0; - Py_ssize_t width = -1; - int prec = -1; - Py_UCS4 c = '\0'; - Py_UCS4 fill; - int sign; - Py_UCS4 signchar; - int isnumok; - PyObject *v = NULL; - void *pbuf = NULL; - Py_ssize_t pindex, len; - Py_UCS4 bufmaxchar; - Py_ssize_t buflen; - - fmtpos++; - c = PyUnicode_READ(fmtkind, fmt, fmtpos); - if (c == '(') { - Py_ssize_t keystart; - Py_ssize_t keylen; - PyObject *key; - int pcount = 1; - - if (dict == NULL) { - PyErr_SetString(PyExc_TypeError, - "format requires a mapping"); - goto onError; - } - ++fmtpos; - --fmtcnt; - keystart = fmtpos; - /* Skip over balanced parentheses */ - while (pcount > 0 && --fmtcnt >= 0) { - c = PyUnicode_READ(fmtkind, fmt, fmtpos); - if (c == ')') - --pcount; - else if (c == '(') - ++pcount; - fmtpos++; - } - keylen = fmtpos - keystart - 1; - if (fmtcnt < 0 || pcount > 0) { - PyErr_SetString(PyExc_ValueError, - "incomplete format key"); - goto onError; - } - key = PyUnicode_Substring(uformat, - keystart, keystart + keylen); - if (key == NULL) - goto onError; - if (args_owned) { - Py_DECREF(args); - args_owned = 0; - } - args = PyObject_GetItem(dict, key); - Py_DECREF(key); - if (args == NULL) { - goto onError; - } - args_owned = 1; - arglen = -1; - argidx = -2; - } - while (--fmtcnt >= 0) { - c = PyUnicode_READ(fmtkind, fmt, fmtpos++); - switch (c) { - case '-': flags |= F_LJUST; continue; - case '+': flags |= F_SIGN; continue; - case ' ': flags |= F_BLANK; continue; - case '#': flags |= F_ALT; continue; - case '0': flags |= F_ZERO; continue; - } - break; - } - if (c == '*') { - v = getnextarg(args, arglen, &argidx); - if (v == NULL) - goto onError; - if (!PyLong_Check(v)) { - PyErr_SetString(PyExc_TypeError, - "* wants int"); - goto onError; - } - width = PyLong_AsSsize_t(v); - if (width == -1 && PyErr_Occurred()) - goto onError; - if (width < 0) { - flags |= F_LJUST; - width = -width; - } - if (--fmtcnt >= 0) - c = PyUnicode_READ(fmtkind, fmt, fmtpos++); - } - else if (c >= '0' && c <= '9') { - width = c - '0'; - while (--fmtcnt >= 0) { - c = PyUnicode_READ(fmtkind, fmt, fmtpos++); - if (c < '0' || c > '9') - break; - /* Since c is unsigned, the RHS would end up as unsigned, - mixing signed and unsigned comparison. Since c is between - '0' and '9', casting to int is safe. */ - if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) { - PyErr_SetString(PyExc_ValueError, - "width too big"); - goto onError; - } - width = width*10 + (c - '0'); - } + if (arg->ch == '*') { + v = unicode_format_getnextarg(ctx); + if (v == NULL) + return -1; + if (!PyLong_Check(v)) { + PyErr_SetString(PyExc_TypeError, + "* wants int"); + return -1; } - if (c == '.') { - prec = 0; - if (--fmtcnt >= 0) - c = PyUnicode_READ(fmtkind, fmt, fmtpos++); - if (c == '*') { - v = getnextarg(args, arglen, &argidx); - if (v == NULL) - goto onError; - if (!PyLong_Check(v)) { - PyErr_SetString(PyExc_TypeError, - "* wants int"); - goto onError; - } - prec = _PyLong_AsInt(v); - if (prec == -1 && PyErr_Occurred()) - goto onError; - if (prec < 0) - prec = 0; - if (--fmtcnt >= 0) - c = PyUnicode_READ(fmtkind, fmt, fmtpos++); - } - else if (c >= '0' && c <= '9') { - prec = c - '0'; - while (--fmtcnt >= 0) { - c = PyUnicode_READ(fmtkind, fmt, fmtpos++); - if (c < '0' || c > '9') - break; - if (prec > (INT_MAX - ((int)c - '0')) / 10) { - PyErr_SetString(PyExc_ValueError, - "prec too big"); - goto onError; - } - prec = prec*10 + (c - '0'); - } - } - } /* prec */ - if (fmtcnt >= 0) { - if (c == 'h' || c == 'l' || c == 'L') { - if (--fmtcnt >= 0) - c = PyUnicode_READ(fmtkind, fmt, fmtpos++); + arg->prec = _PyLong_AsInt(v); + if (arg->prec == -1 && PyErr_Occurred()) + return -1; + if (arg->prec < 0) + arg->prec = 0; + if (--ctx->fmtcnt >= 0) { + arg->ch = FORMAT_READ(ctx); + ctx->fmtpos++; + } + } + else if (arg->ch >= '0' && arg->ch <= '9') { + arg->prec = arg->ch - '0'; + while (--ctx->fmtcnt >= 0) { + arg->ch = FORMAT_READ(ctx); + ctx->fmtpos++; + if (arg->ch < '0' || arg->ch > '9') + break; + if (arg->prec > (INT_MAX - ((int)arg->ch - '0')) / 10) { + PyErr_SetString(PyExc_ValueError, + "precision too big"); + return -1; } + arg->prec = arg->prec*10 + (arg->ch - '0'); } - if (fmtcnt < 0) { - PyErr_SetString(PyExc_ValueError, - "incomplete format"); - goto onError; - } - if (fmtcnt == 0) - writer.overallocate = 0; + } + } - if (c == '%') { - if (_PyUnicodeWriter_Prepare(&writer, 1, '%') == -1) - goto onError; - PyUnicode_WRITE(writer.kind, writer.data, writer.pos, '%'); - writer.pos += 1; - continue; + /* Ignore "h", "l" and "L" format prefix (ex: "%hi" or "%ls") */ + if (ctx->fmtcnt >= 0) { + if (arg->ch == 'h' || arg->ch == 'l' || arg->ch == 'L') { + if (--ctx->fmtcnt >= 0) { + arg->ch = FORMAT_READ(ctx); + ctx->fmtpos++; } + } + } + if (ctx->fmtcnt < 0) { + PyErr_SetString(PyExc_ValueError, + "incomplete format"); + return -1; + } + return 0; - v = getnextarg(args, arglen, &argidx); - if (v == NULL) - goto onError; +#undef FORMAT_READ +} - sign = 0; - signchar = '\0'; - fill = ' '; - switch (c) { - - case 's': - case 'r': - case 'a': - if (PyLong_CheckExact(v) && width == -1 && prec == -1) { - /* Fast path */ - if (_PyLong_FormatWriter(&writer, v, 10, flags & F_ALT) == -1) - goto onError; - goto nextarg; - } +/* Format one argument. Supported conversion specifiers: - if (PyUnicode_CheckExact(v) && c == 's') { - temp = v; - Py_INCREF(temp); - } - else { - if (c == 's') - temp = PyObject_Str(v); - else if (c == 'r') - temp = PyObject_Repr(v); - else - temp = PyObject_ASCII(v); - } - break; + - "s", "r", "a": any type + - "i", "d", "u", "o", "x", "X": int + - "e", "E", "f", "F", "g", "G": float + - "c": int or str (1 character) - case 'i': - case 'd': - case 'u': - case 'o': - case 'x': - case 'X': - if (PyLong_CheckExact(v) - && width == -1 && prec == -1 - && !(flags & (F_SIGN | F_BLANK))) - { - /* Fast path */ - switch(c) - { - case 'd': - case 'i': - case 'u': - if (_PyLong_FormatWriter(&writer, v, 10, flags & F_ALT) == -1) - goto onError; - goto nextarg; - case 'x': - if (_PyLong_FormatWriter(&writer, v, 16, flags & F_ALT) == -1) - goto onError; - goto nextarg; - case 'o': - if (_PyLong_FormatWriter(&writer, v, 8, flags & F_ALT) == -1) - goto onError; - goto nextarg; - default: - break; - } - } + When possible, the output is written directly into the Unicode writer + (ctx->writer). A string is created when padding is required. - isnumok = 0; - if (PyNumber_Check(v)) { - PyObject *iobj=NULL; + Return 0 if the argument has been formatted into *p_str, + 1 if the argument has been written into ctx->writer, + -1 on error. */ +static int +unicode_format_arg_format(struct unicode_formatter_t *ctx, + struct unicode_format_arg_t *arg, + PyObject **p_str) +{ + PyObject *v; + _PyUnicodeWriter *writer = &ctx->writer; - if (PyLong_Check(v)) { - iobj = v; - Py_INCREF(iobj); - } - else { - iobj = PyNumber_Long(v); - } - if (iobj!=NULL) { - if (PyLong_Check(iobj)) { - isnumok = 1; - sign = 1; - temp = formatlong(iobj, flags, prec, (c == 'i'? 'd': c)); - Py_DECREF(iobj); - } - else { - Py_DECREF(iobj); - } - } - } - if (!isnumok) { - PyErr_Format(PyExc_TypeError, - "%%%c format: a number is required, " - "not %.200s", (char)c, Py_TYPE(v)->tp_name); - goto onError; - } - if (flags & F_ZERO) - fill = '0'; - break; + if (ctx->fmtcnt == 0) + ctx->writer.overallocate = 0; - case 'e': - case 'E': - case 'f': - case 'F': - case 'g': - case 'G': - if (width == -1 && prec == -1 - && !(flags & (F_SIGN | F_BLANK))) - { - /* Fast path */ - if (formatfloat(v, flags, prec, c, NULL, &writer) == -1) - goto onError; - goto nextarg; - } + if (arg->ch == '%') { + if (_PyUnicodeWriter_WriteCharInline(writer, '%') < 0) + return -1; + return 1; + } - sign = 1; - if (flags & F_ZERO) - fill = '0'; - if (formatfloat(v, flags, prec, c, &temp, NULL) == -1) - temp = NULL; - break; + v = unicode_format_getnextarg(ctx); + if (v == NULL) + return -1; - case 'c': - { - Py_UCS4 ch = formatchar(v); - if (ch == (Py_UCS4) -1) - goto onError; - if (width == -1 && prec == -1) { - /* Fast path */ - if (_PyUnicodeWriter_Prepare(&writer, 1, ch) == -1) - goto onError; - PyUnicode_WRITE(writer.kind, writer.data, writer.pos, ch); - writer.pos += 1; - goto nextarg; - } - temp = PyUnicode_FromOrdinal(ch); - break; - } - default: - PyErr_Format(PyExc_ValueError, - "unsupported format character '%c' (0x%x) " - "at index %zd", - (31<=c && c<=126) ? (char)c : '?', - (int)c, - fmtpos - 1); - goto onError; - } - if (temp == NULL) - goto onError; - assert (PyUnicode_Check(temp)); + switch (arg->ch) { + case 's': + case 'r': + case 'a': + if (PyLong_CheckExact(v) && arg->width == -1 && arg->prec == -1) { + /* Fast path */ + if (_PyLong_FormatWriter(writer, v, 10, arg->flags & F_ALT) == -1) + return -1; + return 1; + } - if (width == -1 && prec == -1 - && !(flags & (F_SIGN | F_BLANK))) - { - /* Fast path */ - if (_PyUnicodeWriter_WriteStr(&writer, temp) == -1) - goto onError; - goto nextarg; - } + if (PyUnicode_CheckExact(v) && arg->ch == 's') { + *p_str = v; + Py_INCREF(*p_str); + } + else { + if (arg->ch == 's') + *p_str = PyObject_Str(v); + else if (arg->ch == 'r') + *p_str = PyObject_Repr(v); + else + *p_str = PyObject_ASCII(v); + } + break; - if (PyUnicode_READY(temp) == -1) { - Py_CLEAR(temp); - goto onError; - } - kind = PyUnicode_KIND(temp); - pbuf = PyUnicode_DATA(temp); - len = PyUnicode_GET_LENGTH(temp); + case 'i': + case 'd': + case 'u': + case 'o': + case 'x': + case 'X': + { + int ret = mainformatlong(v, arg, p_str, writer); + if (ret != 0) + return ret; + arg->sign = 1; + break; + } - if (c == 's' || c == 'r' || c == 'a') { - if (prec >= 0 && len > prec) - len = prec; - } + case 'e': + case 'E': + case 'f': + case 'F': + case 'g': + case 'G': + if (arg->width == -1 && arg->prec == -1 + && !(arg->flags & (F_SIGN | F_BLANK))) + { + /* Fast path */ + if (formatfloat(v, arg, NULL, writer) == -1) + return -1; + return 1; + } - /* pbuf is initialized here. */ - pindex = 0; - if (sign) { - Py_UCS4 ch = PyUnicode_READ(kind, pbuf, pindex); - if (ch == '-' || ch == '+') { - signchar = ch; - len--; - pindex++; - } - else if (flags & F_SIGN) - signchar = '+'; - else if (flags & F_BLANK) - signchar = ' '; - else - sign = 0; - } - if (width < len) - width = len; - - /* Compute the length and maximum character of the - written characters */ - bufmaxchar = 127; - if (!(flags & F_LJUST)) { - if (sign) { - if ((width-1) > len) - bufmaxchar = Py_MAX(bufmaxchar, fill); - } - else { - if (width > len) - bufmaxchar = Py_MAX(bufmaxchar, fill); - } - } - maxchar = _PyUnicode_FindMaxChar(temp, 0, pindex+len); - bufmaxchar = Py_MAX(bufmaxchar, maxchar); + arg->sign = 1; + if (formatfloat(v, arg, p_str, NULL) == -1) + return -1; + break; - buflen = width; - if (sign && len == width) - buflen++; + case 'c': + { + Py_UCS4 ch = formatchar(v); + if (ch == (Py_UCS4) -1) + return -1; + if (arg->width == -1 && arg->prec == -1) { + /* Fast path */ + if (_PyUnicodeWriter_WriteCharInline(writer, ch) < 0) + return -1; + return 1; + } + *p_str = PyUnicode_FromOrdinal(ch); + break; + } - if (_PyUnicodeWriter_Prepare(&writer, buflen, bufmaxchar) == -1) - goto onError; + default: + PyErr_Format(PyExc_ValueError, + "unsupported format character '%c' (0x%x) " + "at index %zd", + (31<=arg->ch && arg->ch<=126) ? (char)arg->ch : '?', + (int)arg->ch, + ctx->fmtpos - 1); + return -1; + } + if (*p_str == NULL) + return -1; + assert (PyUnicode_Check(*p_str)); + return 0; +} - /* Write characters */ - if (sign) { - if (fill != ' ') { - PyUnicode_WRITE(writer.kind, writer.data, writer.pos, signchar); - writer.pos += 1; - } - if (width > len) - width--; - } - if ((flags & F_ALT) && (c == 'x' || c == 'X' || c == 'o')) { - assert(PyUnicode_READ(kind, pbuf, pindex) == '0'); - assert(PyUnicode_READ(kind, pbuf, pindex + 1) == c); - if (fill != ' ') { - PyUnicode_WRITE(writer.kind, writer.data, writer.pos, '0'); - PyUnicode_WRITE(writer.kind, writer.data, writer.pos+1, c); - writer.pos += 2; - pindex += 2; - } - width -= 2; - if (width < 0) - width = 0; - len -= 2; - } - if (width > len && !(flags & F_LJUST)) { - sublen = width - len; - FILL(writer.kind, writer.data, fill, writer.pos, sublen); - writer.pos += sublen; - width = len; - } - if (fill == ' ') { - if (sign) { - PyUnicode_WRITE(writer.kind, writer.data, writer.pos, signchar); - writer.pos += 1; - } - if ((flags & F_ALT) && (c == 'x' || c == 'X' || c == 'o')) { - assert(PyUnicode_READ(kind, pbuf, pindex) == '0'); - assert(PyUnicode_READ(kind, pbuf, pindex+1) == c); - PyUnicode_WRITE(writer.kind, writer.data, writer.pos, '0'); - PyUnicode_WRITE(writer.kind, writer.data, writer.pos+1, c); - writer.pos += 2; - pindex += 2; - } - } +static int +unicode_format_arg_output(struct unicode_formatter_t *ctx, + struct unicode_format_arg_t *arg, + PyObject *str) +{ + Py_ssize_t len; + enum PyUnicode_Kind kind; + void *pbuf; + Py_ssize_t pindex; + Py_UCS4 signchar; + Py_ssize_t buflen; + Py_UCS4 maxchar; + Py_ssize_t sublen; + _PyUnicodeWriter *writer = &ctx->writer; + Py_UCS4 fill; + + fill = ' '; + if (arg->sign && arg->flags & F_ZERO) + fill = '0'; + + if (PyUnicode_READY(str) == -1) + return -1; + + len = PyUnicode_GET_LENGTH(str); + if ((arg->width == -1 || arg->width <= len) + && (arg->prec == -1 || arg->prec >= len) + && !(arg->flags & (F_SIGN | F_BLANK))) + { + /* Fast path */ + if (_PyUnicodeWriter_WriteStr(writer, str) == -1) + return -1; + return 0; + } + + /* Truncate the string for "s", "r" and "a" formats + if the precision is set */ + if (arg->ch == 's' || arg->ch == 'r' || arg->ch == 'a') { + if (arg->prec >= 0 && len > arg->prec) + len = arg->prec; + } + + /* Adjust sign and width */ + kind = PyUnicode_KIND(str); + pbuf = PyUnicode_DATA(str); + pindex = 0; + signchar = '\0'; + if (arg->sign) { + Py_UCS4 ch = PyUnicode_READ(kind, pbuf, pindex); + if (ch == '-' || ch == '+') { + signchar = ch; + len--; + pindex++; + } + else if (arg->flags & F_SIGN) + signchar = '+'; + else if (arg->flags & F_BLANK) + signchar = ' '; + else + arg->sign = 0; + } + if (arg->width < len) + arg->width = len; + + /* Prepare the writer */ + maxchar = writer->maxchar; + if (!(arg->flags & F_LJUST)) { + if (arg->sign) { + if ((arg->width-1) > len) - maxchar = MAX_MAXCHAR(maxchar, fill); ++ maxchar = Py_MAX(maxchar, fill); + } + else { + if (arg->width > len) - maxchar = MAX_MAXCHAR(maxchar, fill); ++ maxchar = Py_MAX(maxchar, fill); + } + } + if (PyUnicode_MAX_CHAR_VALUE(str) > maxchar) { + Py_UCS4 strmaxchar = _PyUnicode_FindMaxChar(str, 0, pindex+len); - maxchar = MAX_MAXCHAR(maxchar, strmaxchar); ++ maxchar = Py_MAX(maxchar, strmaxchar); + } + + buflen = arg->width; + if (arg->sign && len == arg->width) + buflen++; + if (_PyUnicodeWriter_Prepare(writer, buflen, maxchar) == -1) + return -1; + + /* Write the sign if needed */ + if (arg->sign) { + if (fill != ' ') { + PyUnicode_WRITE(writer->kind, writer->data, writer->pos, signchar); + writer->pos += 1; + } + if (arg->width > len) + arg->width--; + } + + /* Write the numeric prefix for "x", "X" and "o" formats + if the alternate form is used. + For example, write "0x" for the "%#x" format. */ + if ((arg->flags & F_ALT) && (arg->ch == 'x' || arg->ch == 'X' || arg->ch == 'o')) { + assert(PyUnicode_READ(kind, pbuf, pindex) == '0'); + assert(PyUnicode_READ(kind, pbuf, pindex + 1) == arg->ch); + if (fill != ' ') { + PyUnicode_WRITE(writer->kind, writer->data, writer->pos, '0'); + PyUnicode_WRITE(writer->kind, writer->data, writer->pos+1, arg->ch); + writer->pos += 2; + pindex += 2; + } + arg->width -= 2; + if (arg->width < 0) + arg->width = 0; + len -= 2; + } + + /* Pad left with the fill character if needed */ + if (arg->width > len && !(arg->flags & F_LJUST)) { + sublen = arg->width - len; + FILL(writer->kind, writer->data, fill, writer->pos, sublen); + writer->pos += sublen; + arg->width = len; + } + + /* If padding with spaces: write sign if needed and/or numeric prefix if + the alternate form is used */ + if (fill == ' ') { + if (arg->sign) { + PyUnicode_WRITE(writer->kind, writer->data, writer->pos, signchar); + writer->pos += 1; + } + if ((arg->flags & F_ALT) && (arg->ch == 'x' || arg->ch == 'X' || arg->ch == 'o')) { + assert(PyUnicode_READ(kind, pbuf, pindex) == '0'); + assert(PyUnicode_READ(kind, pbuf, pindex+1) == arg->ch); + PyUnicode_WRITE(writer->kind, writer->data, writer->pos, '0'); + PyUnicode_WRITE(writer->kind, writer->data, writer->pos+1, arg->ch); + writer->pos += 2; + pindex += 2; + } + } + + /* Write characters */ + if (len) { + _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos, + str, pindex, len); + writer->pos += len; + } + + /* Pad right with the fill character if needed */ + if (arg->width > len) { + sublen = arg->width - len; + FILL(writer->kind, writer->data, ' ', writer->pos, sublen); + writer->pos += sublen; + } + return 0; +} + +/* Helper of PyUnicode_Format(): format one arg. + Return 0 on success, raise an exception and return -1 on error. */ +static int +unicode_format_arg(struct unicode_formatter_t *ctx) +{ + struct unicode_format_arg_t arg; + PyObject *str; + int ret; + + arg.ch = PyUnicode_READ(ctx->fmtkind, ctx->fmtdata, ctx->fmtpos); + arg.flags = 0; + arg.width = -1; + arg.prec = -1; + arg.sign = 0; + str = NULL; + + ret = unicode_format_arg_parse(ctx, &arg); + if (ret == -1) + return -1; + + ret = unicode_format_arg_format(ctx, &arg, &str); + if (ret == -1) + return -1; + + if (ret != 1) { + ret = unicode_format_arg_output(ctx, &arg, str); + Py_DECREF(str); + if (ret == -1) + return -1; + } + + if (ctx->dict && (ctx->argidx < ctx->arglen) && arg.ch != '%') { + PyErr_SetString(PyExc_TypeError, + "not all arguments converted during string formatting"); + return -1; + } + return 0; +} + +PyObject * +PyUnicode_Format(PyObject *format, PyObject *args) +{ + struct unicode_formatter_t ctx; - if (len) { - _PyUnicode_FastCopyCharacters(writer.buffer, writer.pos, - temp, pindex, len); - writer.pos += len; + if (format == NULL || args == NULL) { + PyErr_BadInternalCall(); + return NULL; + } + + ctx.fmtstr = PyUnicode_FromObject(format); + if (ctx.fmtstr == NULL) + return NULL; + if (PyUnicode_READY(ctx.fmtstr) == -1) { + Py_DECREF(ctx.fmtstr); + return NULL; + } + ctx.fmtdata = PyUnicode_DATA(ctx.fmtstr); + ctx.fmtkind = PyUnicode_KIND(ctx.fmtstr); + ctx.fmtcnt = PyUnicode_GET_LENGTH(ctx.fmtstr); + ctx.fmtpos = 0; + + _PyUnicodeWriter_Init(&ctx.writer); + ctx.writer.min_length = ctx.fmtcnt + 100; + ctx.writer.overallocate = 1; + + if (PyTuple_Check(args)) { + ctx.arglen = PyTuple_Size(args); + ctx.argidx = 0; + } + else { + ctx.arglen = -1; + ctx.argidx = -2; + } + ctx.args_owned = 0; + if (PyMapping_Check(args) && !PyTuple_Check(args) && !PyUnicode_Check(args)) + ctx.dict = args; + else + ctx.dict = NULL; + ctx.args = args; + + while (--ctx.fmtcnt >= 0) { + if (PyUnicode_READ(ctx.fmtkind, ctx.fmtdata, ctx.fmtpos) != '%') { + Py_ssize_t nonfmtpos; + + nonfmtpos = ctx.fmtpos++; + while (ctx.fmtcnt >= 0 && + PyUnicode_READ(ctx.fmtkind, ctx.fmtdata, ctx.fmtpos) != '%') { + ctx.fmtpos++; + ctx.fmtcnt--; } - if (width > len) { - sublen = width - len; - FILL(writer.kind, writer.data, ' ', writer.pos, sublen); - writer.pos += sublen; + if (ctx.fmtcnt < 0) { + ctx.fmtpos--; + ctx.writer.overallocate = 0; } -nextarg: - if (dict && (argidx < arglen) && c != '%') { - PyErr_SetString(PyExc_TypeError, - "not all arguments converted during string formatting"); + if (_PyUnicodeWriter_WriteSubstring(&ctx.writer, ctx.fmtstr, + nonfmtpos, ctx.fmtpos) < 0) goto onError; - } - Py_CLEAR(temp); - } /* '%' */ - } /* until end */ - if (argidx < arglen && !dict) { + } + else { + ctx.fmtpos++; + if (unicode_format_arg(&ctx) == -1) + goto onError; + } + } + + if (ctx.argidx < ctx.arglen && !ctx.dict) { PyErr_SetString(PyExc_TypeError, "not all arguments converted during string formatting"); goto onError;