unicode_format.h -- implementation of str.format().
*/
-#include "accu.h"
-
/* Defines for more efficiently reallocating the string buffer */
#define INITIAL_SIZE_INCREMENT 100
#define SIZE_MULTIPLIER 2
}
-/************************************************************************/
-/*********** Output string management functions ****************/
-/************************************************************************/
-
-/*
- output_data dumps characters into our output string
- buffer.
-
- In some cases, it has to reallocate the string.
-
- It returns a status: 0 for a failed reallocation,
- 1 for success.
-*/
-static int
-output_data(_PyAccu *acc, PyObject *s, Py_ssize_t start, Py_ssize_t end)
-{
- PyObject *substring;
- int r;
-
- substring = PyUnicode_Substring(s, start, end);
- if (substring == NULL)
- return 0;
- r = _PyAccu_Accumulate(acc, substring);
- Py_DECREF(substring);
- return r == 0;
-}
-
/************************************************************************/
/*********** Format string parsing -- integers and identifiers *********/
/************************************************************************/
appends to the output.
*/
static int
-render_field(PyObject *fieldobj, SubString *format_spec, _PyAccu *acc)
+render_field(PyObject *fieldobj, SubString *format_spec, unicode_writer_t *writer)
{
int ok = 0;
PyObject *result = NULL;
goto done;
assert(PyUnicode_Check(result));
- ok = output_data(acc, result, 0, PyUnicode_GET_LENGTH(result));
+
+ ok = (unicode_writer_write_str(writer, result, 0, PyUnicode_GET_LENGTH(result)) == 0);
done:
Py_XDECREF(format_spec_object);
Py_XDECREF(result);
static int
output_markup(SubString *field_name, SubString *format_spec,
int format_spec_needs_expanding, Py_UCS4 conversion,
- _PyAccu *acc, PyObject *args, PyObject *kwargs,
+ unicode_writer_t *writer, PyObject *args, PyObject *kwargs,
int recursion_depth, AutoNumber *auto_number)
{
PyObject *tmp = NULL;
else
actual_format_spec = format_spec;
- if (render_field(fieldobj, actual_format_spec, acc) == 0)
+ if (render_field(fieldobj, actual_format_spec, writer) == 0)
goto done;
result = 1;
*/
static int
do_markup(SubString *input, PyObject *args, PyObject *kwargs,
- _PyAccu *acc, int recursion_depth, AutoNumber *auto_number)
+ unicode_writer_t *writer, int recursion_depth, AutoNumber *auto_number)
{
MarkupIterator iter;
int format_spec_needs_expanding;
SubString field_name;
SubString format_spec;
Py_UCS4 conversion;
+ int err;
MarkupIterator_init(&iter, input->str, input->start, input->end);
while ((result = MarkupIterator_next(&iter, &literal, &field_present,
&field_name, &format_spec,
&conversion,
&format_spec_needs_expanding)) == 2) {
- if (!output_data(acc, literal.str, literal.start, literal.end))
+ err = unicode_writer_write_str(writer,
+ literal.str, literal.start,
+ literal.end - literal.start);
+ if (err == -1)
return 0;
if (field_present)
if (!output_markup(&field_name, &format_spec,
- format_spec_needs_expanding, conversion, acc,
+ format_spec_needs_expanding, conversion, writer,
args, kwargs, recursion_depth, auto_number))
return 0;
}
build_string(SubString *input, PyObject *args, PyObject *kwargs,
int recursion_depth, AutoNumber *auto_number)
{
- _PyAccu acc;
+ unicode_writer_t writer;
+ Py_ssize_t initlen;
/* check the recursion level */
if (recursion_depth <= 0) {
return NULL;
}
- if (_PyAccu_Init(&acc))
+ initlen = PyUnicode_GET_LENGTH(input->str) + 100;
+ if (unicode_writer_init(&writer, initlen, 127) == -1)
return NULL;
- if (!do_markup(input, args, kwargs, &acc, recursion_depth,
+ if (!do_markup(input, args, kwargs, &writer, recursion_depth,
auto_number)) {
- _PyAccu_Destroy(&acc);
+ unicode_writer_dealloc(&writer);
return NULL;
}
- return _PyAccu_Finish(&acc);
+ return unicode_writer_finish(&writer);
}
/************************************************************************/
return PyBool_FromLong(result);
}
+/* State used to build a str object incrementally.  The data, kind and
+   maxchar fields mirror properties of `buffer` and are refreshed by
+   unicode_writer_update(). */
+typedef struct {
+ /* string object being filled; created with PyUnicode_New() */
+ PyObject *buffer;
+ /* PyUnicode_DATA(buffer) */
+ void *data;
+ /* PyUnicode_KIND(buffer) */
+ enum PyUnicode_Kind kind;
+ /* PyUnicode_MAX_CHAR_VALUE(buffer) */
+ Py_UCS4 maxchar;
+ /* index of the next character to write; starts at 0 */
+ Py_ssize_t pos;
+} unicode_writer_t;
+
+/* Reload the writer's maxchar, data and kind fields from writer->buffer.
+   The other writer functions call this after the buffer has been created,
+   replaced or widened. */
+Py_LOCAL_INLINE(void)
+unicode_writer_update(unicode_writer_t *writer)
+{
+ writer->maxchar = PyUnicode_MAX_CHAR_VALUE(writer->buffer);
+ writer->data = PyUnicode_DATA(writer->buffer);
+ writer->kind = PyUnicode_KIND(writer->buffer);
+}
+
+/* Initialize a writer: reset the write position to 0 and allocate the
+   buffer with PyUnicode_New(length, maxchar).
+
+   Returns 0 on success, -1 if PyUnicode_New() returned NULL (in which case
+   writer->buffer is NULL and the cached fields are not filled in). */
+Py_LOCAL(int)
+unicode_writer_init(unicode_writer_t *writer,
+ Py_ssize_t length, Py_UCS4 maxchar)
+{
+ writer->pos = 0;
+ writer->buffer = PyUnicode_New(length, maxchar);
+ if (writer->buffer == NULL)
+ return -1;
+ unicode_writer_update(writer);
+ return 0;
+}
+
+/* Make room for writing `length` more characters, each no larger than
+   `maxchar`, at writer->pos.
+
+   The buffer is grown when pos + length exceeds its current length, and
+   replaced or widened when maxchar exceeds writer->maxchar.  Whenever the
+   buffer changes, the cached fields are refreshed.
+
+   Returns 0 on success, -1 on failure. */
+Py_LOCAL_INLINE(int)
+unicode_writer_prepare(unicode_writer_t *writer,
+ Py_ssize_t length, Py_UCS4 maxchar)
+{
+ Py_ssize_t newlen;
+ PyObject *newbuffer;
+
+ /* guard the addition below against Py_ssize_t overflow */
+ if (length > PY_SSIZE_T_MAX - writer->pos) {
+ PyErr_NoMemory();
+ return -1;
+ }
+ newlen = writer->pos + length;
+
+ if (newlen > PyUnicode_GET_LENGTH(writer->buffer)) {
+ /* overallocate by 25% to limit the number of resizes; skipped when
+    the extra quarter would itself overflow */
+ if (newlen <= (PY_SSIZE_T_MAX - newlen / 4))
+ newlen += newlen / 4;
+
+ if (maxchar > writer->maxchar) {
+ /* resize + widen: allocate a new, wider string and copy over the
+    writer->pos characters written so far */
+ newbuffer = PyUnicode_New(newlen, maxchar);
+ if (newbuffer == NULL)
+ return -1;
+ /* NOTE(review): the return value of PyUnicode_CopyCharacters()
+    is not checked here */
+ PyUnicode_CopyCharacters(newbuffer, 0,
+ writer->buffer, 0, writer->pos);
+ Py_DECREF(writer->buffer);
+ }
+ else {
+ /* same character width: only the length changes.
+    NOTE(review): resize_compact() is defined elsewhere; what it does
+    to writer->buffer when it returns NULL is not visible here --
+    confirm the old pointer is still safe to release afterwards. */
+ newbuffer = resize_compact(writer->buffer, newlen);
+ if (newbuffer == NULL)
+ return -1;
+ }
+ writer->buffer = newbuffer;
+ unicode_writer_update(writer);
+ }
+ else if (maxchar > writer->maxchar) {
+ /* enough room already, but the new characters need a wider buffer;
+    unicode_widen() is defined elsewhere and is given the buffer by
+    address, so it may replace writer->buffer */
+ if (unicode_widen(&writer->buffer, writer->pos, maxchar) < 0)
+ return -1;
+ unicode_writer_update(writer);
+ }
+ return 0;
+}
+
+/* Append `length` characters of `str`, starting at index `start`, to the
+   writer.
+
+   `str` must be a non-NULL str object, and start/length must describe a
+   range inside it (checked with assert only).  A zero length is a no-op.
+
+   Returns 0 on success, -1 on failure. */
+Py_LOCAL_INLINE(int)
+unicode_writer_write_str(
+ unicode_writer_t *writer,
+ PyObject *str, Py_ssize_t start, Py_ssize_t length)
+{
+ Py_UCS4 maxchar;
+
+ assert(str != NULL);
+ assert(PyUnicode_Check(str));
+ if (PyUnicode_READY(str) == -1)
+ return -1;
+
+ assert(0 <= start);
+ assert(0 <= length);
+ assert(start + length <= PyUnicode_GET_LENGTH(str));
+ if (length == 0)
+ return 0;
+
+ /* the value computed over [start, start + length) is passed on as the
+    maxchar the buffer must be able to hold; _PyUnicode_FindMaxChar() is
+    defined elsewhere -- presumably the largest code point in the range */
+ maxchar = _PyUnicode_FindMaxChar(str, start, start + length);
+ if (unicode_writer_prepare(writer, length, maxchar) == -1)
+ return -1;
+
+ assert((writer->pos + length) <= PyUnicode_GET_LENGTH(writer->buffer));
+ /* copy_characters() is defined elsewhere in the file */
+ copy_characters(writer->buffer, writer->pos,
+ str, start, length);
+ writer->pos += length;
+ return 0;
+}
+
+/* Append the single character `ch` to the writer.
+
+   Returns 0 on success, -1 if unicode_writer_prepare() fails. */
+Py_LOCAL_INLINE(int)
+unicode_writer_write_char(
+ unicode_writer_t *writer,
+ Py_UCS4 ch)
+{
+ /* ch doubles as the maxchar argument: the buffer must be able to hold it */
+ if (unicode_writer_prepare(writer, 1, ch) == -1)
+ return -1;
+ assert((writer->pos + 1) <= PyUnicode_GET_LENGTH(writer->buffer));
+ /* uses the kind/data fields refreshed by unicode_writer_update() */
+ PyUnicode_WRITE(writer->kind, writer->data, writer->pos, ch);
+ writer->pos += 1;
+ return 0;
+}
+
+/* Finish writing: shrink the buffer to the writer->pos characters actually
+   written and hand it to the caller.
+
+   Returns the buffer object on success; the writer's reference passes to
+   the caller.  Returns NULL if PyUnicode_Resize() fails, after releasing
+   the buffer.
+
+   On both paths writer->buffer is reset to NULL, so the writer never keeps
+   a pointer to an object it no longer owns and a later
+   unicode_writer_dealloc() on the same writer is a no-op. */
+Py_LOCAL(PyObject *)
+unicode_writer_finish(unicode_writer_t *writer)
+{
+    PyObject *result;
+
+    if (PyUnicode_Resize(&writer->buffer, writer->pos) < 0) {
+        /* Py_CLEAR rather than Py_DECREF: do not leave a dangling pointer */
+        Py_CLEAR(writer->buffer);
+        return NULL;
+    }
+    /* transfer ownership out of the writer */
+    result = writer->buffer;
+    writer->buffer = NULL;
+    return result;
+}
+
+/* Drop the writer's reference to its buffer.  Py_CLEAR also sets
+   writer->buffer to NULL and does nothing if it is already NULL. */
+Py_LOCAL(void)
+unicode_writer_dealloc(unicode_writer_t *writer)
+{
+ Py_CLEAR(writer->buffer);
+}
+
#include "stringlib/unicode_format.h"
PyDoc_STRVAR(format__doc__,
return (Py_UCS4) -1;
}
-typedef struct {
- PyObject *buffer;
- void *data;
- enum PyUnicode_Kind kind;
- Py_UCS4 maxchar;
- Py_ssize_t pos;
-} unicode_writer_t;
-
-Py_LOCAL_INLINE(void)
-unicode_writer_update(unicode_writer_t *writer)
-{
- writer->maxchar = PyUnicode_MAX_CHAR_VALUE(writer->buffer);
- writer->data = PyUnicode_DATA(writer->buffer);
- writer->kind = PyUnicode_KIND(writer->buffer);
-}
-
-Py_LOCAL(int)
-unicode_writer_init(unicode_writer_t *writer,
- Py_ssize_t length, Py_UCS4 maxchar)
-{
- writer->pos = 0;
- writer->buffer = PyUnicode_New(length, maxchar);
- if (writer->buffer == NULL)
- return -1;
- unicode_writer_update(writer);
- return 0;
-}
-
-Py_LOCAL_INLINE(int)
-unicode_writer_prepare(unicode_writer_t *writer,
- Py_ssize_t length, Py_UCS4 maxchar)
-{
- Py_ssize_t newlen;
- PyObject *newbuffer;
-
- if (length > PY_SSIZE_T_MAX - writer->pos) {
- PyErr_NoMemory();
- return -1;
- }
- newlen = writer->pos + length;
-
- if (newlen > PyUnicode_GET_LENGTH(writer->buffer)) {
- /* overallocate 25% to limit the number of resize */
- if (newlen <= (PY_SSIZE_T_MAX - newlen / 4))
- newlen += newlen / 4;
-
- if (maxchar > writer->maxchar) {
- /* resize + widen */
- newbuffer = PyUnicode_New(newlen, maxchar);
- if (newbuffer == NULL)
- return -1;
- PyUnicode_CopyCharacters(newbuffer, 0,
- writer->buffer, 0, writer->pos);
- Py_DECREF(writer->buffer);
- }
- else {
- newbuffer = resize_compact(writer->buffer, newlen);
- if (newbuffer == NULL)
- return -1;
- }
- writer->buffer = newbuffer;
- unicode_writer_update(writer);
- }
- else if (maxchar > writer->maxchar) {
- if (unicode_widen(&writer->buffer, writer->pos, maxchar) < 0)
- return -1;
- unicode_writer_update(writer);
- }
- return 0;
-}
-
-Py_LOCAL_INLINE(int)
-unicode_writer_write_str(
- unicode_writer_t *writer,
- PyObject *str, Py_ssize_t start, Py_ssize_t length)
-{
- Py_UCS4 maxchar;
-
- assert(str != NULL);
- assert(PyUnicode_Check(str));
- if (PyUnicode_READY(str) == -1)
- return -1;
-
- assert(0 <= start);
- assert(0 <= length);
- assert(start + length <= PyUnicode_GET_LENGTH(str));
- if (length == 0)
- return 0;
-
- maxchar = _PyUnicode_FindMaxChar(str, start, start + length);
- if (unicode_writer_prepare(writer, length, maxchar) == -1)
- return -1;
-
- assert((writer->pos + length) <= PyUnicode_GET_LENGTH(writer->buffer));
- copy_characters(writer->buffer, writer->pos,
- str, start, length);
- writer->pos += length;
- return 0;
-}
-
-Py_LOCAL_INLINE(int)
-unicode_writer_write_char(
- unicode_writer_t *writer,
- Py_UCS4 ch)
-{
- if (unicode_writer_prepare(writer, 1, ch) == -1)
- return -1;
- assert((writer->pos + 1) <= PyUnicode_GET_LENGTH(writer->buffer));
- PyUnicode_WRITE(writer->kind, writer->data, writer->pos, ch);
- writer->pos += 1;
- return 0;
-}
-
-Py_LOCAL(PyObject *)
-unicode_writer_finish(unicode_writer_t *writer)
-{
- if (PyUnicode_Resize(&writer->buffer, writer->pos) < 0) {
- Py_DECREF(writer->buffer);
- return NULL;
- }
- return writer->buffer;
-}
-
-Py_LOCAL(void)
-unicode_writer_dealloc(unicode_writer_t *writer)
-{
- Py_CLEAR(writer->buffer);
-}
-
PyObject *
PyUnicode_Format(PyObject *format, PyObject *args)
{