/* Fast detection of the most frequent whitespace characters.

   Lookup table indexed by a 7-bit ASCII code point (0..127): the entry is 1
   when the character is one of the whitespace characters named in the row
   comments below and 0 otherwise.  The table has exactly 128 entries, so
   callers must range-check the code point before indexing. */
const unsigned char _Py_ascii_whitespace[] = {
    /* 0x00 - 0x07 */
    0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x08 - 0x0F */
/*     case 0x0009: * HORIZONTAL TABULATION */
/*     case 0x000A: * LINE FEED */
/*     case 0x000B: * VERTICAL TABULATION */
/*     case 0x000C: * FORM FEED */
/*     case 0x000D: * CARRIAGE RETURN */
    0, 1, 1, 1, 1, 1, 0, 0,
    /* 0x10 - 0x17 */
    0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x18 - 0x1F */
/*     case 0x001C: * FILE SEPARATOR */
/*     case 0x001D: * GROUP SEPARATOR */
/*     case 0x001E: * RECORD SEPARATOR */
/*     case 0x001F: * UNIT SEPARATOR */
    0, 0, 0, 0, 1, 1, 1, 1,
    /* 0x20 - 0x27 */
/*     case 0x0020: * SPACE */
    1, 0, 0, 0, 0, 0, 0, 0,
    /* 0x28 - 0x3F */
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,

    /* 0x40 - 0x7F */
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0
};
/* Same for linebreaks.

   Lookup table indexed by a 7-bit ASCII code point (0..127): the entry is 1
   when the character is one of the line-break characters named in the row
   comments below and 0 otherwise.  The table has exactly 128 entries, so
   callers must range-check the code point before indexing. */
static unsigned char ascii_linebreak[] = {
    /* 0x00 - 0x07 */
    0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x08 - 0x0F */
/*         0x000A, * LINE FEED */
/*         0x000D, * CARRIAGE RETURN */
    0, 0, 1, 0, 0, 1, 0, 0,
    /* 0x10 - 0x17 */
    0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x18 - 0x1F */
/*         0x001C, * FILE SEPARATOR */
/*         0x001D, * GROUP SEPARATOR */
/*         0x001E, * RECORD SEPARATOR */
    0, 0, 0, 0, 1, 1, 1, 0,
    /* 0x20 - 0x3F */
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,

    /* 0x40 - 0x7F */
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0
};
PyUnicode_GetMax(void)
{
#ifdef Py_UNICODE_WIDE
- return 0x10FFFF;
+ return 0x10FFFF;
#else
- /* This is actually an illegal character, so it should
- not be passed to unichr. */
- return 0xFFFF;
+ /* This is actually an illegal character, so it should
+ not be passed to unichr. */
+ return 0xFFFF;
#endif
}
/* Shortcut if there's nothing much to do. */
if (unicode->length == length)
- goto reset;
+ goto reset;
/* Resizing shared object (unicode_empty or single character
objects) in-place is not allowed. Use PyUnicode_Resize()
instead ! */
- if (unicode == unicode_empty ||
- (unicode->length == 1 &&
- unicode->str[0] < 256U &&
- unicode_latin1[unicode->str[0]] == unicode)) {
+ if (unicode == unicode_empty ||
+ (unicode->length == 1 &&
+ unicode->str[0] < 256U &&
+ unicode_latin1[unicode->str[0]] == unicode)) {
PyErr_SetString(PyExc_SystemError,
"can't resize shared str objects");
return -1;
oldstr = unicode->str;
unicode->str = PyObject_REALLOC(unicode->str,
- sizeof(Py_UNICODE) * (length + 1));
+ sizeof(Py_UNICODE) * (length + 1));
if (!unicode->str) {
- unicode->str = (Py_UNICODE *)oldstr;
+ unicode->str = (Py_UNICODE *)oldstr;
PyErr_NoMemory();
return -1;
}
unicode = free_list;
free_list = *(PyUnicodeObject **)unicode;
numfree--;
- if (unicode->str) {
- /* Keep-Alive optimization: we only upsize the buffer,
- never downsize it. */
- if ((unicode->length < length) &&
+ if (unicode->str) {
+ /* Keep-Alive optimization: we only upsize the buffer,
+ never downsize it. */
+ if ((unicode->length < length) &&
unicode_resize(unicode, length) < 0) {
- PyObject_DEL(unicode->str);
- unicode->str = NULL;
- }
- }
+ PyObject_DEL(unicode->str);
+ unicode->str = NULL;
+ }
+ }
else {
- size_t new_size = sizeof(Py_UNICODE) * ((size_t)length + 1);
- unicode->str = (Py_UNICODE*) PyObject_MALLOC(new_size);
+ size_t new_size = sizeof(Py_UNICODE) * ((size_t)length + 1);
+ unicode->str = (Py_UNICODE*) PyObject_MALLOC(new_size);
}
PyObject_INIT(unicode, &PyUnicode_Type);
}
else {
- size_t new_size;
+ size_t new_size;
unicode = PyObject_New(PyUnicodeObject, &PyUnicode_Type);
if (unicode == NULL)
return NULL;
- new_size = sizeof(Py_UNICODE) * ((size_t)length + 1);
- unicode->str = (Py_UNICODE*) PyObject_MALLOC(new_size);
+ new_size = sizeof(Py_UNICODE) * ((size_t)length + 1);
+ unicode->str = (Py_UNICODE*) PyObject_MALLOC(new_size);
}
if (!unicode->str) {
- PyErr_NoMemory();
- goto onError;
+ PyErr_NoMemory();
+ goto onError;
}
/* Initialize the first element to guard against cases where
* the caller fails before initializing str -- unicode_resize()
}
if (PyUnicode_CheckExact(unicode) &&
- numfree < PyUnicode_MAXFREELIST) {
+ numfree < PyUnicode_MAXFREELIST) {
/* Keep-Alive optimization */
- if (unicode->length >= KEEPALIVE_SIZE_LIMIT) {
- PyObject_DEL(unicode->str);
- unicode->str = NULL;
- unicode->length = 0;
- }
- if (unicode->defenc) {
- Py_DECREF(unicode->defenc);
- unicode->defenc = NULL;
- }
- /* Add to free list */
+ if (unicode->length >= KEEPALIVE_SIZE_LIMIT) {
+ PyObject_DEL(unicode->str);
+ unicode->str = NULL;
+ unicode->length = 0;
+ }
+ if (unicode->defenc) {
+ Py_DECREF(unicode->defenc);
+ unicode->defenc = NULL;
+ }
+ /* Add to free list */
*(PyUnicodeObject **)unicode = free_list;
free_list = unicode;
numfree++;
}
else {
- PyObject_DEL(unicode->str);
- Py_XDECREF(unicode->defenc);
- Py_TYPE(unicode)->tp_free((PyObject *)unicode);
+ PyObject_DEL(unicode->str);
+ Py_XDECREF(unicode->defenc);
+ Py_TYPE(unicode)->tp_free((PyObject *)unicode);
}
}
/* Argument checks */
if (unicode == NULL) {
- PyErr_BadInternalCall();
- return -1;
+ PyErr_BadInternalCall();
+ return -1;
}
v = *unicode;
if (v == NULL || !PyUnicode_Check(v) || Py_REFCNT(v) != 1 || length < 0) {
- PyErr_BadInternalCall();
- return -1;
+ PyErr_BadInternalCall();
+ return -1;
}
/* Resizing unicode_empty and single character objects is not
possible since these are being shared. We simply return a fresh
copy with the same Unicode content. */
if (v->length != length &&
- (v == unicode_empty || v->length == 1)) {
- PyUnicodeObject *w = _PyUnicode_New(length);
- if (w == NULL)
- return -1;
- Py_UNICODE_COPY(w->str, v->str,
- length < v->length ? length : v->length);
- Py_DECREF(*unicode);
- *unicode = w;
- return 0;
+ (v == unicode_empty || v->length == 1)) {
+ PyUnicodeObject *w = _PyUnicode_New(length);
+ if (w == NULL)
+ return -1;
+ Py_UNICODE_COPY(w->str, v->str,
+ length < v->length ? length : v->length);
+ Py_DECREF(*unicode);
+ *unicode = w;
+ return 0;
}
/* Note that we don't have to modify *unicode for unshared Unicode
}
PyObject *PyUnicode_FromUnicode(const Py_UNICODE *u,
- Py_ssize_t size)
+ Py_ssize_t size)
{
PyUnicodeObject *unicode;
some optimizations which share commonly used objects. */
if (u != NULL) {
- /* Optimization for empty strings */
- if (size == 0 && unicode_empty != NULL) {
- Py_INCREF(unicode_empty);
- return (PyObject *)unicode_empty;
- }
-
- /* Single character Unicode objects in the Latin-1 range are
- shared when using this constructor */
- if (size == 1 && *u < 256) {
- unicode = unicode_latin1[*u];
- if (!unicode) {
- unicode = _PyUnicode_New(1);
- if (!unicode)
- return NULL;
- unicode->str[0] = *u;
- unicode_latin1[*u] = unicode;
- }
- Py_INCREF(unicode);
- return (PyObject *)unicode;
- }
+ /* Optimization for empty strings */
+ if (size == 0 && unicode_empty != NULL) {
+ Py_INCREF(unicode_empty);
+ return (PyObject *)unicode_empty;
+ }
+
+ /* Single character Unicode objects in the Latin-1 range are
+ shared when using this constructor */
+ if (size == 1 && *u < 256) {
+ unicode = unicode_latin1[*u];
+ if (!unicode) {
+ unicode = _PyUnicode_New(1);
+ if (!unicode)
+ return NULL;
+ unicode->str[0] = *u;
+ unicode_latin1[*u] = unicode;
+ }
+ Py_INCREF(unicode);
+ return (PyObject *)unicode;
+ }
}
unicode = _PyUnicode_New(size);
/* Copy the Unicode data into the new object */
if (u != NULL)
- Py_UNICODE_COPY(unicode->str, u, size);
+ Py_UNICODE_COPY(unicode->str, u, size);
return (PyObject *)unicode;
}
{
PyUnicodeObject *unicode;
- if (size < 0) {
- PyErr_SetString(PyExc_SystemError,
- "Negative size passed to PyUnicode_FromStringAndSize");
- return NULL;
- }
+ if (size < 0) {
+ PyErr_SetString(PyExc_SystemError,
+ "Negative size passed to PyUnicode_FromStringAndSize");
+ return NULL;
+ }
/* If the Unicode data is known at construction time, we can apply
some optimizations which share commonly used objects.
UTF-8 decoder at the end. */
if (u != NULL) {
- /* Optimization for empty strings */
- if (size == 0 && unicode_empty != NULL) {
- Py_INCREF(unicode_empty);
- return (PyObject *)unicode_empty;
- }
+ /* Optimization for empty strings */
+ if (size == 0 && unicode_empty != NULL) {
+ Py_INCREF(unicode_empty);
+ return (PyObject *)unicode_empty;
+ }
- /* Single characters are shared when using this constructor.
+ /* Single characters are shared when using this constructor.
Restrict to ASCII, since the input must be UTF-8. */
- if (size == 1 && Py_CHARMASK(*u) < 128) {
- unicode = unicode_latin1[Py_CHARMASK(*u)];
- if (!unicode) {
- unicode = _PyUnicode_New(1);
- if (!unicode)
- return NULL;
- unicode->str[0] = Py_CHARMASK(*u);
- unicode_latin1[Py_CHARMASK(*u)] = unicode;
- }
- Py_INCREF(unicode);
- return (PyObject *)unicode;
- }
+ if (size == 1 && Py_CHARMASK(*u) < 128) {
+ unicode = unicode_latin1[Py_CHARMASK(*u)];
+ if (!unicode) {
+ unicode = _PyUnicode_New(1);
+ if (!unicode)
+ return NULL;
+ unicode->str[0] = Py_CHARMASK(*u);
+ unicode_latin1[Py_CHARMASK(*u)] = unicode;
+ }
+ Py_INCREF(unicode);
+ return (PyObject *)unicode;
+ }
return PyUnicode_DecodeUTF8(u, size, NULL);
}
#ifdef HAVE_WCHAR_H
PyObject *PyUnicode_FromWideChar(register const wchar_t *w,
- Py_ssize_t size)
+ Py_ssize_t size)
{
PyUnicodeObject *unicode;
if (w == NULL) {
if (size == 0)
return PyUnicode_FromStringAndSize(NULL, 0);
- PyErr_BadInternalCall();
- return NULL;
+ PyErr_BadInternalCall();
+ return NULL;
}
if (size == -1) {
memcpy(unicode->str, w, size * sizeof(wchar_t));
#else
{
- register Py_UNICODE *u;
- register Py_ssize_t i;
- u = PyUnicode_AS_UNICODE(unicode);
- for (i = size; i > 0; i--)
- *u++ = *w++;
+ register Py_UNICODE *u;
+ register Py_ssize_t i;
+ u = PyUnicode_AS_UNICODE(unicode);
+ for (i = size; i > 0; i--)
+ *u++ = *w++;
}
#endif
static void
makefmt(char *fmt, int longflag, int size_tflag, int zeropad, int width, int precision, char c)
{
- *fmt++ = '%';
- if (width) {
- if (zeropad)
- *fmt++ = '0';
- fmt += sprintf(fmt, "%d", width);
- }
- if (precision)
- fmt += sprintf(fmt, ".%d", precision);
- if (longflag)
- *fmt++ = 'l';
- else if (size_tflag) {
- char *f = PY_FORMAT_SIZE_T;
- while (*f)
- *fmt++ = *f++;
- }
- *fmt++ = c;
- *fmt = '\0';
+ *fmt++ = '%';
+ if (width) {
+ if (zeropad)
+ *fmt++ = '0';
+ fmt += sprintf(fmt, "%d", width);
+ }
+ if (precision)
+ fmt += sprintf(fmt, ".%d", precision);
+ if (longflag)
+ *fmt++ = 'l';
+ else if (size_tflag) {
+ char *f = PY_FORMAT_SIZE_T;
+ while (*f)
+ *fmt++ = *f++;
+ }
+ *fmt++ = c;
+ *fmt = '\0';
}
#define appendstring(string) {for (copy = string;*copy;) *s++ = *copy++;}
PyObject *
PyUnicode_FromFormatV(const char *format, va_list vargs)
{
- va_list count;
- Py_ssize_t callcount = 0;
- PyObject **callresults = NULL;
- PyObject **callresult = NULL;
- Py_ssize_t n = 0;
- int width = 0;
- int precision = 0;
- int zeropad;
- const char* f;
- Py_UNICODE *s;
- PyObject *string;
- /* used by sprintf */
- char buffer[21];
- /* use abuffer instead of buffer, if we need more space
- * (which can happen if there's a format specifier with width). */
- char *abuffer = NULL;
- char *realbuffer;
- Py_ssize_t abuffersize = 0;
- char fmt[60]; /* should be enough for %0width.precisionld */
- const char *copy;
+ va_list count;
+ Py_ssize_t callcount = 0;
+ PyObject **callresults = NULL;
+ PyObject **callresult = NULL;
+ Py_ssize_t n = 0;
+ int width = 0;
+ int precision = 0;
+ int zeropad;
+ const char* f;
+ Py_UNICODE *s;
+ PyObject *string;
+ /* used by sprintf */
+ char buffer[21];
+ /* use abuffer instead of buffer, if we need more space
+ * (which can happen if there's a format specifier with width). */
+ char *abuffer = NULL;
+ char *realbuffer;
+ Py_ssize_t abuffersize = 0;
+ char fmt[60]; /* should be enough for %0width.precisionld */
+ const char *copy;
#ifdef VA_LIST_IS_ARRAY
- Py_MEMCPY(count, vargs, sizeof(va_list));
+ Py_MEMCPY(count, vargs, sizeof(va_list));
#else
#ifdef __va_copy
- __va_copy(count, vargs);
+ __va_copy(count, vargs);
#else
- count = vargs;
+ count = vargs;
#endif
#endif
- /* step 1: count the number of %S/%R/%A format specifications
- * (we call PyObject_Str()/PyObject_Repr()/PyObject_ASCII() for
- * these objects once during step 3 and put the result in
- an array) */
- for (f = format; *f; f++) {
- if (*f == '%' && (*(f+1)=='S' || *(f+1)=='R' || *(f+1)=='A'))
- ++callcount;
- }
- /* step 2: allocate memory for the results of
- * PyObject_Str()/PyObject_Repr() calls */
- if (callcount) {
- callresults = PyObject_Malloc(sizeof(PyObject *)*callcount);
- if (!callresults) {
- PyErr_NoMemory();
- return NULL;
- }
- callresult = callresults;
- }
- /* step 3: figure out how large a buffer we need */
- for (f = format; *f; f++) {
- if (*f == '%') {
- const char* p = f;
- width = 0;
- while (ISDIGIT((unsigned)*f))
- width = (width*10) + *f++ - '0';
- while (*++f && *f != '%' && !ISALPHA((unsigned)*f))
- ;
-
- /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
- * they don't affect the amount of space we reserve.
- */
- if ((*f == 'l' || *f == 'z') &&
- (f[1] == 'd' || f[1] == 'u'))
+ /* step 1: count the number of %S/%R/%A format specifications
+ * (we call PyObject_Str()/PyObject_Repr()/PyObject_ASCII() for
+ * these objects once during step 3 and put the result in
+ an array) */
+ for (f = format; *f; f++) {
+ if (*f == '%' && (*(f+1)=='S' || *(f+1)=='R' || *(f+1)=='A'))
+ ++callcount;
+ }
+ /* step 2: allocate memory for the results of
+ * PyObject_Str()/PyObject_Repr() calls */
+ if (callcount) {
+ callresults = PyObject_Malloc(sizeof(PyObject *)*callcount);
+ if (!callresults) {
+ PyErr_NoMemory();
+ return NULL;
+ }
+ callresult = callresults;
+ }
+ /* step 3: figure out how large a buffer we need */
+ for (f = format; *f; f++) {
+ if (*f == '%') {
+ const char* p = f;
+ width = 0;
+ while (ISDIGIT((unsigned)*f))
+ width = (width*10) + *f++ - '0';
+ while (*++f && *f != '%' && !ISALPHA((unsigned)*f))
+ ;
+
+ /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
+ * they don't affect the amount of space we reserve.
+ */
+ if ((*f == 'l' || *f == 'z') &&
+ (f[1] == 'd' || f[1] == 'u'))
++f;
- switch (*f) {
- case 'c':
- (void)va_arg(count, int);
- /* fall through... */
- case '%':
- n++;
- break;
- case 'd': case 'u': case 'i': case 'x':
- (void) va_arg(count, int);
- /* 20 bytes is enough to hold a 64-bit
- integer. Decimal takes the most space.
- This isn't enough for octal.
- If a width is specified we need more
- (which we allocate later). */
- if (width < 20)
- width = 20;
- n += width;
- if (abuffersize < width)
- abuffersize = width;
- break;
- case 's':
- {
- /* UTF-8 */
- unsigned char*s;
- s = va_arg(count, unsigned char*);
- while (*s) {
- if (*s < 128) {
- n++; s++;
- } else if (*s < 0xc0) {
- /* invalid UTF-8 */
- n++; s++;
- } else if (*s < 0xc0) {
- n++;
- s++; if(!*s)break;
- s++;
- } else if (*s < 0xe0) {
- n++;
- s++; if(!*s)break;
- s++; if(!*s)break;
- s++;
- } else {
- #ifdef Py_UNICODE_WIDE
- n++;
- #else
- n+=2;
- #endif
- s++; if(!*s)break;
- s++; if(!*s)break;
- s++; if(!*s)break;
- s++;
- }
- }
- break;
- }
- case 'U':
- {
- PyObject *obj = va_arg(count, PyObject *);
- assert(obj && PyUnicode_Check(obj));
- n += PyUnicode_GET_SIZE(obj);
- break;
- }
- case 'V':
- {
- PyObject *obj = va_arg(count, PyObject *);
- const char *str = va_arg(count, const char *);
- assert(obj || str);
- assert(!obj || PyUnicode_Check(obj));
- if (obj)
- n += PyUnicode_GET_SIZE(obj);
- else
- n += strlen(str);
- break;
- }
- case 'S':
- {
- PyObject *obj = va_arg(count, PyObject *);
- PyObject *str;
- assert(obj);
- str = PyObject_Str(obj);
- if (!str)
- goto fail;
- n += PyUnicode_GET_SIZE(str);
- /* Remember the str and switch to the next slot */
- *callresult++ = str;
- break;
- }
- case 'R':
- {
- PyObject *obj = va_arg(count, PyObject *);
- PyObject *repr;
- assert(obj);
- repr = PyObject_Repr(obj);
- if (!repr)
- goto fail;
- n += PyUnicode_GET_SIZE(repr);
- /* Remember the repr and switch to the next slot */
- *callresult++ = repr;
- break;
- }
- case 'A':
- {
- PyObject *obj = va_arg(count, PyObject *);
- PyObject *ascii;
- assert(obj);
- ascii = PyObject_ASCII(obj);
- if (!ascii)
- goto fail;
- n += PyUnicode_GET_SIZE(ascii);
- /* Remember the repr and switch to the next slot */
- *callresult++ = ascii;
- break;
- }
- case 'p':
- (void) va_arg(count, int);
- /* maximum 64-bit pointer representation:
- * 0xffffffffffffffff
- * so 19 characters is enough.
- * XXX I count 18 -- what's the extra for?
- */
- n += 19;
- break;
- default:
- /* if we stumble upon an unknown
- formatting code, copy the rest of
- the format string to the output
- string. (we cannot just skip the
- code, since there's no way to know
- what's in the argument list) */
- n += strlen(p);
- goto expand;
- }
- } else
- n++;
- }
+ switch (*f) {
+ case 'c':
+ (void)va_arg(count, int);
+ /* fall through... */
+ case '%':
+ n++;
+ break;
+ case 'd': case 'u': case 'i': case 'x':
+ (void) va_arg(count, int);
+ /* 20 bytes is enough to hold a 64-bit
+ integer. Decimal takes the most space.
+ This isn't enough for octal.
+ If a width is specified we need more
+ (which we allocate later). */
+ if (width < 20)
+ width = 20;
+ n += width;
+ if (abuffersize < width)
+ abuffersize = width;
+ break;
+ case 's':
+ {
+ /* UTF-8 */
+ unsigned char*s;
+ s = va_arg(count, unsigned char*);
+ while (*s) {
+ if (*s < 128) {
+ n++; s++;
+ } else if (*s < 0xc0) {
+ /* invalid UTF-8 */
+ n++; s++;
+ } else if (*s < 0xc0) {
+ n++;
+ s++; if(!*s)break;
+ s++;
+ } else if (*s < 0xe0) {
+ n++;
+ s++; if(!*s)break;
+ s++; if(!*s)break;
+ s++;
+ } else {
+ #ifdef Py_UNICODE_WIDE
+ n++;
+ #else
+ n+=2;
+ #endif
+ s++; if(!*s)break;
+ s++; if(!*s)break;
+ s++; if(!*s)break;
+ s++;
+ }
+ }
+ break;
+ }
+ case 'U':
+ {
+ PyObject *obj = va_arg(count, PyObject *);
+ assert(obj && PyUnicode_Check(obj));
+ n += PyUnicode_GET_SIZE(obj);
+ break;
+ }
+ case 'V':
+ {
+ PyObject *obj = va_arg(count, PyObject *);
+ const char *str = va_arg(count, const char *);
+ assert(obj || str);
+ assert(!obj || PyUnicode_Check(obj));
+ if (obj)
+ n += PyUnicode_GET_SIZE(obj);
+ else
+ n += strlen(str);
+ break;
+ }
+ case 'S':
+ {
+ PyObject *obj = va_arg(count, PyObject *);
+ PyObject *str;
+ assert(obj);
+ str = PyObject_Str(obj);
+ if (!str)
+ goto fail;
+ n += PyUnicode_GET_SIZE(str);
+ /* Remember the str and switch to the next slot */
+ *callresult++ = str;
+ break;
+ }
+ case 'R':
+ {
+ PyObject *obj = va_arg(count, PyObject *);
+ PyObject *repr;
+ assert(obj);
+ repr = PyObject_Repr(obj);
+ if (!repr)
+ goto fail;
+ n += PyUnicode_GET_SIZE(repr);
+ /* Remember the repr and switch to the next slot */
+ *callresult++ = repr;
+ break;
+ }
+ case 'A':
+ {
+ PyObject *obj = va_arg(count, PyObject *);
+ PyObject *ascii;
+ assert(obj);
+ ascii = PyObject_ASCII(obj);
+ if (!ascii)
+ goto fail;
+ n += PyUnicode_GET_SIZE(ascii);
+ /* Remember the repr and switch to the next slot */
+ *callresult++ = ascii;
+ break;
+ }
+ case 'p':
+ (void) va_arg(count, int);
+ /* maximum 64-bit pointer representation:
+ * 0xffffffffffffffff
+ * so 19 characters is enough.
+ * XXX I count 18 -- what's the extra for?
+ */
+ n += 19;
+ break;
+ default:
+ /* if we stumble upon an unknown
+ formatting code, copy the rest of
+ the format string to the output
+ string. (we cannot just skip the
+ code, since there's no way to know
+ what's in the argument list) */
+ n += strlen(p);
+ goto expand;
+ }
+ } else
+ n++;
+ }
expand:
- if (abuffersize > 20) {
- abuffer = PyObject_Malloc(abuffersize);
- if (!abuffer) {
- PyErr_NoMemory();
- goto fail;
- }
- realbuffer = abuffer;
- }
- else
- realbuffer = buffer;
- /* step 4: fill the buffer */
- /* Since we've analyzed how much space we need for the worst case,
- we don't have to resize the string.
- There can be no errors beyond this point. */
- string = PyUnicode_FromUnicode(NULL, n);
- if (!string)
- goto fail;
-
- s = PyUnicode_AS_UNICODE(string);
- callresult = callresults;
-
- for (f = format; *f; f++) {
- if (*f == '%') {
- const char* p = f++;
- int longflag = 0;
- int size_tflag = 0;
- zeropad = (*f == '0');
- /* parse the width.precision part */
- width = 0;
- while (ISDIGIT((unsigned)*f))
- width = (width*10) + *f++ - '0';
- precision = 0;
- if (*f == '.') {
- f++;
- while (ISDIGIT((unsigned)*f))
- precision = (precision*10) + *f++ - '0';
- }
- /* handle the long flag, but only for %ld and %lu.
- others can be added when necessary. */
- if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
- longflag = 1;
- ++f;
- }
- /* handle the size_t flag. */
- if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
- size_tflag = 1;
- ++f;
- }
-
- switch (*f) {
- case 'c':
- *s++ = va_arg(vargs, int);
- break;
- case 'd':
- makefmt(fmt, longflag, size_tflag, zeropad, width, precision, 'd');
- if (longflag)
- sprintf(realbuffer, fmt, va_arg(vargs, long));
- else if (size_tflag)
- sprintf(realbuffer, fmt, va_arg(vargs, Py_ssize_t));
- else
- sprintf(realbuffer, fmt, va_arg(vargs, int));
- appendstring(realbuffer);
- break;
- case 'u':
- makefmt(fmt, longflag, size_tflag, zeropad, width, precision, 'u');
- if (longflag)
- sprintf(realbuffer, fmt, va_arg(vargs, unsigned long));
- else if (size_tflag)
- sprintf(realbuffer, fmt, va_arg(vargs, size_t));
- else
- sprintf(realbuffer, fmt, va_arg(vargs, unsigned int));
- appendstring(realbuffer);
- break;
- case 'i':
- makefmt(fmt, 0, 0, zeropad, width, precision, 'i');
- sprintf(realbuffer, fmt, va_arg(vargs, int));
- appendstring(realbuffer);
- break;
- case 'x':
- makefmt(fmt, 0, 0, zeropad, width, precision, 'x');
- sprintf(realbuffer, fmt, va_arg(vargs, int));
- appendstring(realbuffer);
- break;
- case 's':
- {
- /* Parameter must be UTF-8 encoded.
- In case of encoding errors, use
- the replacement character. */
- PyObject *u;
- p = va_arg(vargs, char*);
- u = PyUnicode_DecodeUTF8(p, strlen(p),
- "replace");
- if (!u)
- goto fail;
- Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(u),
- PyUnicode_GET_SIZE(u));
- s += PyUnicode_GET_SIZE(u);
- Py_DECREF(u);
- break;
- }
- case 'U':
- {
- PyObject *obj = va_arg(vargs, PyObject *);
- Py_ssize_t size = PyUnicode_GET_SIZE(obj);
- Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(obj), size);
- s += size;
- break;
- }
- case 'V':
- {
- PyObject *obj = va_arg(vargs, PyObject *);
- const char *str = va_arg(vargs, const char *);
- if (obj) {
- Py_ssize_t size = PyUnicode_GET_SIZE(obj);
- Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(obj), size);
- s += size;
- } else {
- appendstring(str);
- }
- break;
- }
- case 'S':
- case 'R':
- {
- Py_UNICODE *ucopy;
- Py_ssize_t usize;
- Py_ssize_t upos;
- /* unused, since we already have the result */
- (void) va_arg(vargs, PyObject *);
- ucopy = PyUnicode_AS_UNICODE(*callresult);
- usize = PyUnicode_GET_SIZE(*callresult);
- for (upos = 0; upos<usize;)
- *s++ = ucopy[upos++];
- /* We're done with the unicode()/repr() => forget it */
- Py_DECREF(*callresult);
- /* switch to next unicode()/repr() result */
- ++callresult;
- break;
- }
- case 'p':
- sprintf(buffer, "%p", va_arg(vargs, void*));
- /* %p is ill-defined: ensure leading 0x. */
- if (buffer[1] == 'X')
- buffer[1] = 'x';
- else if (buffer[1] != 'x') {
- memmove(buffer+2, buffer, strlen(buffer)+1);
- buffer[0] = '0';
- buffer[1] = 'x';
- }
- appendstring(buffer);
- break;
- case '%':
- *s++ = '%';
- break;
- default:
- appendstring(p);
- goto end;
- }
- } else
- *s++ = *f;
- }
+ if (abuffersize > 20) {
+ abuffer = PyObject_Malloc(abuffersize);
+ if (!abuffer) {
+ PyErr_NoMemory();
+ goto fail;
+ }
+ realbuffer = abuffer;
+ }
+ else
+ realbuffer = buffer;
+ /* step 4: fill the buffer */
+ /* Since we've analyzed how much space we need for the worst case,
+ we don't have to resize the string.
+ There can be no errors beyond this point. */
+ string = PyUnicode_FromUnicode(NULL, n);
+ if (!string)
+ goto fail;
+
+ s = PyUnicode_AS_UNICODE(string);
+ callresult = callresults;
+
+ for (f = format; *f; f++) {
+ if (*f == '%') {
+ const char* p = f++;
+ int longflag = 0;
+ int size_tflag = 0;
+ zeropad = (*f == '0');
+ /* parse the width.precision part */
+ width = 0;
+ while (ISDIGIT((unsigned)*f))
+ width = (width*10) + *f++ - '0';
+ precision = 0;
+ if (*f == '.') {
+ f++;
+ while (ISDIGIT((unsigned)*f))
+ precision = (precision*10) + *f++ - '0';
+ }
+ /* handle the long flag, but only for %ld and %lu.
+ others can be added when necessary. */
+ if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
+ longflag = 1;
+ ++f;
+ }
+ /* handle the size_t flag. */
+ if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
+ size_tflag = 1;
+ ++f;
+ }
+
+ switch (*f) {
+ case 'c':
+ *s++ = va_arg(vargs, int);
+ break;
+ case 'd':
+ makefmt(fmt, longflag, size_tflag, zeropad, width, precision, 'd');
+ if (longflag)
+ sprintf(realbuffer, fmt, va_arg(vargs, long));
+ else if (size_tflag)
+ sprintf(realbuffer, fmt, va_arg(vargs, Py_ssize_t));
+ else
+ sprintf(realbuffer, fmt, va_arg(vargs, int));
+ appendstring(realbuffer);
+ break;
+ case 'u':
+ makefmt(fmt, longflag, size_tflag, zeropad, width, precision, 'u');
+ if (longflag)
+ sprintf(realbuffer, fmt, va_arg(vargs, unsigned long));
+ else if (size_tflag)
+ sprintf(realbuffer, fmt, va_arg(vargs, size_t));
+ else
+ sprintf(realbuffer, fmt, va_arg(vargs, unsigned int));
+ appendstring(realbuffer);
+ break;
+ case 'i':
+ makefmt(fmt, 0, 0, zeropad, width, precision, 'i');
+ sprintf(realbuffer, fmt, va_arg(vargs, int));
+ appendstring(realbuffer);
+ break;
+ case 'x':
+ makefmt(fmt, 0, 0, zeropad, width, precision, 'x');
+ sprintf(realbuffer, fmt, va_arg(vargs, int));
+ appendstring(realbuffer);
+ break;
+ case 's':
+ {
+ /* Parameter must be UTF-8 encoded.
+ In case of encoding errors, use
+ the replacement character. */
+ PyObject *u;
+ p = va_arg(vargs, char*);
+ u = PyUnicode_DecodeUTF8(p, strlen(p),
+ "replace");
+ if (!u)
+ goto fail;
+ Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(u),
+ PyUnicode_GET_SIZE(u));
+ s += PyUnicode_GET_SIZE(u);
+ Py_DECREF(u);
+ break;
+ }
+ case 'U':
+ {
+ PyObject *obj = va_arg(vargs, PyObject *);
+ Py_ssize_t size = PyUnicode_GET_SIZE(obj);
+ Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(obj), size);
+ s += size;
+ break;
+ }
+ case 'V':
+ {
+ PyObject *obj = va_arg(vargs, PyObject *);
+ const char *str = va_arg(vargs, const char *);
+ if (obj) {
+ Py_ssize_t size = PyUnicode_GET_SIZE(obj);
+ Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(obj), size);
+ s += size;
+ } else {
+ appendstring(str);
+ }
+ break;
+ }
+ case 'S':
+ case 'R':
+ {
+ Py_UNICODE *ucopy;
+ Py_ssize_t usize;
+ Py_ssize_t upos;
+ /* unused, since we already have the result */
+ (void) va_arg(vargs, PyObject *);
+ ucopy = PyUnicode_AS_UNICODE(*callresult);
+ usize = PyUnicode_GET_SIZE(*callresult);
+ for (upos = 0; upos<usize;)
+ *s++ = ucopy[upos++];
+ /* We're done with the unicode()/repr() => forget it */
+ Py_DECREF(*callresult);
+ /* switch to next unicode()/repr() result */
+ ++callresult;
+ break;
+ }
+ case 'p':
+ sprintf(buffer, "%p", va_arg(vargs, void*));
+ /* %p is ill-defined: ensure leading 0x. */
+ if (buffer[1] == 'X')
+ buffer[1] = 'x';
+ else if (buffer[1] != 'x') {
+ memmove(buffer+2, buffer, strlen(buffer)+1);
+ buffer[0] = '0';
+ buffer[1] = 'x';
+ }
+ appendstring(buffer);
+ break;
+ case '%':
+ *s++ = '%';
+ break;
+ default:
+ appendstring(p);
+ goto end;
+ }
+ } else
+ *s++ = *f;
+ }
end:
- if (callresults)
- PyObject_Free(callresults);
- if (abuffer)
- PyObject_Free(abuffer);
- PyUnicode_Resize(&string, s - PyUnicode_AS_UNICODE(string));
- return string;
+ if (callresults)
+ PyObject_Free(callresults);
+ if (abuffer)
+ PyObject_Free(abuffer);
+ PyUnicode_Resize(&string, s - PyUnicode_AS_UNICODE(string));
+ return string;
fail:
- if (callresults) {
- PyObject **callresult2 = callresults;
- while (callresult2 < callresult) {
- Py_DECREF(*callresult2);
- ++callresult2;
- }
- PyObject_Free(callresults);
- }
- if (abuffer)
- PyObject_Free(abuffer);
- return NULL;
+ if (callresults) {
+ PyObject **callresult2 = callresults;
+ while (callresult2 < callresult) {
+ Py_DECREF(*callresult2);
+ ++callresult2;
+ }
+ PyObject_Free(callresults);
+ }
+ if (abuffer)
+ PyObject_Free(abuffer);
+ return NULL;
}
#undef appendstring
/* Variadic front end for PyUnicode_FromFormatV(): collects the arguments
   following `format` into a va_list, forwards them, and returns whatever
   PyUnicode_FromFormatV() returns. */
PyObject *
PyUnicode_FromFormat(const char *format, ...)
{
    PyObject* ret;
    va_list vargs;

#ifdef HAVE_STDARG_PROTOTYPES
    va_start(vargs, format);
#else
    va_start(vargs);
#endif
    ret = PyUnicode_FromFormatV(format, vargs);
    va_end(vargs);
    return ret;
}
Py_ssize_t PyUnicode_AsWideChar(PyUnicodeObject *unicode,
- wchar_t *w,
- Py_ssize_t size)
+ wchar_t *w,
+ Py_ssize_t size)
{
if (unicode == NULL) {
- PyErr_BadInternalCall();
- return -1;
+ PyErr_BadInternalCall();
+ return -1;
}
/* If possible, try to copy the 0-termination as well */
if (size > PyUnicode_GET_SIZE(unicode))
- size = PyUnicode_GET_SIZE(unicode) + 1;
+ size = PyUnicode_GET_SIZE(unicode) + 1;
#ifdef HAVE_USABLE_WCHAR_T
memcpy(w, unicode->str, size * sizeof(wchar_t));
#else
{
- register Py_UNICODE *u;
- register Py_ssize_t i;
- u = PyUnicode_AS_UNICODE(unicode);
- for (i = size; i > 0; i--)
- *w++ = *u++;
+ register Py_UNICODE *u;
+ register Py_ssize_t i;
+ u = PyUnicode_AS_UNICODE(unicode);
+ for (i = size; i > 0; i--)
+ *w++ = *u++;
}
#endif
Py_UNICODE s[2];
if (ordinal < 0 || ordinal > 0x10ffff) {
- PyErr_SetString(PyExc_ValueError,
- "chr() arg not in range(0x110000)");
- return NULL;
+ PyErr_SetString(PyExc_ValueError,
+ "chr() arg not in range(0x110000)");
+ return NULL;
}
#ifndef Py_UNICODE_WIDE
/* XXX Perhaps we should make this API an alias of
PyObject_Str() instead ?! */
if (PyUnicode_CheckExact(obj)) {
- Py_INCREF(obj);
- return obj;
+ Py_INCREF(obj);
+ return obj;
}
if (PyUnicode_Check(obj)) {
- /* For a Unicode subtype that's not a Unicode object,
- return a true Unicode object with the same data. */
- return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(obj),
- PyUnicode_GET_SIZE(obj));
+ /* For a Unicode subtype that's not a Unicode object,
+ return a true Unicode object with the same data. */
+ return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(obj),
+ PyUnicode_GET_SIZE(obj));
}
PyErr_Format(PyExc_TypeError,
"Can't convert '%.100s' object to str implicitly",
}
PyObject *PyUnicode_FromEncodedObject(register PyObject *obj,
- const char *encoding,
- const char *errors)
+ const char *encoding,
+ const char *errors)
{
const char *s = NULL;
Py_ssize_t len;
PyObject *v;
if (obj == NULL) {
- PyErr_BadInternalCall();
- return NULL;
+ PyErr_BadInternalCall();
+ return NULL;
}
if (PyUnicode_Check(obj)) {
- PyErr_SetString(PyExc_TypeError,
- "decoding str is not supported");
- return NULL;
- }
+ PyErr_SetString(PyExc_TypeError,
+ "decoding str is not supported");
+ return NULL;
+ }
/* Coerce object */
if (PyBytes_Check(obj)) {
len = PyByteArray_GET_SIZE(obj);
}
else if (PyObject_AsCharBuffer(obj, &s, &len)) {
- /* Overwrite the error message with something more useful in
- case of a TypeError. */
- if (PyErr_ExceptionMatches(PyExc_TypeError))
+ /* Overwrite the error message with something more useful in
+ case of a TypeError. */
+ if (PyErr_ExceptionMatches(PyExc_TypeError))
PyErr_Format(PyExc_TypeError,
- "coercing to str: need string or buffer, "
- "%.80s found",
- Py_TYPE(obj)->tp_name);
- goto onError;
+ "coercing to str: need string or buffer, "
+ "%.80s found",
+ Py_TYPE(obj)->tp_name);
+ goto onError;
}
/* Convert to Unicode */
if (len == 0) {
- Py_INCREF(unicode_empty);
- v = (PyObject *)unicode_empty;
+ Py_INCREF(unicode_empty);
+ v = (PyObject *)unicode_empty;
}
else
- v = PyUnicode_Decode(s, len, encoding, errors);
+ v = PyUnicode_Decode(s, len, encoding, errors);
return v;
}
PyObject *PyUnicode_Decode(const char *s,
- Py_ssize_t size,
- const char *encoding,
- const char *errors)
+ Py_ssize_t size,
+ const char *encoding,
+ const char *errors)
{
PyObject *buffer = NULL, *unicode;
Py_buffer info;
}
if (encoding == NULL)
- encoding = PyUnicode_GetDefaultEncoding();
+ encoding = PyUnicode_GetDefaultEncoding();
/* Decode via the codec registry */
v = PyCodec_Decode(unicode, encoding, errors);
}
if (encoding == NULL)
- encoding = PyUnicode_GetDefaultEncoding();
+ encoding = PyUnicode_GetDefaultEncoding();
/* Decode via the codec registry */
v = PyCodec_Decode(unicode, encoding, errors);
}
PyObject *PyUnicode_Encode(const Py_UNICODE *s,
- Py_ssize_t size,
- const char *encoding,
- const char *errors)
+ Py_ssize_t size,
+ const char *encoding,
+ const char *errors)
{
PyObject *v, *unicode;
unicode = PyUnicode_FromUnicode(s, size);
if (unicode == NULL)
- return NULL;
+ return NULL;
v = PyUnicode_AsEncodedString(unicode, encoding, errors);
Py_DECREF(unicode);
return v;
}
if (encoding == NULL)
- encoding = PyUnicode_GetDefaultEncoding();
+ encoding = PyUnicode_GetDefaultEncoding();
/* Encode via the codec registry */
v = PyCodec_Encode(unicode, encoding, errors);
}
if (encoding == NULL)
- encoding = PyUnicode_GetDefaultEncoding();
+ encoding = PyUnicode_GetDefaultEncoding();
/* Shortcuts for common default encodings */
if (errors == NULL) {
- if (strcmp(encoding, "utf-8") == 0)
- return PyUnicode_AsUTF8String(unicode);
- else if (strcmp(encoding, "latin-1") == 0)
- return PyUnicode_AsLatin1String(unicode);
+ if (strcmp(encoding, "utf-8") == 0)
+ return PyUnicode_AsUTF8String(unicode);
+ else if (strcmp(encoding, "latin-1") == 0)
+ return PyUnicode_AsLatin1String(unicode);
#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
- else if (strcmp(encoding, "mbcs") == 0)
- return PyUnicode_AsMBCSString(unicode);
+ else if (strcmp(encoding, "mbcs") == 0)
+ return PyUnicode_AsMBCSString(unicode);
#endif
- else if (strcmp(encoding, "ascii") == 0)
- return PyUnicode_AsASCIIString(unicode);
+ else if (strcmp(encoding, "ascii") == 0)
+ return PyUnicode_AsASCIIString(unicode);
/* During bootstrap, we may need to find the encodings
package, to load the file system encoding, and require the
file system encoding in order to load the encodings
else if (Py_FileSystemDefaultEncoding &&
strcmp(encoding, Py_FileSystemDefaultEncoding) == 0 &&
!PyThreadState_GET()->interp->codecs_initialized)
- return PyUnicode_AsASCIIString(unicode);
+ return PyUnicode_AsASCIIString(unicode);
}
/* Encode via the codec registry */
}
if (encoding == NULL)
- encoding = PyUnicode_GetDefaultEncoding();
+ encoding = PyUnicode_GetDefaultEncoding();
/* Encode via the codec registry */
v = PyCodec_Encode(unicode, encoding, errors);
}
PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode,
- const char *errors)
+ const char *errors)
{
PyObject *v = ((PyUnicodeObject *)unicode)->defenc;
if (v)
int res = -1;
if (*errorHandler == NULL) {
- *errorHandler = PyCodec_LookupError(errors);
- if (*errorHandler == NULL)
- goto onError;
+ *errorHandler = PyCodec_LookupError(errors);
+ if (*errorHandler == NULL)
+ goto onError;
}
if (*exceptionObject == NULL) {
- *exceptionObject = PyUnicodeDecodeError_Create(
- encoding, *input, *inend-*input, *startinpos, *endinpos, reason);
- if (*exceptionObject == NULL)
- goto onError;
+ *exceptionObject = PyUnicodeDecodeError_Create(
+ encoding, *input, *inend-*input, *startinpos, *endinpos, reason);
+ if (*exceptionObject == NULL)
+ goto onError;
}
else {
- if (PyUnicodeDecodeError_SetStart(*exceptionObject, *startinpos))
- goto onError;
- if (PyUnicodeDecodeError_SetEnd(*exceptionObject, *endinpos))
- goto onError;
- if (PyUnicodeDecodeError_SetReason(*exceptionObject, reason))
- goto onError;
+ if (PyUnicodeDecodeError_SetStart(*exceptionObject, *startinpos))
+ goto onError;
+ if (PyUnicodeDecodeError_SetEnd(*exceptionObject, *endinpos))
+ goto onError;
+ if (PyUnicodeDecodeError_SetReason(*exceptionObject, reason))
+ goto onError;
}
restuple = PyObject_CallFunctionObjArgs(*errorHandler, *exceptionObject, NULL);
if (restuple == NULL)
- goto onError;
+ goto onError;
if (!PyTuple_Check(restuple)) {
- PyErr_Format(PyExc_TypeError, &argparse[4]);
- goto onError;
+ PyErr_Format(PyExc_TypeError, &argparse[4]);
+ goto onError;
}
if (!PyArg_ParseTuple(restuple, argparse, &PyUnicode_Type, &repunicode, &newpos))
- goto onError;
+ goto onError;
/* Copy back the bytes variables, which might have been modified by the
callback */
if (!inputobj)
goto onError;
if (!PyBytes_Check(inputobj)) {
- PyErr_Format(PyExc_TypeError, "exception attribute object must be bytes");
+ PyErr_Format(PyExc_TypeError, "exception attribute object must be bytes");
}
*input = PyBytes_AS_STRING(inputobj);
insize = PyBytes_GET_SIZE(inputobj);
Py_DECREF(inputobj);
if (newpos<0)
- newpos = insize+newpos;
+ newpos = insize+newpos;
if (newpos<0 || newpos>insize) {
- PyErr_Format(PyExc_IndexError, "position %zd from error handler out of bounds", newpos);
- goto onError;
+ PyErr_Format(PyExc_IndexError, "position %zd from error handler out of bounds", newpos);
+ goto onError;
}
/* need more space? (at least enough for what we
repsize = PyUnicode_GET_SIZE(repunicode);
requiredsize = *outpos + repsize + insize-newpos;
if (requiredsize > outsize) {
- if (requiredsize<2*outsize)
- requiredsize = 2*outsize;
- if (_PyUnicode_Resize(output, requiredsize) < 0)
- goto onError;
- *outptr = PyUnicode_AS_UNICODE(*output) + *outpos;
+ if (requiredsize<2*outsize)
+ requiredsize = 2*outsize;
+ if (_PyUnicode_Resize(output, requiredsize) < 0)
+ goto onError;
+ *outptr = PyUnicode_AS_UNICODE(*output) + *outpos;
}
*endinpos = newpos;
*inptr = *input + newpos;
char utf7_special[128] = {
/* indicate whether a UTF-7 character is special i.e. cannot be directly
encoded:
- 0 - not special
- 1 - special
- 2 - whitespace (optional)
- 3 - RFC2152 Set O (optional) */
+ 0 - not special
+ 1 - special
+ 2 - whitespace (optional)
+ 3 - RFC2152 Set O (optional) */
1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 3, 3, 3, 3, 3, 3, 0, 0, 0, 3, 1, 0, 0, 0, 1,
}
PyObject *PyUnicode_DecodeUTF7(const char *s,
- Py_ssize_t size,
- const char *errors)
+ Py_ssize_t size,
+ const char *errors)
{
return PyUnicode_DecodeUTF7Stateful(s, size, errors, NULL);
}
PyObject *PyUnicode_DecodeUTF7Stateful(const char *s,
- Py_ssize_t size,
- const char *errors,
- Py_ssize_t *consumed)
+ Py_ssize_t size,
+ const char *errors,
+ Py_ssize_t *consumed)
{
const char *starts = s;
Py_ssize_t startinpos;
}
} else if (SPECIAL(ch,0,0)) {
errmsg = "unexpected special character";
- goto utf7Error;
+ goto utf7Error;
} else {
*p++ = ch;
}
};
PyObject *PyUnicode_DecodeUTF8(const char *s,
- Py_ssize_t size,
- const char *errors)
+ Py_ssize_t size,
+ const char *errors)
{
return PyUnicode_DecodeUTF8Stateful(s, size, errors, NULL);
}
#endif
PyObject *PyUnicode_DecodeUTF8Stateful(const char *s,
- Py_ssize_t size,
- const char *errors,
- Py_ssize_t *consumed)
+ Py_ssize_t size,
+ const char *errors,
+ Py_ssize_t *consumed)
{
const char *starts = s;
int n;
n = utf8_code_length[ch];
if (s + n > e) {
- if (consumed)
- break;
- else {
- errmsg = "unexpected end of data";
- startinpos = s-starts;
- endinpos = size;
- goto utf8Error;
- }
- }
+ if (consumed)
+ break;
+ else {
+ errmsg = "unexpected end of data";
+ startinpos = s-starts;
+ endinpos = size;
+ goto utf8Error;
+ }
+ }
switch (n) {
case 0:
errmsg = "unexpected code byte";
- startinpos = s-starts;
- endinpos = startinpos+1;
- goto utf8Error;
+ startinpos = s-starts;
+ endinpos = startinpos+1;
+ goto utf8Error;
case 1:
errmsg = "internal error";
- startinpos = s-starts;
- endinpos = startinpos+1;
- goto utf8Error;
+ startinpos = s-starts;
+ endinpos = startinpos+1;
+ goto utf8Error;
case 2:
if ((s[1] & 0xc0) != 0x80) {
errmsg = "invalid data";
- startinpos = s-starts;
- endinpos = startinpos+2;
- goto utf8Error;
- }
+ startinpos = s-starts;
+ endinpos = startinpos+2;
+ goto utf8Error;
+ }
ch = ((s[0] & 0x1f) << 6) + (s[1] & 0x3f);
if (ch < 0x80) {
- startinpos = s-starts;
- endinpos = startinpos+2;
+ startinpos = s-starts;
+ endinpos = startinpos+2;
errmsg = "illegal encoding";
- goto utf8Error;
- }
- else
- *p++ = (Py_UNICODE)ch;
+ goto utf8Error;
+ }
+ else
+ *p++ = (Py_UNICODE)ch;
break;
case 3:
if ((s[1] & 0xc0) != 0x80 ||
(s[2] & 0xc0) != 0x80) {
errmsg = "invalid data";
- startinpos = s-starts;
- endinpos = startinpos+3;
- goto utf8Error;
- }
+ startinpos = s-starts;
+ endinpos = startinpos+3;
+ goto utf8Error;
+ }
ch = ((s[0] & 0x0f) << 12) + ((s[1] & 0x3f) << 6) + (s[2] & 0x3f);
if (ch < 0x0800) {
- /* Note: UTF-8 encodings of surrogates are considered
- legal UTF-8 sequences;
+ /* Note: UTF-8 encodings of surrogates are considered
+ legal UTF-8 sequences;
- XXX For wide builds (UCS-4) we should probably try
- to recombine the surrogates into a single code
- unit.
- */
+ XXX For wide builds (UCS-4) we should probably try
+ to recombine the surrogates into a single code
+ unit.
+ */
errmsg = "illegal encoding";
- startinpos = s-starts;
- endinpos = startinpos+3;
- goto utf8Error;
- }
- else
- *p++ = (Py_UNICODE)ch;
+ startinpos = s-starts;
+ endinpos = startinpos+3;
+ goto utf8Error;
+ }
+ else
+ *p++ = (Py_UNICODE)ch;
break;
case 4:
(s[2] & 0xc0) != 0x80 ||
(s[3] & 0xc0) != 0x80) {
errmsg = "invalid data";
- startinpos = s-starts;
- endinpos = startinpos+4;
- goto utf8Error;
- }
+ startinpos = s-starts;
+ endinpos = startinpos+4;
+ goto utf8Error;
+ }
ch = ((s[0] & 0x7) << 18) + ((s[1] & 0x3f) << 12) +
((s[2] & 0x3f) << 6) + (s[3] & 0x3f);
/* validate and convert to UTF-16 */
if ((ch < 0x10000) /* minimum value allowed for 4
- byte encoding */
+ byte encoding */
|| (ch > 0x10ffff)) /* maximum value allowed for
- UTF-16 */
- {
+ UTF-16 */
+ {
errmsg = "illegal encoding";
- startinpos = s-starts;
- endinpos = startinpos+4;
- goto utf8Error;
- }
+ startinpos = s-starts;
+ endinpos = startinpos+4;
+ goto utf8Error;
+ }
#ifdef Py_UNICODE_WIDE
- *p++ = (Py_UNICODE)ch;
+ *p++ = (Py_UNICODE)ch;
#else
/* compute and append the two surrogates: */
default:
/* Other sizes are only needed for UCS-4 */
errmsg = "unsupported Unicode code range";
- startinpos = s-starts;
- endinpos = startinpos+n;
- goto utf8Error;
+ startinpos = s-starts;
+ endinpos = startinpos+n;
+ goto utf8Error;
}
s += n;
- continue;
+ continue;
utf8Error:
outpos = p-PyUnicode_AS_UNICODE(unicode);
if (unicode_decode_call_errorhandler(
- errors, &errorHandler,
- "utf8", errmsg,
- &starts, &e, &startinpos, &endinpos, &exc, &s,
- &unicode, &outpos, &p))
- goto onError;
- aligned_end = (const char *) ((size_t) e & ~LONG_PTR_MASK);
+ errors, &errorHandler,
+ "utf8", errmsg,
+ &starts, &e, &startinpos, &endinpos, &exc, &s,
+ &unicode, &outpos, &p))
+ goto onError;
+ aligned_end = (const char *) ((size_t) e & ~LONG_PTR_MASK);
}
if (consumed)
- *consumed = s-starts;
+ *consumed = s-starts;
/* Adjust length */
if (_PyUnicode_Resize(&unicode, p - unicode->str) < 0)
*/
PyObject *
PyUnicode_EncodeUTF8(const Py_UNICODE *s,
- Py_ssize_t size,
- const char *errors)
+ Py_ssize_t size,
+ const char *errors)
{
#define MAX_SHORT_UNICHARS 300 /* largest size we'll do on the stack */
*p++ = (char)(0x80 | ((ch >> 6) & 0x3f));
*p++ = (char)(0x80 | (ch & 0x3f));
continue;
- }
+ }
encodeUCS4:
/* Encode UCS4 Unicode ordinals */
*p++ = (char)(0xf0 | (ch >> 18));
return NULL;
}
return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
- PyUnicode_GET_SIZE(unicode),
- NULL);
+ PyUnicode_GET_SIZE(unicode),
+ NULL);
}
/* --- UTF-32 Codec ------------------------------------------------------- */
PyObject *
PyUnicode_DecodeUTF32(const char *s,
- Py_ssize_t size,
- const char *errors,
- int *byteorder)
+ Py_ssize_t size,
+ const char *errors,
+ int *byteorder)
{
return PyUnicode_DecodeUTF32Stateful(s, size, errors, byteorder, NULL);
}
PyObject *
PyUnicode_DecodeUTF32Stateful(const char *s,
- Py_ssize_t size,
- const char *errors,
- int *byteorder,
- Py_ssize_t *consumed)
+ Py_ssize_t size,
+ const char *errors,
+ int *byteorder,
+ Py_ssize_t *consumed)
{
const char *starts = s;
Py_ssize_t startinpos;
codepoints => count how much extra space we need. */
#ifndef Py_UNICODE_WIDE
for (i = pairs = 0; i < size/4; i++)
- if (((Py_UCS4 *)s)[i] >= 0x10000)
- pairs++;
+ if (((Py_UCS4 *)s)[i] >= 0x10000)
+ pairs++;
#endif
/* This might be one to much, because of a BOM */
const Py_UCS4 bom = (q[iorder[3]] << 24) | (q[iorder[2]] << 16) |
(q[iorder[1]] << 8) | q[iorder[0]];
#ifdef BYTEORDER_IS_LITTLE_ENDIAN
- if (bom == 0x0000FEFF) {
- q += 4;
- bo = -1;
- }
- else if (bom == 0xFFFE0000) {
- q += 4;
- bo = 1;
- }
+ if (bom == 0x0000FEFF) {
+ q += 4;
+ bo = -1;
+ }
+ else if (bom == 0xFFFE0000) {
+ q += 4;
+ bo = 1;
+ }
#else
- if (bom == 0x0000FEFF) {
- q += 4;
- bo = 1;
- }
- else if (bom == 0xFFFE0000) {
- q += 4;
- bo = -1;
- }
+ if (bom == 0x0000FEFF) {
+ q += 4;
+ bo = 1;
+ }
+ else if (bom == 0xFFFE0000) {
+ q += 4;
+ bo = -1;
+ }
#endif
- }
+ }
}
if (bo == -1) {
}
while (q < e) {
- Py_UCS4 ch;
- /* remaining bytes at the end? (size should be divisible by 4) */
- if (e-q<4) {
- if (consumed)
- break;
- errmsg = "truncated data";
- startinpos = ((const char *)q)-starts;
- endinpos = ((const char *)e)-starts;
- goto utf32Error;
- /* The remaining input chars are ignored if the callback
- chooses to skip the input */
- }
- ch = (q[iorder[3]] << 24) | (q[iorder[2]] << 16) |
- (q[iorder[1]] << 8) | q[iorder[0]];
-
- if (ch >= 0x110000)
- {
- errmsg = "codepoint not in range(0x110000)";
- startinpos = ((const char *)q)-starts;
- endinpos = startinpos+4;
- goto utf32Error;
- }
+ Py_UCS4 ch;
+ /* remaining bytes at the end? (size should be divisible by 4) */
+ if (e-q<4) {
+ if (consumed)
+ break;
+ errmsg = "truncated data";
+ startinpos = ((const char *)q)-starts;
+ endinpos = ((const char *)e)-starts;
+ goto utf32Error;
+ /* The remaining input chars are ignored if the callback
+ chooses to skip the input */
+ }
+ ch = (q[iorder[3]] << 24) | (q[iorder[2]] << 16) |
+ (q[iorder[1]] << 8) | q[iorder[0]];
+
+ if (ch >= 0x110000)
+ {
+ errmsg = "codepoint not in range(0x110000)";
+ startinpos = ((const char *)q)-starts;
+ endinpos = startinpos+4;
+ goto utf32Error;
+ }
#ifndef Py_UNICODE_WIDE
- if (ch >= 0x10000)
- {
- *p++ = 0xD800 | ((ch-0x10000) >> 10);
- *p++ = 0xDC00 | ((ch-0x10000) & 0x3FF);
- }
- else
+ if (ch >= 0x10000)
+ {
+ *p++ = 0xD800 | ((ch-0x10000) >> 10);
+ *p++ = 0xDC00 | ((ch-0x10000) & 0x3FF);
+ }
+ else
#endif
- *p++ = ch;
- q += 4;
- continue;
+ *p++ = ch;
+ q += 4;
+ continue;
utf32Error:
- outpos = p-PyUnicode_AS_UNICODE(unicode);
- if (unicode_decode_call_errorhandler(
- errors, &errorHandler,
- "utf32", errmsg,
- &starts, (const char **)&e, &startinpos, &endinpos, &exc, (const char **)&q,
- &unicode, &outpos, &p))
- goto onError;
+ outpos = p-PyUnicode_AS_UNICODE(unicode);
+ if (unicode_decode_call_errorhandler(
+ errors, &errorHandler,
+ "utf32", errmsg,
+ &starts, (const char **)&e, &startinpos, &endinpos, &exc, (const char **)&q,
+ &unicode, &outpos, &p))
+ goto onError;
}
if (byteorder)
*byteorder = bo;
if (consumed)
- *consumed = (const char *)q-starts;
+ *consumed = (const char *)q-starts;
/* Adjust length */
if (_PyUnicode_Resize(&unicode, p - unicode->str) < 0)
PyObject *
PyUnicode_EncodeUTF32(const Py_UNICODE *s,
- Py_ssize_t size,
- const char *errors,
- int byteorder)
+ Py_ssize_t size,
+ const char *errors,
+ int byteorder)
{
PyObject *v;
unsigned char *p;
so we need less space. */
#ifndef Py_UNICODE_WIDE
for (i = pairs = 0; i < size-1; i++)
- if (0xD800 <= s[i] && s[i] <= 0xDBFF &&
- 0xDC00 <= s[i+1] && s[i+1] <= 0xDFFF)
- pairs++;
+ if (0xD800 <= s[i] && s[i] <= 0xDBFF &&
+ 0xDC00 <= s[i+1] && s[i+1] <= 0xDFFF)
+ pairs++;
#endif
nsize = (size - pairs + (byteorder == 0));
bytesize = nsize * 4;
if (bytesize / 4 != nsize)
- return PyErr_NoMemory();
+ return PyErr_NoMemory();
v = PyBytes_FromStringAndSize(NULL, bytesize);
if (v == NULL)
return NULL;
p = (unsigned char *)PyBytes_AS_STRING(v);
if (byteorder == 0)
- STORECHAR(0xFEFF);
+ STORECHAR(0xFEFF);
if (size == 0)
goto done;
}
while (size-- > 0) {
- Py_UCS4 ch = *s++;
+ Py_UCS4 ch = *s++;
#ifndef Py_UNICODE_WIDE
- if (0xD800 <= ch && ch <= 0xDBFF && size > 0) {
- Py_UCS4 ch2 = *s;
- if (0xDC00 <= ch2 && ch2 <= 0xDFFF) {
- ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000;
- s++;
- size--;
- }
- }
+ if (0xD800 <= ch && ch <= 0xDBFF && size > 0) {
+ Py_UCS4 ch2 = *s;
+ if (0xDC00 <= ch2 && ch2 <= 0xDFFF) {
+ ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000;
+ s++;
+ size--;
+ }
+ }
#endif
STORECHAR(ch);
}
return NULL;
}
return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(unicode),
- PyUnicode_GET_SIZE(unicode),
- NULL,
- 0);
+ PyUnicode_GET_SIZE(unicode),
+ NULL,
+ 0);
}
/* --- UTF-16 Codec ------------------------------------------------------- */
PyObject *
PyUnicode_DecodeUTF16(const char *s,
- Py_ssize_t size,
- const char *errors,
- int *byteorder)
+ Py_ssize_t size,
+ const char *errors,
+ int *byteorder)
{
return PyUnicode_DecodeUTF16Stateful(s, size, errors, byteorder, NULL);
}
PyObject *
PyUnicode_DecodeUTF16Stateful(const char *s,
- Py_ssize_t size,
- const char *errors,
- int *byteorder,
- Py_ssize_t *consumed)
+ Py_ssize_t size,
+ const char *errors,
+ int *byteorder,
+ Py_ssize_t *consumed)
{
const char *starts = s;
Py_ssize_t startinpos;
if (size >= 2) {
const Py_UNICODE bom = (q[ihi] << 8) | q[ilo];
#ifdef BYTEORDER_IS_LITTLE_ENDIAN
- if (bom == 0xFEFF) {
- q += 2;
- bo = -1;
- }
- else if (bom == 0xFFFE) {
- q += 2;
- bo = 1;
- }
+ if (bom == 0xFEFF) {
+ q += 2;
+ bo = -1;
+ }
+ else if (bom == 0xFFFE) {
+ q += 2;
+ bo = 1;
+ }
#else
- if (bom == 0xFEFF) {
- q += 2;
- bo = 1;
- }
- else if (bom == 0xFFFE) {
- q += 2;
- bo = -1;
- }
+ if (bom == 0xFEFF) {
+ q += 2;
+ bo = 1;
+ }
+ else if (bom == 0xFFFE) {
+ q += 2;
+ bo = -1;
+ }
#endif
- }
+ }
}
if (bo == -1) {
aligned_end = (const unsigned char *) ((size_t) e & ~LONG_PTR_MASK);
while (q < e) {
- Py_UNICODE ch;
+ Py_UNICODE ch;
/* First check for possible aligned read of a C 'long'. Unaligned
reads are more expensive, better to defer to another iteration. */
if (!((size_t) q & LONG_PTR_MASK)) {
if (q >= e)
break;
}
- ch = (q[ihi] << 8) | q[ilo];
-
- q += 2;
-
- if (ch < 0xD800 || ch > 0xDFFF) {
- *p++ = ch;
- continue;
- }
-
- /* UTF-16 code pair: */
- if (q > e) {
- errmsg = "unexpected end of data";
- startinpos = (((const char *)q) - 2) - starts;
- endinpos = ((const char *)e) + 1 - starts;
- goto utf16Error;
- }
- if (0xD800 <= ch && ch <= 0xDBFF) {
- Py_UNICODE ch2 = (q[ihi] << 8) | q[ilo];
- q += 2;
- if (0xDC00 <= ch2 && ch2 <= 0xDFFF) {
+ ch = (q[ihi] << 8) | q[ilo];
+
+ q += 2;
+
+ if (ch < 0xD800 || ch > 0xDFFF) {
+ *p++ = ch;
+ continue;
+ }
+
+ /* UTF-16 code pair: */
+ if (q > e) {
+ errmsg = "unexpected end of data";
+ startinpos = (((const char *)q) - 2) - starts;
+ endinpos = ((const char *)e) + 1 - starts;
+ goto utf16Error;
+ }
+ if (0xD800 <= ch && ch <= 0xDBFF) {
+ Py_UNICODE ch2 = (q[ihi] << 8) | q[ilo];
+ q += 2;
+ if (0xDC00 <= ch2 && ch2 <= 0xDFFF) {
#ifndef Py_UNICODE_WIDE
- *p++ = ch;
- *p++ = ch2;
+ *p++ = ch;
+ *p++ = ch2;
#else
- *p++ = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000;
+ *p++ = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000;
#endif
- continue;
- }
- else {
+ continue;
+ }
+ else {
errmsg = "illegal UTF-16 surrogate";
- startinpos = (((const char *)q)-4)-starts;
- endinpos = startinpos+2;
- goto utf16Error;
- }
+ startinpos = (((const char *)q)-4)-starts;
+ endinpos = startinpos+2;
+ goto utf16Error;
+ }
- }
- errmsg = "illegal encoding";
- startinpos = (((const char *)q)-2)-starts;
- endinpos = startinpos+2;
- /* Fall through to report the error */
+ }
+ errmsg = "illegal encoding";
+ startinpos = (((const char *)q)-2)-starts;
+ endinpos = startinpos+2;
+ /* Fall through to report the error */
utf16Error:
- outpos = p - PyUnicode_AS_UNICODE(unicode);
- if (unicode_decode_call_errorhandler(
+ outpos = p - PyUnicode_AS_UNICODE(unicode);
+ if (unicode_decode_call_errorhandler(
errors,
&errorHandler,
"utf16", errmsg,
&unicode,
&outpos,
&p))
- goto onError;
+ goto onError;
}
/* remaining byte at the end? (size should be even) */
if (e == q) {
*byteorder = bo;
if (consumed)
- *consumed = (const char *)q-starts;
+ *consumed = (const char *)q-starts;
/* Adjust length */
if (_PyUnicode_Resize(&unicode, p - unicode->str) < 0)
PyObject *
PyUnicode_EncodeUTF16(const Py_UNICODE *s,
- Py_ssize_t size,
- const char *errors,
- int byteorder)
+ Py_ssize_t size,
+ const char *errors,
+ int byteorder)
{
PyObject *v;
unsigned char *p;
#ifdef Py_UNICODE_WIDE
for (i = pairs = 0; i < size; i++)
- if (s[i] >= 0x10000)
- pairs++;
+ if (s[i] >= 0x10000)
+ pairs++;
#endif
/* 2 * (size + pairs + (byteorder == 0)) */
if (size > PY_SSIZE_T_MAX ||
size > PY_SSIZE_T_MAX - pairs - (byteorder == 0))
- return PyErr_NoMemory();
+ return PyErr_NoMemory();
nsize = size + pairs + (byteorder == 0);
bytesize = nsize * 2;
if (bytesize / 2 != nsize)
- return PyErr_NoMemory();
+ return PyErr_NoMemory();
v = PyBytes_FromStringAndSize(NULL, bytesize);
if (v == NULL)
return NULL;
p = (unsigned char *)PyBytes_AS_STRING(v);
if (byteorder == 0)
- STORECHAR(0xFEFF);
+ STORECHAR(0xFEFF);
if (size == 0)
goto done;
}
while (size-- > 0) {
- Py_UNICODE ch = *s++;
- Py_UNICODE ch2 = 0;
+ Py_UNICODE ch = *s++;
+ Py_UNICODE ch2 = 0;
#ifdef Py_UNICODE_WIDE
- if (ch >= 0x10000) {
- ch2 = 0xDC00 | ((ch-0x10000) & 0x3FF);
- ch = 0xD800 | ((ch-0x10000) >> 10);
- }
+ if (ch >= 0x10000) {
+ ch2 = 0xDC00 | ((ch-0x10000) & 0x3FF);
+ ch = 0xD800 | ((ch-0x10000) >> 10);
+ }
#endif
STORECHAR(ch);
if (ch2)
return NULL;
}
return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(unicode),
- PyUnicode_GET_SIZE(unicode),
- NULL,
- 0);
+ PyUnicode_GET_SIZE(unicode),
+ NULL,
+ 0);
}
/* --- Unicode Escape Codec ----------------------------------------------- */
static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL;
PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
- Py_ssize_t size,
- const char *errors)
+ Py_ssize_t size,
+ const char *errors)
{
const char *starts = s;
Py_ssize_t startinpos;
static const char *hexdigits = "0123456789abcdef";
PyObject *PyUnicode_EncodeUnicodeEscape(const Py_UNICODE *s,
- Py_ssize_t size)
+ Py_ssize_t size)
{
PyObject *repr;
char *p;
return PyBytes_FromStringAndSize(NULL, 0);
if (size > (PY_SSIZE_T_MAX - 2 - 1) / expandsize)
- return PyErr_NoMemory();
+ return PyErr_NoMemory();
repr = PyBytes_FromStringAndSize(NULL,
2
*p++ = hexdigits[(ch >> 8) & 0x0000000F];
*p++ = hexdigits[(ch >> 4) & 0x0000000F];
*p++ = hexdigits[ch & 0x0000000F];
- continue;
+ continue;
}
#else
- /* Map UTF-16 surrogate pairs to '\U00xxxxxx' */
- else if (ch >= 0xD800 && ch < 0xDC00) {
- Py_UNICODE ch2;
- Py_UCS4 ucs;
-
- ch2 = *s++;
- size--;
- if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
- ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
- *p++ = '\\';
- *p++ = 'U';
- *p++ = hexdigits[(ucs >> 28) & 0x0000000F];
- *p++ = hexdigits[(ucs >> 24) & 0x0000000F];
- *p++ = hexdigits[(ucs >> 20) & 0x0000000F];
- *p++ = hexdigits[(ucs >> 16) & 0x0000000F];
- *p++ = hexdigits[(ucs >> 12) & 0x0000000F];
- *p++ = hexdigits[(ucs >> 8) & 0x0000000F];
- *p++ = hexdigits[(ucs >> 4) & 0x0000000F];
- *p++ = hexdigits[ucs & 0x0000000F];
- continue;
- }
- /* Fall through: isolated surrogates are copied as-is */
- s--;
- size++;
- }
+ /* Map UTF-16 surrogate pairs to '\U00xxxxxx' */
+ else if (ch >= 0xD800 && ch < 0xDC00) {
+ Py_UNICODE ch2;
+ Py_UCS4 ucs;
+
+ ch2 = *s++;
+ size--;
+ if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
+ ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
+ *p++ = '\\';
+ *p++ = 'U';
+ *p++ = hexdigits[(ucs >> 28) & 0x0000000F];
+ *p++ = hexdigits[(ucs >> 24) & 0x0000000F];
+ *p++ = hexdigits[(ucs >> 20) & 0x0000000F];
+ *p++ = hexdigits[(ucs >> 16) & 0x0000000F];
+ *p++ = hexdigits[(ucs >> 12) & 0x0000000F];
+ *p++ = hexdigits[(ucs >> 8) & 0x0000000F];
+ *p++ = hexdigits[(ucs >> 4) & 0x0000000F];
+ *p++ = hexdigits[ucs & 0x0000000F];
+ continue;
+ }
+ /* Fall through: isolated surrogates are copied as-is */
+ s--;
+ size++;
+ }
#endif
/* Map 16-bit characters to '\uxxxx' */
/* --- Raw Unicode Escape Codec ------------------------------------------- */
PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s,
- Py_ssize_t size,
- const char *errors)
+ Py_ssize_t size,
+ const char *errors)
{
const char *starts = s;
Py_ssize_t startinpos;
handler might have to resize the string) */
v = _PyUnicode_New(size);
if (v == NULL)
- goto onError;
+ goto onError;
if (size == 0)
- return (PyObject *)v;
+ return (PyObject *)v;
p = PyUnicode_AS_UNICODE(v);
end = s + size;
while (s < end) {
- unsigned char c;
- Py_UCS4 x;
- int i;
+ unsigned char c;
+ Py_UCS4 x;
+ int i;
int count;
- /* Non-escape characters are interpreted as Unicode ordinals */
- if (*s != '\\') {
- *p++ = (unsigned char)*s++;
- continue;
- }
- startinpos = s-starts;
-
- /* \u-escapes are only interpreted iff the number of leading
- backslashes if odd */
- bs = s;
- for (;s < end;) {
- if (*s != '\\')
- break;
- *p++ = (unsigned char)*s++;
- }
- if (((s - bs) & 1) == 0 ||
- s >= end ||
- (*s != 'u' && *s != 'U')) {
- continue;
- }
- p--;
+ /* Non-escape characters are interpreted as Unicode ordinals */
+ if (*s != '\\') {
+ *p++ = (unsigned char)*s++;
+ continue;
+ }
+ startinpos = s-starts;
+
+ /* \u-escapes are only interpreted iff the number of leading
+ backslashes if odd */
+ bs = s;
+ for (;s < end;) {
+ if (*s != '\\')
+ break;
+ *p++ = (unsigned char)*s++;
+ }
+ if (((s - bs) & 1) == 0 ||
+ s >= end ||
+ (*s != 'u' && *s != 'U')) {
+ continue;
+ }
+ p--;
count = *s=='u' ? 4 : 8;
- s++;
-
- /* \uXXXX with 4 hex digits, \Uxxxxxxxx with 8 */
- outpos = p-PyUnicode_AS_UNICODE(v);
- for (x = 0, i = 0; i < count; ++i, ++s) {
- c = (unsigned char)*s;
- if (!ISXDIGIT(c)) {
- endinpos = s-starts;
- if (unicode_decode_call_errorhandler(
- errors, &errorHandler,
- "rawunicodeescape", "truncated \\uXXXX",
- &starts, &end, &startinpos, &endinpos, &exc, &s,
- &v, &outpos, &p))
- goto onError;
- goto nextByte;
- }
- x = (x<<4) & ~0xF;
- if (c >= '0' && c <= '9')
- x += c - '0';
- else if (c >= 'a' && c <= 'f')
- x += 10 + c - 'a';
- else
- x += 10 + c - 'A';
- }
+ s++;
+
+ /* \uXXXX with 4 hex digits, \Uxxxxxxxx with 8 */
+ outpos = p-PyUnicode_AS_UNICODE(v);
+ for (x = 0, i = 0; i < count; ++i, ++s) {
+ c = (unsigned char)*s;
+ if (!ISXDIGIT(c)) {
+ endinpos = s-starts;
+ if (unicode_decode_call_errorhandler(
+ errors, &errorHandler,
+ "rawunicodeescape", "truncated \\uXXXX",
+ &starts, &end, &startinpos, &endinpos, &exc, &s,
+ &v, &outpos, &p))
+ goto onError;
+ goto nextByte;
+ }
+ x = (x<<4) & ~0xF;
+ if (c >= '0' && c <= '9')
+ x += c - '0';
+ else if (c >= 'a' && c <= 'f')
+ x += 10 + c - 'a';
+ else
+ x += 10 + c - 'A';
+ }
if (x <= 0xffff)
/* UCS-2 character */
*p++ = (Py_UNICODE) x;
if (unicode_decode_call_errorhandler(
errors, &errorHandler,
"rawunicodeescape", "\\Uxxxxxxxx out of range",
- &starts, &end, &startinpos, &endinpos, &exc, &s,
- &v, &outpos, &p))
- goto onError;
+ &starts, &end, &startinpos, &endinpos, &exc, &s,
+ &v, &outpos, &p))
+ goto onError;
}
- nextByte:
- ;
+ nextByte:
+ ;
}
if (_PyUnicode_Resize(&v, p - PyUnicode_AS_UNICODE(v)) < 0)
- goto onError;
+ goto onError;
Py_XDECREF(errorHandler);
Py_XDECREF(exc);
return (PyObject *)v;
}
PyObject *PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s,
- Py_ssize_t size)
+ Py_ssize_t size)
{
PyObject *repr;
char *p;
#else
const Py_ssize_t expandsize = 6;
#endif
-
+
if (size > PY_SSIZE_T_MAX / expandsize)
- return PyErr_NoMemory();
-
+ return PyErr_NoMemory();
+
repr = PyBytes_FromStringAndSize(NULL, expandsize * size);
if (repr == NULL)
return NULL;
while (size-- > 0) {
Py_UNICODE ch = *s++;
#ifdef Py_UNICODE_WIDE
- /* Map 32-bit characters to '\Uxxxxxxxx' */
- if (ch >= 0x10000) {
+ /* Map 32-bit characters to '\Uxxxxxxxx' */
+ if (ch >= 0x10000) {
*p++ = '\\';
*p++ = 'U';
*p++ = hexdigits[(ch >> 28) & 0xf];
}
else
#else
- /* Map UTF-16 surrogate pairs to '\U00xxxxxx' */
- if (ch >= 0xD800 && ch < 0xDC00) {
- Py_UNICODE ch2;
- Py_UCS4 ucs;
-
- ch2 = *s++;
- size--;
- if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
- ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
- *p++ = '\\';
- *p++ = 'U';
- *p++ = hexdigits[(ucs >> 28) & 0xf];
- *p++ = hexdigits[(ucs >> 24) & 0xf];
- *p++ = hexdigits[(ucs >> 20) & 0xf];
- *p++ = hexdigits[(ucs >> 16) & 0xf];
- *p++ = hexdigits[(ucs >> 12) & 0xf];
- *p++ = hexdigits[(ucs >> 8) & 0xf];
- *p++ = hexdigits[(ucs >> 4) & 0xf];
- *p++ = hexdigits[ucs & 0xf];
- continue;
- }
- /* Fall through: isolated surrogates are copied as-is */
- s--;
- size++;
- }
+ /* Map UTF-16 surrogate pairs to '\U00xxxxxx' */
+ if (ch >= 0xD800 && ch < 0xDC00) {
+ Py_UNICODE ch2;
+ Py_UCS4 ucs;
+
+ ch2 = *s++;
+ size--;
+ if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
+ ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
+ *p++ = '\\';
+ *p++ = 'U';
+ *p++ = hexdigits[(ucs >> 28) & 0xf];
+ *p++ = hexdigits[(ucs >> 24) & 0xf];
+ *p++ = hexdigits[(ucs >> 20) & 0xf];
+ *p++ = hexdigits[(ucs >> 16) & 0xf];
+ *p++ = hexdigits[(ucs >> 12) & 0xf];
+ *p++ = hexdigits[(ucs >> 8) & 0xf];
+ *p++ = hexdigits[(ucs >> 4) & 0xf];
+ *p++ = hexdigits[ucs & 0xf];
+ continue;
+ }
+ /* Fall through: isolated surrogates are copied as-is */
+ s--;
+ size++;
+ }
#endif
- /* Map 16-bit characters to '\uxxxx' */
- if (ch >= 256) {
+ /* Map 16-bit characters to '\uxxxx' */
+ if (ch >= 256) {
*p++ = '\\';
*p++ = 'u';
*p++ = hexdigits[(ch >> 12) & 0xf];
*p++ = hexdigits[(ch >> 4) & 0xf];
*p++ = hexdigits[ch & 15];
}
- /* Copy everything else as-is */
- else
+ /* Copy everything else as-is */
+ else
*p++ = (char) ch;
}
size = p - q;
/* --- Unicode Internal Codec ------------------------------------------- */
PyObject *_PyUnicode_DecodeUnicodeInternal(const char *s,
- Py_ssize_t size,
- const char *errors)
+ Py_ssize_t size,
+ const char *errors)
{
const char *starts = s;
Py_ssize_t startinpos;
/* XXX overflow detection missing */
v = _PyUnicode_New((size+Py_UNICODE_SIZE-1)/ Py_UNICODE_SIZE);
if (v == NULL)
- goto onError;
+ goto onError;
if (PyUnicode_GetSize((PyObject *)v) == 0)
- return (PyObject *)v;
+ return (PyObject *)v;
p = PyUnicode_AS_UNICODE(v);
end = s + size;
/* --- Latin-1 Codec ------------------------------------------------------ */
PyObject *PyUnicode_DecodeLatin1(const char *s,
- Py_ssize_t size,
- const char *errors)
+ Py_ssize_t size,
+ const char *errors)
{
PyUnicodeObject *v;
Py_UNICODE *p;
/* Latin-1 is equivalent to the first 256 ordinals in Unicode. */
if (size == 1) {
- Py_UNICODE r = *(unsigned char*)s;
- return PyUnicode_FromUnicode(&r, 1);
+ Py_UNICODE r = *(unsigned char*)s;
+ return PyUnicode_FromUnicode(&r, 1);
}
v = _PyUnicode_New(size);
if (v == NULL)
- goto onError;
+ goto onError;
if (size == 0)
- return (PyObject *)v;
+ return (PyObject *)v;
p = PyUnicode_AS_UNICODE(v);
e = s + size;
/* Unrolling the copy makes it much faster by reducing the looping
const char *reason)
{
if (*exceptionObject == NULL) {
- *exceptionObject = PyUnicodeEncodeError_Create(
- encoding, unicode, size, startpos, endpos, reason);
+ *exceptionObject = PyUnicodeEncodeError_Create(
+ encoding, unicode, size, startpos, endpos, reason);
}
else {
- if (PyUnicodeEncodeError_SetStart(*exceptionObject, startpos))
- goto onError;
- if (PyUnicodeEncodeError_SetEnd(*exceptionObject, endpos))
- goto onError;
- if (PyUnicodeEncodeError_SetReason(*exceptionObject, reason))
- goto onError;
- return;
- onError:
- Py_DECREF(*exceptionObject);
- *exceptionObject = NULL;
+ if (PyUnicodeEncodeError_SetStart(*exceptionObject, startpos))
+ goto onError;
+ if (PyUnicodeEncodeError_SetEnd(*exceptionObject, endpos))
+ goto onError;
+ if (PyUnicodeEncodeError_SetReason(*exceptionObject, reason))
+ goto onError;
+ return;
+ onError:
+ Py_DECREF(*exceptionObject);
+ *exceptionObject = NULL;
}
}
const char *reason)
{
make_encode_exception(exceptionObject,
- encoding, unicode, size, startpos, endpos, reason);
+ encoding, unicode, size, startpos, endpos, reason);
if (*exceptionObject != NULL)
- PyCodec_StrictErrors(*exceptionObject);
+ PyCodec_StrictErrors(*exceptionObject);
}
/* error handling callback helper:
PyObject *resunicode;
if (*errorHandler == NULL) {
- *errorHandler = PyCodec_LookupError(errors);
+ *errorHandler = PyCodec_LookupError(errors);
if (*errorHandler == NULL)
- return NULL;
+ return NULL;
}
make_encode_exception(exceptionObject,
- encoding, unicode, size, startpos, endpos, reason);
+ encoding, unicode, size, startpos, endpos, reason);
if (*exceptionObject == NULL)
- return NULL;
+ return NULL;
restuple = PyObject_CallFunctionObjArgs(
- *errorHandler, *exceptionObject, NULL);
+ *errorHandler, *exceptionObject, NULL);
if (restuple == NULL)
- return NULL;
+ return NULL;
if (!PyTuple_Check(restuple)) {
- PyErr_Format(PyExc_TypeError, &argparse[4]);
- Py_DECREF(restuple);
- return NULL;
+ PyErr_Format(PyExc_TypeError, &argparse[4]);
+ Py_DECREF(restuple);
+ return NULL;
}
if (!PyArg_ParseTuple(restuple, argparse, &PyUnicode_Type,
- &resunicode, newpos)) {
- Py_DECREF(restuple);
- return NULL;
+ &resunicode, newpos)) {
+ Py_DECREF(restuple);
+ return NULL;
}
if (*newpos<0)
- *newpos = size+*newpos;
+ *newpos = size+*newpos;
if (*newpos<0 || *newpos>size) {
- PyErr_Format(PyExc_IndexError, "position %zd from error handler out of bounds", *newpos);
- Py_DECREF(restuple);
- return NULL;
+ PyErr_Format(PyExc_IndexError, "position %zd from error handler out of bounds", *newpos);
+ Py_DECREF(restuple);
+ return NULL;
}
Py_INCREF(resunicode);
Py_DECREF(restuple);
}
static PyObject *unicode_encode_ucs1(const Py_UNICODE *p,
- Py_ssize_t size,
- const char *errors,
- int limit)
+ Py_ssize_t size,
+ const char *errors,
+ int limit)
{
/* output object */
PyObject *res;
ressize = size;
while (p<endp) {
- Py_UNICODE c = *p;
-
- /* can we encode this? */
- if (c<limit) {
- /* no overflow check, because we know that the space is enough */
- *str++ = (char)c;
- ++p;
- }
- else {
- Py_ssize_t unicodepos = p-startp;
- Py_ssize_t requiredsize;
- PyObject *repunicode;
- Py_ssize_t repsize;
- Py_ssize_t newpos;
- Py_ssize_t respos;
- Py_UNICODE *uni2;
- /* startpos for collecting unencodable chars */
- const Py_UNICODE *collstart = p;
- const Py_UNICODE *collend = p;
- /* find all unecodable characters */
- while ((collend < endp) && ((*collend)>=limit))
- ++collend;
- /* cache callback name lookup (if not done yet, i.e. it's the first error) */
- if (known_errorHandler==-1) {
- if ((errors==NULL) || (!strcmp(errors, "strict")))
- known_errorHandler = 1;
- else if (!strcmp(errors, "replace"))
- known_errorHandler = 2;
- else if (!strcmp(errors, "ignore"))
- known_errorHandler = 3;
- else if (!strcmp(errors, "xmlcharrefreplace"))
- known_errorHandler = 4;
- else
- known_errorHandler = 0;
- }
- switch (known_errorHandler) {
- case 1: /* strict */
- raise_encode_exception(&exc, encoding, startp, size, collstart-startp, collend-startp, reason);
- goto onError;
- case 2: /* replace */
- while (collstart++<collend)
- *str++ = '?'; /* fall through */
- case 3: /* ignore */
- p = collend;
- break;
- case 4: /* xmlcharrefreplace */
- respos = str - PyBytes_AS_STRING(res);
- /* determine replacement size (temporarily (mis)uses p) */
- for (p = collstart, repsize = 0; p < collend; ++p) {
- if (*p<10)
- repsize += 2+1+1;
- else if (*p<100)
- repsize += 2+2+1;
- else if (*p<1000)
- repsize += 2+3+1;
- else if (*p<10000)
- repsize += 2+4+1;
+ Py_UNICODE c = *p;
+
+ /* can we encode this? */
+ if (c<limit) {
+ /* no overflow check, because we know that the space is enough */
+ *str++ = (char)c;
+ ++p;
+ }
+ else {
+ Py_ssize_t unicodepos = p-startp;
+ Py_ssize_t requiredsize;
+ PyObject *repunicode;
+ Py_ssize_t repsize;
+ Py_ssize_t newpos;
+ Py_ssize_t respos;
+ Py_UNICODE *uni2;
+ /* startpos for collecting unencodable chars */
+ const Py_UNICODE *collstart = p;
+ const Py_UNICODE *collend = p;
+ /* find all unencodable characters */
+ while ((collend < endp) && ((*collend)>=limit))
+ ++collend;
+ /* cache callback name lookup (if not done yet, i.e. it's the first error) */
+ if (known_errorHandler==-1) {
+ if ((errors==NULL) || (!strcmp(errors, "strict")))
+ known_errorHandler = 1;
+ else if (!strcmp(errors, "replace"))
+ known_errorHandler = 2;
+ else if (!strcmp(errors, "ignore"))
+ known_errorHandler = 3;
+ else if (!strcmp(errors, "xmlcharrefreplace"))
+ known_errorHandler = 4;
+ else
+ known_errorHandler = 0;
+ }
+ switch (known_errorHandler) {
+ case 1: /* strict */
+ raise_encode_exception(&exc, encoding, startp, size, collstart-startp, collend-startp, reason);
+ goto onError;
+ case 2: /* replace */
+ while (collstart++<collend)
+ *str++ = '?'; /* fall through */
+ case 3: /* ignore */
+ p = collend;
+ break;
+ case 4: /* xmlcharrefreplace */
+ respos = str - PyBytes_AS_STRING(res);
+ /* determine replacement size (temporarily (mis)uses p) */
+ for (p = collstart, repsize = 0; p < collend; ++p) {
+ if (*p<10)
+ repsize += 2+1+1;
+ else if (*p<100)
+ repsize += 2+2+1;
+ else if (*p<1000)
+ repsize += 2+3+1;
+ else if (*p<10000)
+ repsize += 2+4+1;
#ifndef Py_UNICODE_WIDE
- else
- repsize += 2+5+1;
+ else
+ repsize += 2+5+1;
#else
- else if (*p<100000)
- repsize += 2+5+1;
- else if (*p<1000000)
- repsize += 2+6+1;
- else
- repsize += 2+7+1;
+ else if (*p<100000)
+ repsize += 2+5+1;
+ else if (*p<1000000)
+ repsize += 2+6+1;
+ else
+ repsize += 2+7+1;
#endif
- }
- requiredsize = respos+repsize+(endp-collend);
- if (requiredsize > ressize) {
- if (requiredsize<2*ressize)
- requiredsize = 2*ressize;
- if (_PyBytes_Resize(&res, requiredsize))
- goto onError;
- str = PyBytes_AS_STRING(res) + respos;
- ressize = requiredsize;
- }
- /* generate replacement (temporarily (mis)uses p) */
- for (p = collstart; p < collend; ++p) {
- str += sprintf(str, "&#%d;", (int)*p);
- }
- p = collend;
- break;
- default:
- repunicode = unicode_encode_call_errorhandler(errors, &errorHandler,
- encoding, reason, startp, size, &exc,
- collstart-startp, collend-startp, &newpos);
- if (repunicode == NULL)
- goto onError;
- /* need more space? (at least enough for what we
- have+the replacement+the rest of the string, so
- we won't have to check space for encodable characters) */
- respos = str - PyBytes_AS_STRING(res);
- repsize = PyUnicode_GET_SIZE(repunicode);
- requiredsize = respos+repsize+(endp-collend);
- if (requiredsize > ressize) {
- if (requiredsize<2*ressize)
- requiredsize = 2*ressize;
- if (_PyBytes_Resize(&res, requiredsize)) {
- Py_DECREF(repunicode);
- goto onError;
- }
- str = PyBytes_AS_STRING(res) + respos;
- ressize = requiredsize;
- }
- /* check if there is anything unencodable in the replacement
- and copy it to the output */
- for (uni2 = PyUnicode_AS_UNICODE(repunicode);repsize-->0; ++uni2, ++str) {
- c = *uni2;
- if (c >= limit) {
- raise_encode_exception(&exc, encoding, startp, size,
- unicodepos, unicodepos+1, reason);
- Py_DECREF(repunicode);
- goto onError;
- }
- *str = (char)c;
- }
- p = startp + newpos;
- Py_DECREF(repunicode);
- }
- }
+ }
+ requiredsize = respos+repsize+(endp-collend);
+ if (requiredsize > ressize) {
+ if (requiredsize<2*ressize)
+ requiredsize = 2*ressize;
+ if (_PyBytes_Resize(&res, requiredsize))
+ goto onError;
+ str = PyBytes_AS_STRING(res) + respos;
+ ressize = requiredsize;
+ }
+ /* generate replacement (temporarily (mis)uses p) */
+ for (p = collstart; p < collend; ++p) {
+ str += sprintf(str, "&#%d;", (int)*p);
+ }
+ p = collend;
+ break;
+ default:
+ repunicode = unicode_encode_call_errorhandler(errors, &errorHandler,
+ encoding, reason, startp, size, &exc,
+ collstart-startp, collend-startp, &newpos);
+ if (repunicode == NULL)
+ goto onError;
+ /* need more space? (at least enough for what we
+ have+the replacement+the rest of the string, so
+ we won't have to check space for encodable characters) */
+ respos = str - PyBytes_AS_STRING(res);
+ repsize = PyUnicode_GET_SIZE(repunicode);
+ requiredsize = respos+repsize+(endp-collend);
+ if (requiredsize > ressize) {
+ if (requiredsize<2*ressize)
+ requiredsize = 2*ressize;
+ if (_PyBytes_Resize(&res, requiredsize)) {
+ Py_DECREF(repunicode);
+ goto onError;
+ }
+ str = PyBytes_AS_STRING(res) + respos;
+ ressize = requiredsize;
+ }
+ /* check if there is anything unencodable in the replacement
+ and copy it to the output */
+ for (uni2 = PyUnicode_AS_UNICODE(repunicode);repsize-->0; ++uni2, ++str) {
+ c = *uni2;
+ if (c >= limit) {
+ raise_encode_exception(&exc, encoding, startp, size,
+ unicodepos, unicodepos+1, reason);
+ Py_DECREF(repunicode);
+ goto onError;
+ }
+ *str = (char)c;
+ }
+ p = startp + newpos;
+ Py_DECREF(repunicode);
+ }
+ }
}
/* Resize if we allocated to much */
size = str - PyBytes_AS_STRING(res);
}
PyObject *PyUnicode_EncodeLatin1(const Py_UNICODE *p,
- Py_ssize_t size,
- const char *errors)
+ Py_ssize_t size,
+ const char *errors)
{
return unicode_encode_ucs1(p, size, errors, 256);
}
PyObject *PyUnicode_AsLatin1String(PyObject *unicode)
{
if (!PyUnicode_Check(unicode)) {
- PyErr_BadArgument();
- return NULL;
+ PyErr_BadArgument();
+ return NULL;
}
return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(unicode),
- PyUnicode_GET_SIZE(unicode),
- NULL);
+ PyUnicode_GET_SIZE(unicode),
+ NULL);
}
/* --- 7-bit ASCII Codec -------------------------------------------------- */
PyObject *PyUnicode_DecodeASCII(const char *s,
- Py_ssize_t size,
- const char *errors)
+ Py_ssize_t size,
+ const char *errors)
{
const char *starts = s;
PyUnicodeObject *v;
/* ASCII is equivalent to the first 128 ordinals in Unicode. */
if (size == 1 && *(unsigned char*)s < 128) {
- Py_UNICODE r = *(unsigned char*)s;
- return PyUnicode_FromUnicode(&r, 1);
+ Py_UNICODE r = *(unsigned char*)s;
+ return PyUnicode_FromUnicode(&r, 1);
}
v = _PyUnicode_New(size);
if (v == NULL)
- goto onError;
+ goto onError;
if (size == 0)
- return (PyObject *)v;
+ return (PyObject *)v;
p = PyUnicode_AS_UNICODE(v);
e = s + size;
while (s < e) {
- register unsigned char c = (unsigned char)*s;
- if (c < 128) {
- *p++ = c;
- ++s;
- }
- else {
- startinpos = s-starts;
- endinpos = startinpos + 1;
- outpos = p - (Py_UNICODE *)PyUnicode_AS_UNICODE(v);
- if (unicode_decode_call_errorhandler(
- errors, &errorHandler,
- "ascii", "ordinal not in range(128)",
- &starts, &e, &startinpos, &endinpos, &exc, &s,
- &v, &outpos, &p))
- goto onError;
- }
+ register unsigned char c = (unsigned char)*s;
+ if (c < 128) {
+ *p++ = c;
+ ++s;
+ }
+ else {
+ startinpos = s-starts;
+ endinpos = startinpos + 1;
+ outpos = p - (Py_UNICODE *)PyUnicode_AS_UNICODE(v);
+ if (unicode_decode_call_errorhandler(
+ errors, &errorHandler,
+ "ascii", "ordinal not in range(128)",
+ &starts, &e, &startinpos, &endinpos, &exc, &s,
+ &v, &outpos, &p))
+ goto onError;
+ }
}
if (p - PyUnicode_AS_UNICODE(v) < PyUnicode_GET_SIZE(v))
- if (_PyUnicode_Resize(&v, p - PyUnicode_AS_UNICODE(v)) < 0)
- goto onError;
+ if (_PyUnicode_Resize(&v, p - PyUnicode_AS_UNICODE(v)) < 0)
+ goto onError;
Py_XDECREF(errorHandler);
Py_XDECREF(exc);
return (PyObject *)v;
}
PyObject *PyUnicode_EncodeASCII(const Py_UNICODE *p,
- Py_ssize_t size,
- const char *errors)
+ Py_ssize_t size,
+ const char *errors)
{
return unicode_encode_ucs1(p, size, errors, 128);
}
PyObject *PyUnicode_AsASCIIString(PyObject *unicode)
{
if (!PyUnicode_Check(unicode)) {
- PyErr_BadArgument();
- return NULL;
+ PyErr_BadArgument();
+ return NULL;
}
return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(unicode),
- PyUnicode_GET_SIZE(unicode),
- NULL);
+ PyUnicode_GET_SIZE(unicode),
+ NULL);
}
#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
const char *curr = s + offset;
if (IsDBCSLeadByte(*curr)) {
- const char *prev = CharPrev(s, curr);
- return (prev == curr) || !IsDBCSLeadByte(*prev) || (curr - prev == 2);
+ const char *prev = CharPrev(s, curr);
+ return (prev == curr) || !IsDBCSLeadByte(*prev) || (curr - prev == 2);
}
return 0;
}
* trailing lead-byte too. Returns consumed size if succeed, -1 otherwise.
*/
static int decode_mbcs(PyUnicodeObject **v,
- const char *s, /* MBCS string */
- int size, /* sizeof MBCS string */
- int final)
+ const char *s, /* MBCS string */
+ int size, /* sizeof MBCS string */
+ int final)
{
Py_UNICODE *p;
Py_ssize_t n = 0;
/* Skip trailing lead-byte unless 'final' is set */
if (!final && size >= 1 && is_dbcs_lead_byte(s, size - 1))
- --size;
+ --size;
/* First get the size of the result */
if (size > 0) {
- usize = MultiByteToWideChar(CP_ACP, 0, s, size, NULL, 0);
- if (usize == 0) {
- PyErr_SetFromWindowsErrWithFilename(0, NULL);
- return -1;
- }
+ usize = MultiByteToWideChar(CP_ACP, 0, s, size, NULL, 0);
+ if (usize == 0) {
+ PyErr_SetFromWindowsErrWithFilename(0, NULL);
+ return -1;
+ }
}
if (*v == NULL) {
- /* Create unicode object */
- *v = _PyUnicode_New(usize);
- if (*v == NULL)
- return -1;
+ /* Create unicode object */
+ *v = _PyUnicode_New(usize);
+ if (*v == NULL)
+ return -1;
}
else {
- /* Extend unicode object */
- n = PyUnicode_GET_SIZE(*v);
- if (_PyUnicode_Resize(v, n + usize) < 0)
- return -1;
+ /* Extend unicode object */
+ n = PyUnicode_GET_SIZE(*v);
+ if (_PyUnicode_Resize(v, n + usize) < 0)
+ return -1;
}
/* Do the conversion */
if (size > 0) {
- p = PyUnicode_AS_UNICODE(*v) + n;
- if (0 == MultiByteToWideChar(CP_ACP, 0, s, size, p, usize)) {
- PyErr_SetFromWindowsErrWithFilename(0, NULL);
- return -1;
- }
+ p = PyUnicode_AS_UNICODE(*v) + n;
+ if (0 == MultiByteToWideChar(CP_ACP, 0, s, size, p, usize)) {
+ PyErr_SetFromWindowsErrWithFilename(0, NULL);
+ return -1;
+ }
}
return size;
}
PyObject *PyUnicode_DecodeMBCSStateful(const char *s,
- Py_ssize_t size,
- const char *errors,
- Py_ssize_t *consumed)
+ Py_ssize_t size,
+ const char *errors,
+ Py_ssize_t *consumed)
{
PyUnicodeObject *v = NULL;
int done;
if (consumed)
- *consumed = 0;
+ *consumed = 0;
#ifdef NEED_RETRY
retry:
if (size > INT_MAX)
- done = decode_mbcs(&v, s, INT_MAX, 0);
+ done = decode_mbcs(&v, s, INT_MAX, 0);
else
#endif
- done = decode_mbcs(&v, s, (int)size, !consumed);
+ done = decode_mbcs(&v, s, (int)size, !consumed);
if (done < 0) {
Py_XDECREF(v);
- return NULL;
+ return NULL;
}
if (consumed)
- *consumed += done;
+ *consumed += done;
#ifdef NEED_RETRY
if (size > INT_MAX) {
- s += done;
- size -= done;
- goto retry;
+ s += done;
+ size -= done;
+ goto retry;
}
#endif
}
PyObject *PyUnicode_DecodeMBCS(const char *s,
- Py_ssize_t size,
- const char *errors)
+ Py_ssize_t size,
+ const char *errors)
{
return PyUnicode_DecodeMBCSStateful(s, size, errors, NULL);
}
* Returns 0 if succeed, -1 otherwise.
*/
static int encode_mbcs(PyObject **repr,
- const Py_UNICODE *p, /* unicode */
- int size) /* size of unicode */
+ const Py_UNICODE *p, /* unicode */
+ int size) /* size of unicode */
{
int mbcssize = 0;
Py_ssize_t n = 0;
/* First get the size of the result */
if (size > 0) {
- mbcssize = WideCharToMultiByte(CP_ACP, 0, p, size, NULL, 0, NULL, NULL);
- if (mbcssize == 0) {
- PyErr_SetFromWindowsErrWithFilename(0, NULL);
- return -1;
- }
+ mbcssize = WideCharToMultiByte(CP_ACP, 0, p, size, NULL, 0, NULL, NULL);
+ if (mbcssize == 0) {
+ PyErr_SetFromWindowsErrWithFilename(0, NULL);
+ return -1;
+ }
}
if (*repr == NULL) {
- /* Create string object */
- *repr = PyBytes_FromStringAndSize(NULL, mbcssize);
- if (*repr == NULL)
- return -1;
+ /* Create string object */
+ *repr = PyBytes_FromStringAndSize(NULL, mbcssize);
+ if (*repr == NULL)
+ return -1;
}
else {
- /* Extend string object */
- n = PyBytes_Size(*repr);
- if (_PyBytes_Resize(repr, n + mbcssize) < 0)
- return -1;
+ /* Extend string object */
+ n = PyBytes_Size(*repr);
+ if (_PyBytes_Resize(repr, n + mbcssize) < 0)
+ return -1;
}
/* Do the conversion */
if (size > 0) {
- char *s = PyBytes_AS_STRING(*repr) + n;
- if (0 == WideCharToMultiByte(CP_ACP, 0, p, size, s, mbcssize, NULL, NULL)) {
- PyErr_SetFromWindowsErrWithFilename(0, NULL);
- return -1;
- }
+ char *s = PyBytes_AS_STRING(*repr) + n;
+ if (0 == WideCharToMultiByte(CP_ACP, 0, p, size, s, mbcssize, NULL, NULL)) {
+ PyErr_SetFromWindowsErrWithFilename(0, NULL);
+ return -1;
+ }
}
return 0;
}
PyObject *PyUnicode_EncodeMBCS(const Py_UNICODE *p,
- Py_ssize_t size,
- const char *errors)
+ Py_ssize_t size,
+ const char *errors)
{
PyObject *repr = NULL;
int ret;
#ifdef NEED_RETRY
retry:
if (size > INT_MAX)
- ret = encode_mbcs(&repr, p, INT_MAX);
+ ret = encode_mbcs(&repr, p, INT_MAX);
else
#endif
- ret = encode_mbcs(&repr, p, (int)size);
+ ret = encode_mbcs(&repr, p, (int)size);
if (ret < 0) {
- Py_XDECREF(repr);
- return NULL;
+ Py_XDECREF(repr);
+ return NULL;
}
#ifdef NEED_RETRY
if (size > INT_MAX) {
- p += INT_MAX;
- size -= INT_MAX;
- goto retry;
+ p += INT_MAX;
+ size -= INT_MAX;
+ goto retry;
}
#endif
return NULL;
}
return PyUnicode_EncodeMBCS(PyUnicode_AS_UNICODE(unicode),
- PyUnicode_GET_SIZE(unicode),
- NULL);
+ PyUnicode_GET_SIZE(unicode),
+ NULL);
}
#undef NEED_RETRY
/* --- Character Mapping Codec -------------------------------------------- */
PyObject *PyUnicode_DecodeCharmap(const char *s,
- Py_ssize_t size,
- PyObject *mapping,
- const char *errors)
+ Py_ssize_t size,
+ PyObject *mapping,
+ const char *errors)
{
const char *starts = s;
Py_ssize_t startinpos;
/* Default to Latin-1 */
if (mapping == NULL)
- return PyUnicode_DecodeLatin1(s, size, errors);
+ return PyUnicode_DecodeLatin1(s, size, errors);
v = _PyUnicode_New(size);
if (v == NULL)
- goto onError;
+ goto onError;
if (size == 0)
- return (PyObject *)v;
+ return (PyObject *)v;
p = PyUnicode_AS_UNICODE(v);
e = s + size;
if (PyUnicode_CheckExact(mapping)) {
- mapstring = PyUnicode_AS_UNICODE(mapping);
- maplen = PyUnicode_GET_SIZE(mapping);
- while (s < e) {
- unsigned char ch = *s;
- Py_UNICODE x = 0xfffe; /* illegal value */
-
- if (ch < maplen)
- x = mapstring[ch];
-
- if (x == 0xfffe) {
- /* undefined mapping */
- outpos = p-PyUnicode_AS_UNICODE(v);
- startinpos = s-starts;
- endinpos = startinpos+1;
- if (unicode_decode_call_errorhandler(
- errors, &errorHandler,
- "charmap", "character maps to <undefined>",
- &starts, &e, &startinpos, &endinpos, &exc, &s,
- &v, &outpos, &p)) {
- goto onError;
- }
- continue;
- }
- *p++ = x;
- ++s;
- }
+ mapstring = PyUnicode_AS_UNICODE(mapping);
+ maplen = PyUnicode_GET_SIZE(mapping);
+ while (s < e) {
+ unsigned char ch = *s;
+ Py_UNICODE x = 0xfffe; /* illegal value */
+
+ if (ch < maplen)
+ x = mapstring[ch];
+
+ if (x == 0xfffe) {
+ /* undefined mapping */
+ outpos = p-PyUnicode_AS_UNICODE(v);
+ startinpos = s-starts;
+ endinpos = startinpos+1;
+ if (unicode_decode_call_errorhandler(
+ errors, &errorHandler,
+ "charmap", "character maps to <undefined>",
+ &starts, &e, &startinpos, &endinpos, &exc, &s,
+ &v, &outpos, &p)) {
+ goto onError;
+ }
+ continue;
+ }
+ *p++ = x;
+ ++s;
+ }
}
else {
- while (s < e) {
- unsigned char ch = *s;
- PyObject *w, *x;
-
- /* Get mapping (char ordinal -> integer, Unicode char or None) */
- w = PyLong_FromLong((long)ch);
- if (w == NULL)
- goto onError;
- x = PyObject_GetItem(mapping, w);
- Py_DECREF(w);
- if (x == NULL) {
- if (PyErr_ExceptionMatches(PyExc_LookupError)) {
- /* No mapping found means: mapping is undefined. */
- PyErr_Clear();
- x = Py_None;
- Py_INCREF(x);
- } else
- goto onError;
- }
-
- /* Apply mapping */
- if (PyLong_Check(x)) {
- long value = PyLong_AS_LONG(x);
- if (value < 0 || value > 65535) {
- PyErr_SetString(PyExc_TypeError,
- "character mapping must be in range(65536)");
- Py_DECREF(x);
- goto onError;
- }
- *p++ = (Py_UNICODE)value;
- }
- else if (x == Py_None) {
- /* undefined mapping */
- outpos = p-PyUnicode_AS_UNICODE(v);
- startinpos = s-starts;
- endinpos = startinpos+1;
- if (unicode_decode_call_errorhandler(
- errors, &errorHandler,
- "charmap", "character maps to <undefined>",
- &starts, &e, &startinpos, &endinpos, &exc, &s,
- &v, &outpos, &p)) {
- Py_DECREF(x);
- goto onError;
- }
- Py_DECREF(x);
- continue;
- }
- else if (PyUnicode_Check(x)) {
- Py_ssize_t targetsize = PyUnicode_GET_SIZE(x);
-
- if (targetsize == 1)
- /* 1-1 mapping */
- *p++ = *PyUnicode_AS_UNICODE(x);
-
- else if (targetsize > 1) {
- /* 1-n mapping */
- if (targetsize > extrachars) {
- /* resize first */
- Py_ssize_t oldpos = p - PyUnicode_AS_UNICODE(v);
- Py_ssize_t needed = (targetsize - extrachars) + \
- (targetsize << 2);
- extrachars += needed;
- /* XXX overflow detection missing */
- if (_PyUnicode_Resize(&v,
- PyUnicode_GET_SIZE(v) + needed) < 0) {
- Py_DECREF(x);
- goto onError;
- }
- p = PyUnicode_AS_UNICODE(v) + oldpos;
- }
- Py_UNICODE_COPY(p,
- PyUnicode_AS_UNICODE(x),
- targetsize);
- p += targetsize;
- extrachars -= targetsize;
- }
- /* 1-0 mapping: skip the character */
- }
- else {
- /* wrong return value */
- PyErr_SetString(PyExc_TypeError,
- "character mapping must return integer, None or str");
- Py_DECREF(x);
- goto onError;
- }
- Py_DECREF(x);
- ++s;
- }
+ while (s < e) {
+ unsigned char ch = *s;
+ PyObject *w, *x;
+
+ /* Get mapping (char ordinal -> integer, Unicode char or None) */
+ w = PyLong_FromLong((long)ch);
+ if (w == NULL)
+ goto onError;
+ x = PyObject_GetItem(mapping, w);
+ Py_DECREF(w);
+ if (x == NULL) {
+ if (PyErr_ExceptionMatches(PyExc_LookupError)) {
+ /* No mapping found means: mapping is undefined. */
+ PyErr_Clear();
+ x = Py_None;
+ Py_INCREF(x);
+ } else
+ goto onError;
+ }
+
+ /* Apply mapping */
+ if (PyLong_Check(x)) {
+ long value = PyLong_AS_LONG(x);
+ if (value < 0 || value > 65535) {
+ PyErr_SetString(PyExc_TypeError,
+ "character mapping must be in range(65536)");
+ Py_DECREF(x);
+ goto onError;
+ }
+ *p++ = (Py_UNICODE)value;
+ }
+ else if (x == Py_None) {
+ /* undefined mapping */
+ outpos = p-PyUnicode_AS_UNICODE(v);
+ startinpos = s-starts;
+ endinpos = startinpos+1;
+ if (unicode_decode_call_errorhandler(
+ errors, &errorHandler,
+ "charmap", "character maps to <undefined>",
+ &starts, &e, &startinpos, &endinpos, &exc, &s,
+ &v, &outpos, &p)) {
+ Py_DECREF(x);
+ goto onError;
+ }
+ Py_DECREF(x);
+ continue;
+ }
+ else if (PyUnicode_Check(x)) {
+ Py_ssize_t targetsize = PyUnicode_GET_SIZE(x);
+
+ if (targetsize == 1)
+ /* 1-1 mapping */
+ *p++ = *PyUnicode_AS_UNICODE(x);
+
+ else if (targetsize > 1) {
+ /* 1-n mapping */
+ if (targetsize > extrachars) {
+ /* resize first */
+ Py_ssize_t oldpos = p - PyUnicode_AS_UNICODE(v);
+ Py_ssize_t needed = (targetsize - extrachars) + \
+ (targetsize << 2);
+ extrachars += needed;
+ /* XXX overflow detection missing */
+ if (_PyUnicode_Resize(&v,
+ PyUnicode_GET_SIZE(v) + needed) < 0) {
+ Py_DECREF(x);
+ goto onError;
+ }
+ p = PyUnicode_AS_UNICODE(v) + oldpos;
+ }
+ Py_UNICODE_COPY(p,
+ PyUnicode_AS_UNICODE(x),
+ targetsize);
+ p += targetsize;
+ extrachars -= targetsize;
+ }
+ /* 1-0 mapping: skip the character */
+ }
+ else {
+ /* wrong return value */
+ PyErr_SetString(PyExc_TypeError,
+ "character mapping must return integer, None or str");
+ Py_DECREF(x);
+ goto onError;
+ }
+ Py_DECREF(x);
+ ++s;
+ }
}
if (p - PyUnicode_AS_UNICODE(v) < PyUnicode_GET_SIZE(v))
- if (_PyUnicode_Resize(&v, p - PyUnicode_AS_UNICODE(v)) < 0)
- goto onError;
+ if (_PyUnicode_Resize(&v, p - PyUnicode_AS_UNICODE(v)) < 0)
+ goto onError;
Py_XDECREF(errorHandler);
Py_XDECREF(exc);
return (PyObject *)v;
encoding_map_size(PyObject *obj, PyObject* args)
{
struct encoding_map *map = (struct encoding_map*)obj;
- return PyLong_FromLong(sizeof(*map) - 1 + 16*map->count2 +
+ return PyLong_FromLong(sizeof(*map) - 1 + 16*map->count2 +
128*map->count3);
}
static PyMethodDef encoding_map_methods[] = {
- {"size", encoding_map_size, METH_NOARGS,
+ {"size", encoding_map_size, METH_NOARGS,
PyDoc_STR("Return the size (in bytes) of this object") },
{ 0 }
};
static void
encoding_map_dealloc(PyObject* o)
{
- PyObject_FREE(o);
+ PyObject_FREE(o);
}
static PyTypeObject EncodingMapType = {
- PyVarObject_HEAD_INIT(NULL, 0)
+ PyVarObject_HEAD_INIT(NULL, 0)
"EncodingMap", /*tp_name*/
sizeof(struct encoding_map), /*tp_basicsize*/
0, /*tp_itemsize*/
if (level1[l1] == 0xFF)
level1[l1] = count2++;
if (level2[l2] == 0xFF)
- level2[l2] = count3++;
+ level2[l2] = count3++;
}
if (count2 >= 0xFF || count3 >= 0xFF)
#ifdef Py_UNICODE_WIDE
if (c > 0xFFFF) {
- return -1;
+ return -1;
}
#endif
if (c == 0)
PyObject *x;
if (w == NULL)
- return NULL;
+ return NULL;
x = PyObject_GetItem(mapping, w);
Py_DECREF(w);
if (x == NULL) {
- if (PyErr_ExceptionMatches(PyExc_LookupError)) {
- /* No mapping found means: mapping is undefined. */
- PyErr_Clear();
- x = Py_None;
- Py_INCREF(x);
- return x;
- } else
- return NULL;
+ if (PyErr_ExceptionMatches(PyExc_LookupError)) {
+ /* No mapping found means: mapping is undefined. */
+ PyErr_Clear();
+ x = Py_None;
+ Py_INCREF(x);
+ return x;
+ } else
+ return NULL;
}
else if (x == Py_None)
- return x;
+ return x;
else if (PyLong_Check(x)) {
- long value = PyLong_AS_LONG(x);
- if (value < 0 || value > 255) {
- PyErr_SetString(PyExc_TypeError,
- "character mapping must be in range(256)");
- Py_DECREF(x);
- return NULL;
- }
- return x;
+ long value = PyLong_AS_LONG(x);
+ if (value < 0 || value > 255) {
+ PyErr_SetString(PyExc_TypeError,
+ "character mapping must be in range(256)");
+ Py_DECREF(x);
+ return NULL;
+ }
+ return x;
}
else if (PyBytes_Check(x))
- return x;
+ return x;
else {
- /* wrong return value */
- PyErr_Format(PyExc_TypeError,
+ /* wrong return value */
+ PyErr_Format(PyExc_TypeError,
"character mapping must return integer, bytes or None, not %.400s",
x->ob_type->tp_name);
- Py_DECREF(x);
- return NULL;
+ Py_DECREF(x);
+ return NULL;
}
}
static int
charmapencode_resize(PyObject **outobj, Py_ssize_t *outpos, Py_ssize_t requiredsize)
{
- Py_ssize_t outsize = PyBytes_GET_SIZE(*outobj);
- /* exponentially overallocate to minimize reallocations */
- if (requiredsize < 2*outsize)
- requiredsize = 2*outsize;
- if (_PyBytes_Resize(outobj, requiredsize))
- return -1;
- return 0;
+ Py_ssize_t outsize = PyBytes_GET_SIZE(*outobj);
+ /* exponentially overallocate to minimize reallocations */
+ if (requiredsize < 2*outsize)
+ requiredsize = 2*outsize;
+ if (_PyBytes_Resize(outobj, requiredsize))
+ return -1;
+ return 0;
}
-typedef enum charmapencode_result {
- enc_SUCCESS, enc_FAILED, enc_EXCEPTION
+typedef enum charmapencode_result {
+ enc_SUCCESS, enc_FAILED, enc_EXCEPTION
}charmapencode_result;
/* lookup the character, put the result in the output string and adjust
various state variables. Resize the output bytes object if not enough
if (Py_TYPE(mapping) == &EncodingMapType) {
int res = encoding_map_lookup(c, mapping);
- Py_ssize_t requiredsize = *outpos+1;
+ Py_ssize_t requiredsize = *outpos+1;
if (res == -1)
return enc_FAILED;
- if (outsize<requiredsize)
- if (charmapencode_resize(outobj, outpos, requiredsize))
- return enc_EXCEPTION;
+ if (outsize<requiredsize)
+ if (charmapencode_resize(outobj, outpos, requiredsize))
+ return enc_EXCEPTION;
outstart = PyBytes_AS_STRING(*outobj);
- outstart[(*outpos)++] = (char)res;
- return enc_SUCCESS;
+ outstart[(*outpos)++] = (char)res;
+ return enc_SUCCESS;
}
rep = charmapencode_lookup(c, mapping);
if (rep==NULL)
- return enc_EXCEPTION;
+ return enc_EXCEPTION;
else if (rep==Py_None) {
- Py_DECREF(rep);
- return enc_FAILED;
+ Py_DECREF(rep);
+ return enc_FAILED;
} else {
- if (PyLong_Check(rep)) {
- Py_ssize_t requiredsize = *outpos+1;
- if (outsize<requiredsize)
- if (charmapencode_resize(outobj, outpos, requiredsize)) {
- Py_DECREF(rep);
- return enc_EXCEPTION;
- }
+ if (PyLong_Check(rep)) {
+ Py_ssize_t requiredsize = *outpos+1;
+ if (outsize<requiredsize)
+ if (charmapencode_resize(outobj, outpos, requiredsize)) {
+ Py_DECREF(rep);
+ return enc_EXCEPTION;
+ }
outstart = PyBytes_AS_STRING(*outobj);
- outstart[(*outpos)++] = (char)PyLong_AS_LONG(rep);
- }
- else {
- const char *repchars = PyBytes_AS_STRING(rep);
- Py_ssize_t repsize = PyBytes_GET_SIZE(rep);
- Py_ssize_t requiredsize = *outpos+repsize;
- if (outsize<requiredsize)
- if (charmapencode_resize(outobj, outpos, requiredsize)) {
- Py_DECREF(rep);
- return enc_EXCEPTION;
- }
+ outstart[(*outpos)++] = (char)PyLong_AS_LONG(rep);
+ }
+ else {
+ const char *repchars = PyBytes_AS_STRING(rep);
+ Py_ssize_t repsize = PyBytes_GET_SIZE(rep);
+ Py_ssize_t requiredsize = *outpos+repsize;
+ if (outsize<requiredsize)
+ if (charmapencode_resize(outobj, outpos, requiredsize)) {
+ Py_DECREF(rep);
+ return enc_EXCEPTION;
+ }
outstart = PyBytes_AS_STRING(*outobj);
- memcpy(outstart + *outpos, repchars, repsize);
- *outpos += repsize;
- }
+ memcpy(outstart + *outpos, repchars, repsize);
+ *outpos += repsize;
+ }
}
Py_DECREF(rep);
return enc_SUCCESS;
while (collendpos < size) {
PyObject *rep;
if (Py_TYPE(mapping) == &EncodingMapType) {
- int res = encoding_map_lookup(p[collendpos], mapping);
- if (res != -1)
- break;
- ++collendpos;
- continue;
- }
-
- rep = charmapencode_lookup(p[collendpos], mapping);
- if (rep==NULL)
- return -1;
- else if (rep!=Py_None) {
- Py_DECREF(rep);
- break;
- }
- Py_DECREF(rep);
- ++collendpos;
+ int res = encoding_map_lookup(p[collendpos], mapping);
+ if (res != -1)
+ break;
+ ++collendpos;
+ continue;
+ }
+
+ rep = charmapencode_lookup(p[collendpos], mapping);
+ if (rep==NULL)
+ return -1;
+ else if (rep!=Py_None) {
+ Py_DECREF(rep);
+ break;
+ }
+ Py_DECREF(rep);
+ ++collendpos;
}
/* cache callback name lookup
* (if not done yet, i.e. it's the first error) */
if (*known_errorHandler==-1) {
- if ((errors==NULL) || (!strcmp(errors, "strict")))
- *known_errorHandler = 1;
- else if (!strcmp(errors, "replace"))
- *known_errorHandler = 2;
- else if (!strcmp(errors, "ignore"))
- *known_errorHandler = 3;
- else if (!strcmp(errors, "xmlcharrefreplace"))
- *known_errorHandler = 4;
- else
- *known_errorHandler = 0;
+ if ((errors==NULL) || (!strcmp(errors, "strict")))
+ *known_errorHandler = 1;
+ else if (!strcmp(errors, "replace"))
+ *known_errorHandler = 2;
+ else if (!strcmp(errors, "ignore"))
+ *known_errorHandler = 3;
+ else if (!strcmp(errors, "xmlcharrefreplace"))
+ *known_errorHandler = 4;
+ else
+ *known_errorHandler = 0;
}
switch (*known_errorHandler) {
- case 1: /* strict */
- raise_encode_exception(exceptionObject, encoding, p, size, collstartpos, collendpos, reason);
- return -1;
- case 2: /* replace */
- for (collpos = collstartpos; collpos<collendpos; ++collpos) {
- x = charmapencode_output('?', mapping, res, respos);
- if (x==enc_EXCEPTION) {
- return -1;
- }
- else if (x==enc_FAILED) {
- raise_encode_exception(exceptionObject, encoding, p, size, collstartpos, collendpos, reason);
- return -1;
- }
- }
- /* fall through */
- case 3: /* ignore */
- *inpos = collendpos;
- break;
- case 4: /* xmlcharrefreplace */
- /* generate replacement (temporarily (mis)uses p) */
- for (collpos = collstartpos; collpos < collendpos; ++collpos) {
- char buffer[2+29+1+1];
- char *cp;
- sprintf(buffer, "&#%d;", (int)p[collpos]);
- for (cp = buffer; *cp; ++cp) {
- x = charmapencode_output(*cp, mapping, res, respos);
- if (x==enc_EXCEPTION)
- return -1;
- else if (x==enc_FAILED) {
- raise_encode_exception(exceptionObject, encoding, p, size, collstartpos, collendpos, reason);
- return -1;
- }
- }
- }
- *inpos = collendpos;
- break;
- default:
- repunicode = unicode_encode_call_errorhandler(errors, errorHandler,
- encoding, reason, p, size, exceptionObject,
- collstartpos, collendpos, &newpos);
- if (repunicode == NULL)
- return -1;
- /* generate replacement */
- repsize = PyUnicode_GET_SIZE(repunicode);
- for (uni2 = PyUnicode_AS_UNICODE(repunicode); repsize-->0; ++uni2) {
- x = charmapencode_output(*uni2, mapping, res, respos);
- if (x==enc_EXCEPTION) {
- return -1;
- }
- else if (x==enc_FAILED) {
- Py_DECREF(repunicode);
- raise_encode_exception(exceptionObject, encoding, p, size, collstartpos, collendpos, reason);
- return -1;
- }
- }
- *inpos = newpos;
- Py_DECREF(repunicode);
+ case 1: /* strict */
+ raise_encode_exception(exceptionObject, encoding, p, size, collstartpos, collendpos, reason);
+ return -1;
+ case 2: /* replace */
+ for (collpos = collstartpos; collpos<collendpos; ++collpos) {
+ x = charmapencode_output('?', mapping, res, respos);
+ if (x==enc_EXCEPTION) {
+ return -1;
+ }
+ else if (x==enc_FAILED) {
+ raise_encode_exception(exceptionObject, encoding, p, size, collstartpos, collendpos, reason);
+ return -1;
+ }
+ }
+ /* fall through */
+ case 3: /* ignore */
+ *inpos = collendpos;
+ break;
+ case 4: /* xmlcharrefreplace */
+ /* generate replacement (temporarily (mis)uses p) */
+ for (collpos = collstartpos; collpos < collendpos; ++collpos) {
+ char buffer[2+29+1+1];
+ char *cp;
+ sprintf(buffer, "&#%d;", (int)p[collpos]);
+ for (cp = buffer; *cp; ++cp) {
+ x = charmapencode_output(*cp, mapping, res, respos);
+ if (x==enc_EXCEPTION)
+ return -1;
+ else if (x==enc_FAILED) {
+ raise_encode_exception(exceptionObject, encoding, p, size, collstartpos, collendpos, reason);
+ return -1;
+ }
+ }
+ }
+ *inpos = collendpos;
+ break;
+ default:
+ repunicode = unicode_encode_call_errorhandler(errors, errorHandler,
+ encoding, reason, p, size, exceptionObject,
+ collstartpos, collendpos, &newpos);
+ if (repunicode == NULL)
+ return -1;
+ /* generate replacement */
+ repsize = PyUnicode_GET_SIZE(repunicode);
+ for (uni2 = PyUnicode_AS_UNICODE(repunicode); repsize-->0; ++uni2) {
+ x = charmapencode_output(*uni2, mapping, res, respos);
+ if (x==enc_EXCEPTION) {
+ return -1;
+ }
+ else if (x==enc_FAILED) {
+ Py_DECREF(repunicode);
+ raise_encode_exception(exceptionObject, encoding, p, size, collstartpos, collendpos, reason);
+ return -1;
+ }
+ }
+ *inpos = newpos;
+ Py_DECREF(repunicode);
}
return 0;
}
/* Encode the `size` Py_UNICODE units starting at `p` through `mapping`.
   A NULL mapping defers the whole job to PyUnicode_EncodeLatin1().
   Otherwise every input character is passed to charmapencode_output();
   a character reported as enc_FAILED is handed, together with `errors`,
   to charmap_encoding_error(), and any failure jumps to onError.
   NOTE(review): only part of this function is visible in this excerpt
   (local declarations, the allocation of `res` and the exit paths are
   elided), so the full return-value contract is not restated here. */
PyObject *PyUnicode_EncodeCharmap(const Py_UNICODE *p,
- Py_ssize_t size,
- PyObject *mapping,
- const char *errors)
+ Py_ssize_t size,
+ PyObject *mapping,
+ const char *errors)
{
/* output object */
PyObject *res = NULL;
/* Default to Latin-1 */
if (mapping == NULL)
- return PyUnicode_EncodeLatin1(p, size, errors);
+ return PyUnicode_EncodeLatin1(p, size, errors);
/* allocate enough for a simple encoding without
replacements, if we need more, we'll resize */
if (res == NULL)
goto onError;
if (size == 0)
- return res;
+ return res;
/* Walk the input; inpos is advanced here on success and is passed by
   address to charmap_encoding_error() for the unencodable case. */
while (inpos<size) {
- /* try to encode it */
- charmapencode_result x = charmapencode_output(p[inpos], mapping, &res, &respos);
- if (x==enc_EXCEPTION) /* error */
- goto onError;
- if (x==enc_FAILED) { /* unencodable character */
- if (charmap_encoding_error(p, size, &inpos, mapping,
- &exc,
- &known_errorHandler, &errorHandler, errors,
- &res, &respos)) {
- goto onError;
- }
- }
- else
- /* done with this character => adjust input position */
- ++inpos;
+ /* try to encode it */
+ charmapencode_result x = charmapencode_output(p[inpos], mapping, &res, &respos);
+ if (x==enc_EXCEPTION) /* error */
+ goto onError;
+ if (x==enc_FAILED) { /* unencodable character */
+ if (charmap_encoding_error(p, size, &inpos, mapping,
+ &exc,
+ &known_errorHandler, &errorHandler, errors,
+ &res, &respos)) {
+ goto onError;
+ }
+ }
+ else
+ /* done with this character => adjust input position */
+ ++inpos;
}
/* Resize if we allocated too much */
}
/* Charmap-encode a Unicode object.
   Calls PyErr_BadArgument() and returns NULL when `unicode` fails
   PyUnicode_Check() or `mapping` is NULL.  Otherwise forwards the
   object's buffer and size to PyUnicode_EncodeCharmap() with `errors`
   passed as NULL and returns whatever that call returns. */
PyObject *PyUnicode_AsCharmapString(PyObject *unicode,
- PyObject *mapping)
+ PyObject *mapping)
{
if (!PyUnicode_Check(unicode) || mapping == NULL) {
- PyErr_BadArgument();
- return NULL;
+ PyErr_BadArgument();
+ return NULL;
}
return PyUnicode_EncodeCharmap(PyUnicode_AS_UNICODE(unicode),
- PyUnicode_GET_SIZE(unicode),
- mapping,
- NULL);
+ PyUnicode_GET_SIZE(unicode),
+ mapping,
+ NULL);
}
/* create or adjust a UnicodeTranslateError */
const char *reason)
{
if (*exceptionObject == NULL) {
- *exceptionObject = PyUnicodeTranslateError_Create(
- unicode, size, startpos, endpos, reason);
+ *exceptionObject = PyUnicodeTranslateError_Create(
+ unicode, size, startpos, endpos, reason);
}
else {
- if (PyUnicodeTranslateError_SetStart(*exceptionObject, startpos))
- goto onError;
- if (PyUnicodeTranslateError_SetEnd(*exceptionObject, endpos))
- goto onError;
- if (PyUnicodeTranslateError_SetReason(*exceptionObject, reason))
- goto onError;
- return;
- onError:
- Py_DECREF(*exceptionObject);
- *exceptionObject = NULL;
+ if (PyUnicodeTranslateError_SetStart(*exceptionObject, startpos))
+ goto onError;
+ if (PyUnicodeTranslateError_SetEnd(*exceptionObject, endpos))
+ goto onError;
+ if (PyUnicodeTranslateError_SetReason(*exceptionObject, reason))
+ goto onError;
+ return;
+ onError:
+ Py_DECREF(*exceptionObject);
+ *exceptionObject = NULL;
}
}
const char *reason)
{
make_translate_exception(exceptionObject,
- unicode, size, startpos, endpos, reason);
+ unicode, size, startpos, endpos, reason);
if (*exceptionObject != NULL)
- PyCodec_StrictErrors(*exceptionObject);
+ PyCodec_StrictErrors(*exceptionObject);
}
/* error handling callback helper:
PyObject *resunicode;
if (*errorHandler == NULL) {
- *errorHandler = PyCodec_LookupError(errors);
+ *errorHandler = PyCodec_LookupError(errors);
if (*errorHandler == NULL)
- return NULL;
+ return NULL;
}
make_translate_exception(exceptionObject,
- unicode, size, startpos, endpos, reason);
+ unicode, size, startpos, endpos, reason);
if (*exceptionObject == NULL)
- return NULL;
+ return NULL;
restuple = PyObject_CallFunctionObjArgs(
- *errorHandler, *exceptionObject, NULL);
+ *errorHandler, *exceptionObject, NULL);
if (restuple == NULL)
- return NULL;
+ return NULL;
if (!PyTuple_Check(restuple)) {
- PyErr_Format(PyExc_TypeError, &argparse[4]);
- Py_DECREF(restuple);
- return NULL;
+ PyErr_Format(PyExc_TypeError, &argparse[4]);
+ Py_DECREF(restuple);
+ return NULL;
}
if (!PyArg_ParseTuple(restuple, argparse, &PyUnicode_Type,
- &resunicode, &i_newpos)) {
- Py_DECREF(restuple);
- return NULL;
+ &resunicode, &i_newpos)) {
+ Py_DECREF(restuple);
+ return NULL;
}
if (i_newpos<0)
- *newpos = size+i_newpos;
+ *newpos = size+i_newpos;
else
*newpos = i_newpos;
if (*newpos<0 || *newpos>size) {
- PyErr_Format(PyExc_IndexError, "position %zd from error handler out of bounds", *newpos);
- Py_DECREF(restuple);
- return NULL;
+ PyErr_Format(PyExc_IndexError, "position %zd from error handler out of bounds", *newpos);
+ Py_DECREF(restuple);
+ return NULL;
}
Py_INCREF(resunicode);
Py_DECREF(restuple);
PyObject *x;
if (w == NULL)
- return -1;
+ return -1;
x = PyObject_GetItem(mapping, w);
Py_DECREF(w);
if (x == NULL) {
- if (PyErr_ExceptionMatches(PyExc_LookupError)) {
- /* No mapping found means: use 1:1 mapping. */
- PyErr_Clear();
- *result = NULL;
- return 0;
- } else
- return -1;
+ if (PyErr_ExceptionMatches(PyExc_LookupError)) {
+ /* No mapping found means: use 1:1 mapping. */
+ PyErr_Clear();
+ *result = NULL;
+ return 0;
+ } else
+ return -1;
}
else if (x == Py_None) {
- *result = x;
- return 0;
+ *result = x;
+ return 0;
}
else if (PyLong_Check(x)) {
- long value = PyLong_AS_LONG(x);
- long max = PyUnicode_GetMax();
- if (value < 0 || value > max) {
- PyErr_Format(PyExc_TypeError,
+ long value = PyLong_AS_LONG(x);
+ long max = PyUnicode_GetMax();
+ if (value < 0 || value > max) {
+ PyErr_Format(PyExc_TypeError,
"character mapping must be in range(0x%x)", max+1);
- Py_DECREF(x);
- return -1;
- }
- *result = x;
- return 0;
+ Py_DECREF(x);
+ return -1;
+ }
+ *result = x;
+ return 0;
}
else if (PyUnicode_Check(x)) {
- *result = x;
- return 0;
+ *result = x;
+ return 0;
}
else {
- /* wrong return value */
- PyErr_SetString(PyExc_TypeError,
- "character mapping must return integer, None or str");
- Py_DECREF(x);
- return -1;
+ /* wrong return value */
+ PyErr_SetString(PyExc_TypeError,
+ "character mapping must return integer, None or str");
+ Py_DECREF(x);
+ return -1;
}
}
/* ensure that *outobj is at least requiredsize characters long,
{
Py_ssize_t oldsize = PyUnicode_GET_SIZE(*outobj);
if (requiredsize > oldsize) {
- /* remember old output position */
- Py_ssize_t outpos = *outp-PyUnicode_AS_UNICODE(*outobj);
- /* exponentially overallocate to minimize reallocations */
- if (requiredsize < 2 * oldsize)
- requiredsize = 2 * oldsize;
- if (PyUnicode_Resize(outobj, requiredsize) < 0)
- return -1;
- *outp = PyUnicode_AS_UNICODE(*outobj) + outpos;
+ /* remember old output position */
+ Py_ssize_t outpos = *outp-PyUnicode_AS_UNICODE(*outobj);
+ /* exponentially overallocate to minimize reallocations */
+ if (requiredsize < 2 * oldsize)
+ requiredsize = 2 * oldsize;
+ if (PyUnicode_Resize(outobj, requiredsize) < 0)
+ return -1;
+ *outp = PyUnicode_AS_UNICODE(*outobj) + outpos;
}
return 0;
}
PyObject **res)
{
if (charmaptranslate_lookup(*curinp, mapping, res))
- return -1;
+ return -1;
if (*res==NULL) {
- /* not found => default to 1:1 mapping */
- *(*outp)++ = *curinp;
+ /* not found => default to 1:1 mapping */
+ *(*outp)++ = *curinp;
}
else if (*res==Py_None)
- ;
+ ;
else if (PyLong_Check(*res)) {
- /* no overflow check, because we know that the space is enough */
- *(*outp)++ = (Py_UNICODE)PyLong_AS_LONG(*res);
+ /* no overflow check, because we know that the space is enough */
+ *(*outp)++ = (Py_UNICODE)PyLong_AS_LONG(*res);
}
else if (PyUnicode_Check(*res)) {
- Py_ssize_t repsize = PyUnicode_GET_SIZE(*res);
- if (repsize==1) {
- /* no overflow check, because we know that the space is enough */
- *(*outp)++ = *PyUnicode_AS_UNICODE(*res);
- }
- else if (repsize!=0) {
- /* more than one character */
- Py_ssize_t requiredsize = (*outp-PyUnicode_AS_UNICODE(*outobj)) +
- (insize - (curinp-startinp)) +
- repsize - 1;
- if (charmaptranslate_makespace(outobj, outp, requiredsize))
- return -1;
- memcpy(*outp, PyUnicode_AS_UNICODE(*res), sizeof(Py_UNICODE)*repsize);
- *outp += repsize;
- }
+ Py_ssize_t repsize = PyUnicode_GET_SIZE(*res);
+ if (repsize==1) {
+ /* no overflow check, because we know that the space is enough */
+ *(*outp)++ = *PyUnicode_AS_UNICODE(*res);
+ }
+ else if (repsize!=0) {
+ /* more than one character */
+ Py_ssize_t requiredsize = (*outp-PyUnicode_AS_UNICODE(*outobj)) +
+ (insize - (curinp-startinp)) +
+ repsize - 1;
+ if (charmaptranslate_makespace(outobj, outp, requiredsize))
+ return -1;
+ memcpy(*outp, PyUnicode_AS_UNICODE(*res), sizeof(Py_UNICODE)*repsize);
+ *outp += repsize;
+ }
}
else
- return -1;
+ return -1;
return 0;
}
PyObject *PyUnicode_TranslateCharmap(const Py_UNICODE *p,
- Py_ssize_t size,
- PyObject *mapping,
- const char *errors)
+ Py_ssize_t size,
+ PyObject *mapping,
+ const char *errors)
{
/* output object */
PyObject *res = NULL;
int known_errorHandler = -1;
if (mapping == NULL) {
- PyErr_BadArgument();
- return NULL;
+ PyErr_BadArgument();
+ return NULL;
}
/* allocate enough for a simple 1:1 translation without
replacements, if we need more, we'll resize */
res = PyUnicode_FromUnicode(NULL, size);
if (res == NULL)
- goto onError;
+ goto onError;
if (size == 0)
- return res;
+ return res;
str = PyUnicode_AS_UNICODE(res);
while (p<endp) {
- /* try to encode it */
- PyObject *x = NULL;
- if (charmaptranslate_output(startp, p, size, mapping, &res, &str, &x)) {
- Py_XDECREF(x);
- goto onError;
- }
- Py_XDECREF(x);
- if (x!=Py_None) /* it worked => adjust input pointer */
- ++p;
- else { /* untranslatable character */
- PyObject *repunicode = NULL; /* initialize to prevent gcc warning */
- Py_ssize_t repsize;
- Py_ssize_t newpos;
- Py_UNICODE *uni2;
- /* startpos for collecting untranslatable chars */
- const Py_UNICODE *collstart = p;
- const Py_UNICODE *collend = p+1;
- const Py_UNICODE *coll;
-
- /* find all untranslatable characters */
- while (collend < endp) {
- if (charmaptranslate_lookup(*collend, mapping, &x))
- goto onError;
- Py_XDECREF(x);
- if (x!=Py_None)
- break;
- ++collend;
- }
- /* cache callback name lookup
- * (if not done yet, i.e. it's the first error) */
- if (known_errorHandler==-1) {
- if ((errors==NULL) || (!strcmp(errors, "strict")))
- known_errorHandler = 1;
- else if (!strcmp(errors, "replace"))
- known_errorHandler = 2;
- else if (!strcmp(errors, "ignore"))
- known_errorHandler = 3;
- else if (!strcmp(errors, "xmlcharrefreplace"))
- known_errorHandler = 4;
- else
- known_errorHandler = 0;
- }
- switch (known_errorHandler) {
- case 1: /* strict */
- raise_translate_exception(&exc, startp, size, collstart-startp, collend-startp, reason);
- goto onError;
- case 2: /* replace */
- /* No need to check for space, this is a 1:1 replacement */
- for (coll = collstart; coll<collend; ++coll)
- *str++ = '?';
- /* fall through */
- case 3: /* ignore */
- p = collend;
- break;
- case 4: /* xmlcharrefreplace */
- /* generate replacement (temporarily (mis)uses p) */
- for (p = collstart; p < collend; ++p) {
- char buffer[2+29+1+1];
- char *cp;
- sprintf(buffer, "&#%d;", (int)*p);
- if (charmaptranslate_makespace(&res, &str,
- (str-PyUnicode_AS_UNICODE(res))+strlen(buffer)+(endp-collend)))
- goto onError;
- for (cp = buffer; *cp; ++cp)
- *str++ = *cp;
- }
- p = collend;
- break;
- default:
- repunicode = unicode_translate_call_errorhandler(errors, &errorHandler,
- reason, startp, size, &exc,
- collstart-startp, collend-startp, &newpos);
- if (repunicode == NULL)
- goto onError;
- /* generate replacement */
- repsize = PyUnicode_GET_SIZE(repunicode);
- if (charmaptranslate_makespace(&res, &str,
- (str-PyUnicode_AS_UNICODE(res))+repsize+(endp-collend))) {
- Py_DECREF(repunicode);
- goto onError;
- }
- for (uni2 = PyUnicode_AS_UNICODE(repunicode); repsize-->0; ++uni2)
- *str++ = *uni2;
- p = startp + newpos;
- Py_DECREF(repunicode);
- }
- }
+ /* try to encode it */
+ PyObject *x = NULL;
+ if (charmaptranslate_output(startp, p, size, mapping, &res, &str, &x)) {
+ Py_XDECREF(x);
+ goto onError;
+ }
+ Py_XDECREF(x);
+ if (x!=Py_None) /* it worked => adjust input pointer */
+ ++p;
+ else { /* untranslatable character */
+ PyObject *repunicode = NULL; /* initialize to prevent gcc warning */
+ Py_ssize_t repsize;
+ Py_ssize_t newpos;
+ Py_UNICODE *uni2;
+ /* startpos for collecting untranslatable chars */
+ const Py_UNICODE *collstart = p;
+ const Py_UNICODE *collend = p+1;
+ const Py_UNICODE *coll;
+
+ /* find all untranslatable characters */
+ while (collend < endp) {
+ if (charmaptranslate_lookup(*collend, mapping, &x))
+ goto onError;
+ Py_XDECREF(x);
+ if (x!=Py_None)
+ break;
+ ++collend;
+ }
+ /* cache callback name lookup
+ * (if not done yet, i.e. it's the first error) */
+ if (known_errorHandler==-1) {
+ if ((errors==NULL) || (!strcmp(errors, "strict")))
+ known_errorHandler = 1;
+ else if (!strcmp(errors, "replace"))
+ known_errorHandler = 2;
+ else if (!strcmp(errors, "ignore"))
+ known_errorHandler = 3;
+ else if (!strcmp(errors, "xmlcharrefreplace"))
+ known_errorHandler = 4;
+ else
+ known_errorHandler = 0;
+ }
+ switch (known_errorHandler) {
+ case 1: /* strict */
+ raise_translate_exception(&exc, startp, size, collstart-startp, collend-startp, reason);
+ goto onError;
+ case 2: /* replace */
+ /* No need to check for space, this is a 1:1 replacement */
+ for (coll = collstart; coll<collend; ++coll)
+ *str++ = '?';
+ /* fall through */
+ case 3: /* ignore */
+ p = collend;
+ break;
+ case 4: /* xmlcharrefreplace */
+ /* generate replacement (temporarily (mis)uses p) */
+ for (p = collstart; p < collend; ++p) {
+ char buffer[2+29+1+1];
+ char *cp;
+ sprintf(buffer, "&#%d;", (int)*p);
+ if (charmaptranslate_makespace(&res, &str,
+ (str-PyUnicode_AS_UNICODE(res))+strlen(buffer)+(endp-collend)))
+ goto onError;
+ for (cp = buffer; *cp; ++cp)
+ *str++ = *cp;
+ }
+ p = collend;
+ break;
+ default:
+ repunicode = unicode_translate_call_errorhandler(errors, &errorHandler,
+ reason, startp, size, &exc,
+ collstart-startp, collend-startp, &newpos);
+ if (repunicode == NULL)
+ goto onError;
+ /* generate replacement */
+ repsize = PyUnicode_GET_SIZE(repunicode);
+ if (charmaptranslate_makespace(&res, &str,
+ (str-PyUnicode_AS_UNICODE(res))+repsize+(endp-collend))) {
+ Py_DECREF(repunicode);
+ goto onError;
+ }
+ for (uni2 = PyUnicode_AS_UNICODE(repunicode); repsize-->0; ++uni2)
+ *str++ = *uni2;
+ p = startp + newpos;
+ Py_DECREF(repunicode);
+ }
+ }
}
/* Resize if we allocated too much */
respos = str-PyUnicode_AS_UNICODE(res);
if (respos<PyUnicode_GET_SIZE(res)) {
- if (PyUnicode_Resize(&res, respos) < 0)
- goto onError;
+ if (PyUnicode_Resize(&res, respos) < 0)
+ goto onError;
}
Py_XDECREF(exc);
Py_XDECREF(errorHandler);
}
/* Object-level wrapper around PyUnicode_TranslateCharmap().
   `str` is first converted with PyUnicode_FromObject(); the resulting
   reference is released again after the translation call, and the
   translation result is returned unchanged.  A failed conversion jumps
   to onError.
   NOTE(review): the onError label and the end of the function are not
   part of this excerpt. */
PyObject *PyUnicode_Translate(PyObject *str,
- PyObject *mapping,
- const char *errors)
+ PyObject *mapping,
+ const char *errors)
{
PyObject *result;
/* from here on `str` names the reference returned by the conversion */
str = PyUnicode_FromObject(str);
if (str == NULL)
- goto onError;
+ goto onError;
result = PyUnicode_TranslateCharmap(PyUnicode_AS_UNICODE(str),
- PyUnicode_GET_SIZE(str),
- mapping,
- errors);
+ PyUnicode_GET_SIZE(str),
+ mapping,
+ errors);
Py_DECREF(str);
return result;
/* --- Decimal Encoder ---------------------------------------------------- */
int PyUnicode_EncodeDecimal(Py_UNICODE *s,
- Py_ssize_t length,
- char *output,
- const char *errors)
+ Py_ssize_t length,
+ char *output,
+ const char *errors)
{
Py_UNICODE *p, *end;
PyObject *errorHandler = NULL;
int known_errorHandler = -1;
if (output == NULL) {
- PyErr_BadArgument();
- return -1;
+ PyErr_BadArgument();
+ return -1;
}
p = s;
end = s + length;
while (p < end) {
- register Py_UNICODE ch = *p;
- int decimal;
- PyObject *repunicode;
- Py_ssize_t repsize;
- Py_ssize_t newpos;
- Py_UNICODE *uni2;
- Py_UNICODE *collstart;
- Py_UNICODE *collend;
-
- if (Py_UNICODE_ISSPACE(ch)) {
- *output++ = ' ';
- ++p;
- continue;
- }
- decimal = Py_UNICODE_TODECIMAL(ch);
- if (decimal >= 0) {
- *output++ = '0' + decimal;
- ++p;
- continue;
- }
- if (0 < ch && ch < 256) {
- *output++ = (char)ch;
- ++p;
- continue;
- }
- /* All other characters are considered unencodable */
- collstart = p;
- collend = p+1;
- while (collend < end) {
- if ((0 < *collend && *collend < 256) ||
- !Py_UNICODE_ISSPACE(*collend) ||
- Py_UNICODE_TODECIMAL(*collend))
- break;
- }
- /* cache callback name lookup
- * (if not done yet, i.e. it's the first error) */
- if (known_errorHandler==-1) {
- if ((errors==NULL) || (!strcmp(errors, "strict")))
- known_errorHandler = 1;
- else if (!strcmp(errors, "replace"))
- known_errorHandler = 2;
- else if (!strcmp(errors, "ignore"))
- known_errorHandler = 3;
- else if (!strcmp(errors, "xmlcharrefreplace"))
- known_errorHandler = 4;
- else
- known_errorHandler = 0;
- }
- switch (known_errorHandler) {
- case 1: /* strict */
- raise_encode_exception(&exc, encoding, s, length, collstart-s, collend-s, reason);
- goto onError;
- case 2: /* replace */
- for (p = collstart; p < collend; ++p)
- *output++ = '?';
- /* fall through */
- case 3: /* ignore */
- p = collend;
- break;
- case 4: /* xmlcharrefreplace */
- /* generate replacement (temporarily (mis)uses p) */
- for (p = collstart; p < collend; ++p)
- output += sprintf(output, "&#%d;", (int)*p);
- p = collend;
- break;
- default:
- repunicode = unicode_encode_call_errorhandler(errors, &errorHandler,
- encoding, reason, s, length, &exc,
- collstart-s, collend-s, &newpos);
- if (repunicode == NULL)
- goto onError;
- /* generate replacement */
- repsize = PyUnicode_GET_SIZE(repunicode);
- for (uni2 = PyUnicode_AS_UNICODE(repunicode); repsize-->0; ++uni2) {
- Py_UNICODE ch = *uni2;
- if (Py_UNICODE_ISSPACE(ch))
- *output++ = ' ';
- else {
- decimal = Py_UNICODE_TODECIMAL(ch);
- if (decimal >= 0)
- *output++ = '0' + decimal;
- else if (0 < ch && ch < 256)
- *output++ = (char)ch;
- else {
- Py_DECREF(repunicode);
- raise_encode_exception(&exc, encoding,
- s, length, collstart-s, collend-s, reason);
- goto onError;
- }
- }
- }
- p = s + newpos;
- Py_DECREF(repunicode);
- }
+ register Py_UNICODE ch = *p;
+ int decimal;
+ PyObject *repunicode;
+ Py_ssize_t repsize;
+ Py_ssize_t newpos;
+ Py_UNICODE *uni2;
+ Py_UNICODE *collstart;
+ Py_UNICODE *collend;
+
+ if (Py_UNICODE_ISSPACE(ch)) {
+ *output++ = ' ';
+ ++p;
+ continue;
+ }
+ decimal = Py_UNICODE_TODECIMAL(ch);
+ if (decimal >= 0) {
+ *output++ = '0' + decimal;
+ ++p;
+ continue;
+ }
+ if (0 < ch && ch < 256) {
+ *output++ = (char)ch;
+ ++p;
+ continue;
+ }
+ /* All other characters are considered unencodable */
+ collstart = p;
+ collend = p+1;
+ while (collend < end) {
+ if ((0 < *collend && *collend < 256) ||
+ !Py_UNICODE_ISSPACE(*collend) ||
+ Py_UNICODE_TODECIMAL(*collend))
+ break;
+ }
+ /* cache callback name lookup
+ * (if not done yet, i.e. it's the first error) */
+ if (known_errorHandler==-1) {
+ if ((errors==NULL) || (!strcmp(errors, "strict")))
+ known_errorHandler = 1;
+ else if (!strcmp(errors, "replace"))
+ known_errorHandler = 2;
+ else if (!strcmp(errors, "ignore"))
+ known_errorHandler = 3;
+ else if (!strcmp(errors, "xmlcharrefreplace"))
+ known_errorHandler = 4;
+ else
+ known_errorHandler = 0;
+ }
+ switch (known_errorHandler) {
+ case 1: /* strict */
+ raise_encode_exception(&exc, encoding, s, length, collstart-s, collend-s, reason);
+ goto onError;
+ case 2: /* replace */
+ for (p = collstart; p < collend; ++p)
+ *output++ = '?';
+ /* fall through */
+ case 3: /* ignore */
+ p = collend;
+ break;
+ case 4: /* xmlcharrefreplace */
+ /* generate replacement (temporarily (mis)uses p) */
+ for (p = collstart; p < collend; ++p)
+ output += sprintf(output, "&#%d;", (int)*p);
+ p = collend;
+ break;
+ default:
+ repunicode = unicode_encode_call_errorhandler(errors, &errorHandler,
+ encoding, reason, s, length, &exc,
+ collstart-s, collend-s, &newpos);
+ if (repunicode == NULL)
+ goto onError;
+ /* generate replacement */
+ repsize = PyUnicode_GET_SIZE(repunicode);
+ for (uni2 = PyUnicode_AS_UNICODE(repunicode); repsize-->0; ++uni2) {
+ Py_UNICODE ch = *uni2;
+ if (Py_UNICODE_ISSPACE(ch))
+ *output++ = ' ';
+ else {
+ decimal = Py_UNICODE_TODECIMAL(ch);
+ if (decimal >= 0)
+ *output++ = '0' + decimal;
+ else if (0 < ch && ch < 256)
+ *output++ = (char)ch;
+ else {
+ Py_DECREF(repunicode);
+ raise_encode_exception(&exc, encoding,
+ s, length, collstart-s, collend-s, reason);
+ goto onError;
+ }
+ }
+ }
+ p = s + newpos;
+ Py_DECREF(repunicode);
+ }
}
/* 0-terminate the output string */
*output++ = '\0';
str_obj = (PyUnicodeObject*) PyUnicode_FromObject(str);
if (!str_obj)
- return -1;
+ return -1;
sub_obj = (PyUnicodeObject*) PyUnicode_FromObject(substr);
if (!sub_obj) {
- Py_DECREF(str_obj);
- return -1;
+ Py_DECREF(str_obj);
+ return -1;
}
FIX_START_END(str_obj);
str = PyUnicode_FromObject(str);
if (!str)
- return -2;
+ return -2;
sub = PyUnicode_FromObject(sub);
if (!sub) {
- Py_DECREF(str);
- return -2;
+ Py_DECREF(str);
+ return -2;
}
if (direction > 0)
/* Check whether `substring` sits at one edge of the [start, end) range
   of `self`.
   Returns 1 immediately for an empty substring.  Otherwise `end` is
   moved back by the substring length; if that puts it before `start`
   the substring cannot fit and 0 is returned.  With direction > 0 the
   comparison is made at the adjusted `end` (suffix test), otherwise at
   `start` (prefix test).  Returns 1 on a match, 0 otherwise.
   NOTE(review): no clamping of start/end against self->length is
   visible in this excerpt -- confirm whether it happens elsewhere. */
static
int tailmatch(PyUnicodeObject *self,
- PyUnicodeObject *substring,
- Py_ssize_t start,
- Py_ssize_t end,
- int direction)
+ PyUnicodeObject *substring,
+ Py_ssize_t start,
+ Py_ssize_t end,
+ int direction)
{
if (substring->length == 0)
return 1;
/* `end` now marks the last position at which substring could begin */
end -= substring->length;
if (end < start)
- return 0;
+ return 0;
if (direction > 0) {
- if (Py_UNICODE_MATCH(self, end, substring))
- return 1;
+ if (Py_UNICODE_MATCH(self, end, substring))
+ return 1;
} else {
if (Py_UNICODE_MATCH(self, start, substring))
- return 1;
+ return 1;
}
return 0;
}
/* Object-level wrapper around tailmatch().
   Both `str` and `substr` are converted with PyUnicode_FromObject();
   if either conversion fails the function returns -1 (releasing the
   first converted object when the second conversion is the one that
   failed).  Otherwise the result of tailmatch() on the converted
   objects is returned after both temporary references are released. */
Py_ssize_t PyUnicode_Tailmatch(PyObject *str,
- PyObject *substr,
- Py_ssize_t start,
- Py_ssize_t end,
- int direction)
+ PyObject *substr,
+ Py_ssize_t start,
+ Py_ssize_t end,
+ int direction)
{
Py_ssize_t result;
str = PyUnicode_FromObject(str);
if (str == NULL)
- return -1;
+ return -1;
substr = PyUnicode_FromObject(substr);
if (substr == NULL) {
- Py_DECREF(str);
- return -1;
+ Py_DECREF(str);
+ return -1;
}
result = tailmatch((PyUnicodeObject *)str,
- (PyUnicodeObject *)substr,
- start, end, direction);
+ (PyUnicodeObject *)substr,
+ start, end, direction);
/* drop the references obtained from the two conversions above */
Py_DECREF(str);
Py_DECREF(substr);
return result;
/* Apply the in-place transformation `fixfct` to a copy of `self`.
   A new Unicode object of the same length is allocated and filled with
   self's characters, then handed to fixfct.  If fixfct returns 0 and
   `self` passes PyUnicode_CheckExact(), the copy is discarded and a new
   reference to `self` is returned instead; in every other case the
   copy is returned.  Returns NULL if the copy cannot be allocated. */
static
PyObject *fixup(PyUnicodeObject *self,
- int (*fixfct)(PyUnicodeObject *s))
+ int (*fixfct)(PyUnicodeObject *s))
{
PyUnicodeObject *u;
u = (PyUnicodeObject*) PyUnicode_FromUnicode(NULL, self->length);
if (u == NULL)
- return NULL;
+ return NULL;
Py_UNICODE_COPY(u->str, self->str, self->length);
if (!fixfct(u) && PyUnicode_CheckExact(self)) {
- /* fixfct should return TRUE if it modified the buffer. If
- FALSE, return a reference to the original buffer instead
- (to save space, not time) */
- Py_INCREF(self);
- Py_DECREF(u);
- return (PyObject*) self;
+ /* fixfct should return TRUE if it modified the buffer. If
+ FALSE, return a reference to the original buffer instead
+ (to save space, not time) */
+ Py_INCREF(self);
+ Py_DECREF(u);
+ return (PyObject*) self;
}
return (PyObject*) u;
}
int status = 0;
while (len-- > 0) {
- register Py_UNICODE ch;
+ register Py_UNICODE ch;
- ch = Py_UNICODE_TOUPPER(*s);
- if (ch != *s) {
+ ch = Py_UNICODE_TOUPPER(*s);
+ if (ch != *s) {
status = 1;
- *s = ch;
- }
+ *s = ch;
+ }
s++;
}
int status = 0;
while (len-- > 0) {
- register Py_UNICODE ch;
+ register Py_UNICODE ch;
- ch = Py_UNICODE_TOLOWER(*s);
- if (ch != *s) {
+ ch = Py_UNICODE_TOLOWER(*s);
+ if (ch != *s) {
status = 1;
- *s = ch;
- }
+ *s = ch;
+ }
s++;
}
int status = 0;
if (len == 0)
- return 0;
+ return 0;
if (Py_UNICODE_ISLOWER(*s)) {
- *s = Py_UNICODE_TOUPPER(*s);
- status = 1;
+ *s = Py_UNICODE_TOUPPER(*s);
+ status = 1;
}
s++;
while (--len > 0) {
/* Shortcut for single character strings */
if (PyUnicode_GET_SIZE(self) == 1) {
- Py_UNICODE ch = Py_UNICODE_TOTITLE(*p);
- if (*p != ch) {
- *p = ch;
- return 1;
- }
- else
- return 0;
+ Py_UNICODE ch = Py_UNICODE_TOTITLE(*p);
+ if (*p != ch) {
+ *p = ch;
+ return 1;
+ }
+ else
+ return 0;
}
e = p + PyUnicode_GET_SIZE(self);
previous_is_cased = 0;
for (; p < e; p++) {
- register const Py_UNICODE ch = *p;
+ register const Py_UNICODE ch = *p;
- if (previous_is_cased)
- *p = Py_UNICODE_TOLOWER(ch);
- else
- *p = Py_UNICODE_TOTITLE(ch);
+ if (previous_is_cased)
+ *p = Py_UNICODE_TOLOWER(ch);
+ else
+ *p = Py_UNICODE_TOTITLE(ch);
- if (Py_UNICODE_ISLOWER(ch) ||
- Py_UNICODE_ISUPPER(ch) ||
- Py_UNICODE_ISTITLE(ch))
- previous_is_cased = 1;
- else
- previous_is_cased = 0;
+ if (Py_UNICODE_ISLOWER(ch) ||
+ Py_UNICODE_ISUPPER(ch) ||
+ Py_UNICODE_ISTITLE(ch))
+ previous_is_cased = 1;
+ else
+ previous_is_cased = 0;
}
return 1;
}
fseq = PySequence_Fast(seq, "");
if (fseq == NULL) {
- return NULL;
+ return NULL;
}
/* NOTE: the following code can't call back into Python code,
seqlen = PySequence_Fast_GET_SIZE(fseq);
/* If empty sequence, return u"". */
if (seqlen == 0) {
- res = _PyUnicode_New(0); /* empty sequence; return u"" */
- goto Done;
+ res = _PyUnicode_New(0); /* empty sequence; return u"" */
+ goto Done;
}
items = PySequence_Fast_ITEMS(fseq);
/* If singleton sequence with an exact Unicode, return that. */
if (seqlen == 1) {
- item = items[0];
- if (PyUnicode_CheckExact(item)) {
- Py_INCREF(item);
- res = (PyUnicodeObject *)item;
- goto Done;
- }
+ item = items[0];
+ if (PyUnicode_CheckExact(item)) {
+ Py_INCREF(item);
+ res = (PyUnicodeObject *)item;
+ goto Done;
+ }
}
else {
/* Set up sep and seplen */
for (i = 0; i < seqlen; i++) {
const Py_ssize_t old_sz = sz;
item = items[i];
- if (!PyUnicode_Check(item)) {
- PyErr_Format(PyExc_TypeError,
- "sequence item %zd: expected str instance,"
- " %.80s found",
- i, Py_TYPE(item)->tp_name);
- goto onError;
- }
+ if (!PyUnicode_Check(item)) {
+ PyErr_Format(PyExc_TypeError,
+ "sequence item %zd: expected str instance,"
+ " %.80s found",
+ i, Py_TYPE(item)->tp_name);
+ goto onError;
+ }
sz += PyUnicode_GET_SIZE(item);
if (i != 0)
sz += seplen;
Py_ssize_t itemlen;
item = items[i];
itemlen = PyUnicode_GET_SIZE(item);
- /* Copy item, and maybe the separator. */
- if (i) {
- Py_UNICODE_COPY(res_p, sep, seplen);
- res_p += seplen;
- }
- Py_UNICODE_COPY(res_p, PyUnicode_AS_UNICODE(item), itemlen);
- res_p += itemlen;
+ /* Copy item, and maybe the separator. */
+ if (i) {
+ Py_UNICODE_COPY(res_p, sep, seplen);
+ res_p += seplen;
+ }
+ Py_UNICODE_COPY(res_p, PyUnicode_AS_UNICODE(item), itemlen);
+ res_p += itemlen;
}
Done:
static
PyUnicodeObject *pad(PyUnicodeObject *self,
- Py_ssize_t left,
- Py_ssize_t right,
- Py_UNICODE fill)
+ Py_ssize_t left,
+ Py_ssize_t right,
+ Py_UNICODE fill)
{
PyUnicodeObject *u;
return u;
}
/* SPLIT_APPEND(data, left, right)
 *
 * Creates a new unicode object from the (right - left) characters starting
 * at data + left, and appends it to `list`.
 *
 * The macro is not self-contained: it assigns to a variable named `str`,
 * appends to a variable named `list`, and jumps to a label named `onError`,
 * all of which must exist in the expanding function.
 *
 *  - If PyUnicode_FromUnicode() returns NULL, control jumps to onError.
 *  - If PyList_Append() reports failure, `str` is released and control
 *    jumps to onError.
 *  - Otherwise `str` is released after the append.
 *
 * NOTE(review): the expansion is several statements ending in a dangling
 * `else`, not a do { ... } while (0) wrapper, so it should only be used as
 * a full statement inside braces.
 */
-#define SPLIT_APPEND(data, left, right) \
- str = PyUnicode_FromUnicode((data) + (left), (right) - (left)); \
- if (!str) \
- goto onError; \
- if (PyList_Append(list, str)) { \
- Py_DECREF(str); \
- goto onError; \
- } \
- else \
+#define SPLIT_APPEND(data, left, right) \
+ str = PyUnicode_FromUnicode((data) + (left), (right) - (left)); \
+ if (!str) \
+ goto onError; \
+ if (PyList_Append(list, str)) { \
+ Py_DECREF(str); \
+ goto onError; \
+ } \
+ else \
Py_DECREF(str);
static
PyObject *split_whitespace(PyUnicodeObject *self,
- PyObject *list,
- Py_ssize_t maxcount)
+ PyObject *list,
+ Py_ssize_t maxcount)
{
register Py_ssize_t i;
register Py_ssize_t j;
register const Py_UNICODE *buf = self->str;
for (i = j = 0; i < len; ) {
- /* find a token */
- while (i < len && Py_UNICODE_ISSPACE(buf[i]))
- i++;
- j = i;
- while (i < len && !Py_UNICODE_ISSPACE(buf[i]))
- i++;
- if (j < i) {
- if (maxcount-- <= 0)
- break;
- SPLIT_APPEND(buf, j, i);
- while (i < len && Py_UNICODE_ISSPACE(buf[i]))
- i++;
- j = i;
- }
+ /* find a token */
+ while (i < len && Py_UNICODE_ISSPACE(buf[i]))
+ i++;
+ j = i;
+ while (i < len && !Py_UNICODE_ISSPACE(buf[i]))
+ i++;
+ if (j < i) {
+ if (maxcount-- <= 0)
+ break;
+ SPLIT_APPEND(buf, j, i);
+ while (i < len && Py_UNICODE_ISSPACE(buf[i]))
+ i++;
+ j = i;
+ }
}
if (j < len) {
- SPLIT_APPEND(buf, j, len);
+ SPLIT_APPEND(buf, j, len);
}
return list;
}
PyObject *PyUnicode_Splitlines(PyObject *string,
- int keepends)
+ int keepends)
{
register Py_ssize_t i;
register Py_ssize_t j;
string = PyUnicode_FromObject(string);
if (string == NULL)
- return NULL;
+ return NULL;
data = PyUnicode_AS_UNICODE(string);
len = PyUnicode_GET_SIZE(string);
goto onError;
for (i = j = 0; i < len; ) {
- Py_ssize_t eol;
-
- /* Find a line and append it */
- while (i < len && !BLOOM_LINEBREAK(data[i]))
- i++;
-
- /* Skip the line break reading CRLF as one line break */
- eol = i;
- if (i < len) {
- if (data[i] == '\r' && i + 1 < len &&
- data[i+1] == '\n')
- i += 2;
- else
- i++;
- if (keepends)
- eol = i;
- }
- SPLIT_APPEND(data, j, eol);
- j = i;
+ Py_ssize_t eol;
+
+ /* Find a line and append it */
+ while (i < len && !BLOOM_LINEBREAK(data[i]))
+ i++;
+
+ /* Skip the line break reading CRLF as one line break */
+ eol = i;
+ if (i < len) {
+ if (data[i] == '\r' && i + 1 < len &&
+ data[i+1] == '\n')
+ i += 2;
+ else
+ i++;
+ if (keepends)
+ eol = i;
+ }
+ SPLIT_APPEND(data, j, eol);
+ j = i;
}
if (j < len) {
- SPLIT_APPEND(data, j, len);
+ SPLIT_APPEND(data, j, len);
}
Py_DECREF(string);
static
PyObject *split_char(PyUnicodeObject *self,
- PyObject *list,
- Py_UNICODE ch,
- Py_ssize_t maxcount)
+ PyObject *list,
+ Py_UNICODE ch,
+ Py_ssize_t maxcount)
{
register Py_ssize_t i;
register Py_ssize_t j;
register const Py_UNICODE *buf = self->str;
for (i = j = 0; i < len; ) {
- if (buf[i] == ch) {
- if (maxcount-- <= 0)
- break;
- SPLIT_APPEND(buf, j, i);
- i = j = i + 1;
- } else
- i++;
+ if (buf[i] == ch) {
+ if (maxcount-- <= 0)
+ break;
+ SPLIT_APPEND(buf, j, i);
+ i = j = i + 1;
+ } else
+ i++;
}
if (j <= len) {
- SPLIT_APPEND(buf, j, len);
+ SPLIT_APPEND(buf, j, len);
}
return list;
static
PyObject *split_substring(PyUnicodeObject *self,
- PyObject *list,
- PyUnicodeObject *substring,
- Py_ssize_t maxcount)
+ PyObject *list,
+ PyUnicodeObject *substring,
+ Py_ssize_t maxcount)
{
register Py_ssize_t i;
register Py_ssize_t j;
PyObject *str;
for (i = j = 0; i <= len - sublen; ) {
- if (Py_UNICODE_MATCH(self, i, substring)) {
- if (maxcount-- <= 0)
- break;
- SPLIT_APPEND(self->str, j, i);
- i = j = i + sublen;
- } else
- i++;
+ if (Py_UNICODE_MATCH(self, i, substring)) {
+ if (maxcount-- <= 0)
+ break;
+ SPLIT_APPEND(self->str, j, i);
+ i = j = i + sublen;
+ } else
+ i++;
}
if (j <= len) {
- SPLIT_APPEND(self->str, j, len);
+ SPLIT_APPEND(self->str, j, len);
}
return list;
static
PyObject *rsplit_whitespace(PyUnicodeObject *self,
- PyObject *list,
- Py_ssize_t maxcount)
+ PyObject *list,
+ Py_ssize_t maxcount)
{
register Py_ssize_t i;
register Py_ssize_t j;
register const Py_UNICODE *buf = self->str;
for (i = j = len - 1; i >= 0; ) {
- /* find a token */
- while (i >= 0 && Py_UNICODE_ISSPACE(buf[i]))
- i--;
- j = i;
- while (i >= 0 && !Py_UNICODE_ISSPACE(buf[i]))
- i--;
- if (j > i) {
- if (maxcount-- <= 0)
- break;
- SPLIT_APPEND(buf, i + 1, j + 1);
- while (i >= 0 && Py_UNICODE_ISSPACE(buf[i]))
- i--;
- j = i;
- }
+ /* find a token */
+ while (i >= 0 && Py_UNICODE_ISSPACE(buf[i]))
+ i--;
+ j = i;
+ while (i >= 0 && !Py_UNICODE_ISSPACE(buf[i]))
+ i--;
+ if (j > i) {
+ if (maxcount-- <= 0)
+ break;
+ SPLIT_APPEND(buf, i + 1, j + 1);
+ while (i >= 0 && Py_UNICODE_ISSPACE(buf[i]))
+ i--;
+ j = i;
+ }
}
if (j >= 0) {
- SPLIT_APPEND(buf, 0, j + 1);
+ SPLIT_APPEND(buf, 0, j + 1);
}
if (PyList_Reverse(list) < 0)
goto onError;
return NULL;
}
-static
+static
PyObject *rsplit_char(PyUnicodeObject *self,
- PyObject *list,
- Py_UNICODE ch,
- Py_ssize_t maxcount)
+ PyObject *list,
+ Py_UNICODE ch,
+ Py_ssize_t maxcount)
{
register Py_ssize_t i;
register Py_ssize_t j;
register const Py_UNICODE *buf = self->str;
for (i = j = len - 1; i >= 0; ) {
- if (buf[i] == ch) {
- if (maxcount-- <= 0)
- break;
- SPLIT_APPEND(buf, i + 1, j + 1);
- j = i = i - 1;
- } else
- i--;
+ if (buf[i] == ch) {
+ if (maxcount-- <= 0)
+ break;
+ SPLIT_APPEND(buf, i + 1, j + 1);
+ j = i = i - 1;
+ } else
+ i--;
}
if (j >= -1) {
- SPLIT_APPEND(buf, 0, j + 1);
+ SPLIT_APPEND(buf, 0, j + 1);
}
if (PyList_Reverse(list) < 0)
goto onError;
return NULL;
}
-static
+static
PyObject *rsplit_substring(PyUnicodeObject *self,
- PyObject *list,
- PyUnicodeObject *substring,
- Py_ssize_t maxcount)
+ PyObject *list,
+ PyUnicodeObject *substring,
+ Py_ssize_t maxcount)
{
register Py_ssize_t i;
register Py_ssize_t j;
PyObject *str;
for (i = len - sublen, j = len; i >= 0; ) {
- if (Py_UNICODE_MATCH(self, i, substring)) {
- if (maxcount-- <= 0)
- break;
- SPLIT_APPEND(self->str, i + sublen, j);
- j = i;
- i -= sublen;
- } else
- i--;
+ if (Py_UNICODE_MATCH(self, i, substring)) {
+ if (maxcount-- <= 0)
+ break;
+ SPLIT_APPEND(self->str, i + sublen, j);
+ j = i;
+ i -= sublen;
+ } else
+ i--;
}
if (j >= 0) {
- SPLIT_APPEND(self->str, 0, j);
+ SPLIT_APPEND(self->str, 0, j);
}
if (PyList_Reverse(list) < 0)
goto onError;
static
PyObject *split(PyUnicodeObject *self,
- PyUnicodeObject *substring,
- Py_ssize_t maxcount)
+ PyUnicodeObject *substring,
+ Py_ssize_t maxcount)
{
PyObject *list;
return NULL;
if (substring == NULL)
- return split_whitespace(self,list,maxcount);
+ return split_whitespace(self,list,maxcount);
else if (substring->length == 1)
- return split_char(self,list,substring->str[0],maxcount);
+ return split_char(self,list,substring->str[0],maxcount);
else if (substring->length == 0) {
- Py_DECREF(list);
- PyErr_SetString(PyExc_ValueError, "empty separator");
- return NULL;
+ Py_DECREF(list);
+ PyErr_SetString(PyExc_ValueError, "empty separator");
+ return NULL;
}
else
- return split_substring(self,list,substring,maxcount);
+ return split_substring(self,list,substring,maxcount);
}
static
PyObject *rsplit(PyUnicodeObject *self,
- PyUnicodeObject *substring,
- Py_ssize_t maxcount)
+ PyUnicodeObject *substring,
+ Py_ssize_t maxcount)
{
PyObject *list;
return NULL;
if (substring == NULL)
- return rsplit_whitespace(self,list,maxcount);
+ return rsplit_whitespace(self,list,maxcount);
else if (substring->length == 1)
- return rsplit_char(self,list,substring->str[0],maxcount);
+ return rsplit_char(self,list,substring->str[0],maxcount);
else if (substring->length == 0) {
- Py_DECREF(list);
- PyErr_SetString(PyExc_ValueError, "empty separator");
- return NULL;
+ Py_DECREF(list);
+ PyErr_SetString(PyExc_ValueError, "empty separator");
+ return NULL;
}
else
- return rsplit_substring(self,list,substring,maxcount);
+ return rsplit_substring(self,list,substring,maxcount);
}
static
PyObject *replace(PyUnicodeObject *self,
- PyUnicodeObject *str1,
- PyUnicodeObject *str2,
- Py_ssize_t maxcount)
+ PyUnicodeObject *str1,
+ PyUnicodeObject *str2,
+ Py_ssize_t maxcount)
{
PyUnicodeObject *u;
if (maxcount < 0)
- maxcount = PY_SSIZE_T_MAX;
+ maxcount = PY_SSIZE_T_MAX;
if (str1->length == str2->length) {
/* same length */
break;
j++;
}
- if (j > i) {
+ if (j > i) {
if (j > e)
break;
/* copy unchanged part [i:j] */
/* Capitalize each word */
for (i = 0; i < PyList_GET_SIZE(list); i++) {
item = fixup((PyUnicodeObject *)PyList_GET_ITEM(list, i),
- fixcapitalize);
+ fixcapitalize);
if (item == NULL)
goto onError;
Py_DECREF(PyList_GET_ITEM(list, i));
/* Convert `obj` into a single fill character.
 *
 * `addr` is treated as a Py_UNICODE* and receives the character on success.
 * The object is first coerced with PyUnicode_FromObject(); the result must
 * be exactly one character long.
 *
 * Returns 1 on success.  Returns 0 with a TypeError set when the coercion
 * fails or when the coerced string's length is not 1.  In the coercion
 * failure case the TypeError message set here is the one the caller sees.
 *
 * The temporary unicode object is released on every path after it has been
 * created.
 *
 * NOTE(review): the (PyObject *, void *) -> int shape with 1/0 results
 * looks like an "O&" converter for PyArg_ParseTuple -- confirm at the
 * call sites.
 */
static int
convert_uc(PyObject *obj, void *addr)
{
- Py_UNICODE *fillcharloc = (Py_UNICODE *)addr;
- PyObject *uniobj;
- Py_UNICODE *unistr;
-
- uniobj = PyUnicode_FromObject(obj);
- if (uniobj == NULL) {
- PyErr_SetString(PyExc_TypeError,
- "The fill character cannot be converted to Unicode");
- return 0;
- }
- if (PyUnicode_GET_SIZE(uniobj) != 1) {
- PyErr_SetString(PyExc_TypeError,
- "The fill character must be exactly one character long");
- Py_DECREF(uniobj);
- return 0;
- }
- unistr = PyUnicode_AS_UNICODE(uniobj);
- *fillcharloc = unistr[0];
- Py_DECREF(uniobj);
- return 1;
+ Py_UNICODE *fillcharloc = (Py_UNICODE *)addr;
+ PyObject *uniobj;
+ Py_UNICODE *unistr;
+
+ uniobj = PyUnicode_FromObject(obj);
+ if (uniobj == NULL) {
+ PyErr_SetString(PyExc_TypeError,
+ "The fill character cannot be converted to Unicode");
+ return 0;
+ }
+ if (PyUnicode_GET_SIZE(uniobj) != 1) {
+ PyErr_SetString(PyExc_TypeError,
+ "The fill character must be exactly one character long");
+ Py_DECREF(uniobj);
+ return 0;
+ }
+ unistr = PyUnicode_AS_UNICODE(uniobj);
+ *fillcharloc = unistr[0];
+ Py_DECREF(uniobj);
+ return 1;
}
PyDoc_STRVAR(center__doc__,
c1 = *s1++;
c2 = *s2++;
- if (c1 > (1<<11) * 26)
- c1 += utf16Fixup[c1>>11];
- if (c2 > (1<<11) * 26)
+ if (c1 > (1<<11) * 26)
+ c1 += utf16Fixup[c1>>11];
+ if (c2 > (1<<11) * 26)
c2 += utf16Fixup[c2>>11];
/* now c1 and c2 are in UTF-32-compatible order */
#endif
int PyUnicode_Compare(PyObject *left,
- PyObject *right)
+ PyObject *right)
{
if (PyUnicode_Check(left) && PyUnicode_Check(right))
return unicode_compare((PyUnicodeObject *)left,
id = PyUnicode_AS_UNICODE(uni);
/* Compare Unicode string and source character set string */
for (i = 0; id[i] && str[i]; i++)
- if (id[i] != str[i])
- return ((int)id[i] < (int)str[i]) ? -1 : 1;
+ if (id[i] != str[i])
+ return ((int)id[i] < (int)str[i]) ? -1 : 1;
if (id[i])
- return 1; /* uni is longer */
+ return 1; /* uni is longer */
if (str[i])
- return -1; /* str is longer */
+ return -1; /* str is longer */
return 0;
}
/* TEST_COND(cond): evaluates to Py_True when `cond` is non-zero and to
   Py_False otherwise.  The macro itself does not touch reference counts;
   the user of the result is responsible for any Py_INCREF. */
#define TEST_COND(cond) \
- ((cond) ? Py_True : Py_False)
+ ((cond) ? Py_True : Py_False)
PyObject *PyUnicode_RichCompare(PyObject *left,
PyObject *right,
int op)
{
int result;
-
+
if (PyUnicode_Check(left) && PyUnicode_Check(right)) {
PyObject *v;
if (((PyUnicodeObject *) left)->length !=
else
result = unicode_compare((PyUnicodeObject *)left,
(PyUnicodeObject *)right);
-
+
/* Convert the return value to a Boolean */
switch (op) {
case Py_EQ:
Py_INCREF(v);
return v;
}
-
+
Py_INCREF(Py_NotImplemented);
return Py_NotImplemented;
}
int PyUnicode_Contains(PyObject *container,
- PyObject *element)
+ PyObject *element)
{
PyObject *str, *sub;
int result;
/* Coerce the two arguments */
sub = PyUnicode_FromObject(element);
if (!sub) {
- PyErr_Format(PyExc_TypeError,
- "'in <string>' requires string as left operand, not %s",
- element->ob_type->tp_name);
+ PyErr_Format(PyExc_TypeError,
+ "'in <string>' requires string as left operand, not %s",
+ element->ob_type->tp_name);
return -1;
}
/* Concat to string or Unicode object giving a new Unicode object. */
PyObject *PyUnicode_Concat(PyObject *left,
- PyObject *right)
+ PyObject *right)
{
PyUnicodeObject *u = NULL, *v = NULL, *w;
/* Coerce the two arguments */
u = (PyUnicodeObject *)PyUnicode_FromObject(left);
if (u == NULL)
- goto onError;
+ goto onError;
v = (PyUnicodeObject *)PyUnicode_FromObject(right);
if (v == NULL)
- goto onError;
+ goto onError;
/* Shortcuts */
if (v == unicode_empty) {
- Py_DECREF(v);
- return (PyObject *)u;
+ Py_DECREF(v);
+ return (PyObject *)u;
}
if (u == unicode_empty) {
- Py_DECREF(u);
- return (PyObject *)v;
+ Py_DECREF(u);
+ return (PyObject *)v;
}
/* Concat the two Unicode strings */
w = _PyUnicode_New(u->length + v->length);
if (w == NULL)
- goto onError;
+ goto onError;
Py_UNICODE_COPY(w->str, u->str, u->length);
Py_UNICODE_COPY(w->str + u->length, v->str, v->length);
/* Replace *pleft with the concatenation of *pleft and right.
 *
 *  - If *pleft is NULL on entry, nothing happens.
 *  - If `right` is NULL, or *pleft is not a unicode object, the reference
 *    held in *pleft is released and *pleft is set to NULL.
 *  - Otherwise the result of PyUnicode_Concat(*pleft, right) is stored in
 *    *pleft after the old reference has been released.  Whatever
 *    PyUnicode_Concat() returned is stored as-is, so the caller must check
 *    *pleft afterwards (presumably NULL on failure -- confirm against
 *    PyUnicode_Concat).
 *
 * The reference to `right` is never released here.
 */
void
PyUnicode_Append(PyObject **pleft, PyObject *right)
{
- PyObject *new;
- if (*pleft == NULL)
- return;
- if (right == NULL || !PyUnicode_Check(*pleft)) {
- Py_DECREF(*pleft);
- *pleft = NULL;
- return;
- }
- new = PyUnicode_Concat(*pleft, right);
- Py_DECREF(*pleft);
- *pleft = new;
+ PyObject *new;
+ if (*pleft == NULL)
+ return;
+ if (right == NULL || !PyUnicode_Check(*pleft)) {
+ Py_DECREF(*pleft);
+ *pleft = NULL;
+ return;
+ }
+ new = PyUnicode_Concat(*pleft, right);
+ Py_DECREF(*pleft);
+ *pleft = new;
}
/* Same as PyUnicode_Append(pleft, right), but additionally releases the
   caller's reference to `right` afterwards.  Py_XDECREF is used, so a NULL
   `right` is accepted. */
void
PyUnicode_AppendAndDel(PyObject **pleft, PyObject *right)
{
- PyUnicode_Append(pleft, right);
- Py_XDECREF(right);
+ PyUnicode_Append(pleft, right);
+ Py_XDECREF(right);
}
PyDoc_STRVAR(count__doc__,
PyObject *result;
if (!PyArg_ParseTuple(args, "O|O&O&:count", &substring,
- _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
+ _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
return NULL;
substring = (PyUnicodeObject *)PyUnicode_FromObject(
(PyObject *)substring);
if (substring == NULL)
- return NULL;
+ return NULL;
FIX_START_END(self);
int tabsize = 8;
if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
- return NULL;
+ return NULL;
/* First pass: determine size of output string */
i = 0; /* chars up to and including most recent \n or \r */
e = self->str + self->length; /* end of input */
for (p = self->str; p < e; p++)
if (*p == '\t') {
- if (tabsize > 0) {
- incr = tabsize - (j % tabsize); /* cannot overflow */
- if (j > PY_SSIZE_T_MAX - incr)
- goto overflow1;
- j += incr;
+ if (tabsize > 0) {
+ incr = tabsize - (j % tabsize); /* cannot overflow */
+ if (j > PY_SSIZE_T_MAX - incr)
+ goto overflow1;
+ j += incr;
}
- }
+ }
else {
- if (j > PY_SSIZE_T_MAX - 1)
- goto overflow1;
+ if (j > PY_SSIZE_T_MAX - 1)
+ goto overflow1;
j++;
if (*p == '\n' || *p == '\r') {
- if (i > PY_SSIZE_T_MAX - j)
- goto overflow1;
+ if (i > PY_SSIZE_T_MAX - j)
+ goto overflow1;
i += j;
j = 0;
}
}
if (i > PY_SSIZE_T_MAX - j)
- goto overflow1;
+ goto overflow1;
/* Second pass: create output string and fill it */
u = _PyUnicode_New(i + j);
for (p = self->str; p < e; p++)
if (*p == '\t') {
- if (tabsize > 0) {
- i = tabsize - (j % tabsize);
- j += i;
- while (i--) {
- if (q >= qe)
- goto overflow2;
- *q++ = ' ';
+ if (tabsize > 0) {
+ i = tabsize - (j % tabsize);
+ j += i;
+ while (i--) {
+ if (q >= qe)
+ goto overflow2;
+ *q++ = ' ';
}
- }
- }
- else {
- if (q >= qe)
- goto overflow2;
- *q++ = *p;
+ }
+ }
+ else {
+ if (q >= qe)
+ goto overflow2;
+ *q++ = *p;
j++;
if (*p == '\n' || *p == '\r')
j = 0;
/* Shortcut for single character strings */
if (PyUnicode_GET_SIZE(self) == 1)
- return PyBool_FromLong(Py_UNICODE_ISLOWER(*p));
+ return PyBool_FromLong(Py_UNICODE_ISLOWER(*p));
/* Special case for empty strings */
if (PyUnicode_GET_SIZE(self) == 0)
- return PyBool_FromLong(0);
+ return PyBool_FromLong(0);
e = p + PyUnicode_GET_SIZE(self);
cased = 0;
for (; p < e; p++) {
- register const Py_UNICODE ch = *p;
+ register const Py_UNICODE ch = *p;
- if (Py_UNICODE_ISUPPER(ch) || Py_UNICODE_ISTITLE(ch))
- return PyBool_FromLong(0);
- else if (!cased && Py_UNICODE_ISLOWER(ch))
- cased = 1;
+ if (Py_UNICODE_ISUPPER(ch) || Py_UNICODE_ISTITLE(ch))
+ return PyBool_FromLong(0);
+ else if (!cased && Py_UNICODE_ISLOWER(ch))
+ cased = 1;
}
return PyBool_FromLong(cased);
}
/* Shortcut for single character strings */
if (PyUnicode_GET_SIZE(self) == 1)
- return PyBool_FromLong(Py_UNICODE_ISUPPER(*p) != 0);
+ return PyBool_FromLong(Py_UNICODE_ISUPPER(*p) != 0);
/* Special case for empty strings */
if (PyUnicode_GET_SIZE(self) == 0)
- return PyBool_FromLong(0);
+ return PyBool_FromLong(0);
e = p + PyUnicode_GET_SIZE(self);
cased = 0;
for (; p < e; p++) {
- register const Py_UNICODE ch = *p;
+ register const Py_UNICODE ch = *p;
- if (Py_UNICODE_ISLOWER(ch) || Py_UNICODE_ISTITLE(ch))
- return PyBool_FromLong(0);
- else if (!cased && Py_UNICODE_ISUPPER(ch))
- cased = 1;
+ if (Py_UNICODE_ISLOWER(ch) || Py_UNICODE_ISTITLE(ch))
+ return PyBool_FromLong(0);
+ else if (!cased && Py_UNICODE_ISUPPER(ch))
+ cased = 1;
}
return PyBool_FromLong(cased);
}
/* Shortcut for single character strings */
if (PyUnicode_GET_SIZE(self) == 1)
- return PyBool_FromLong((Py_UNICODE_ISTITLE(*p) != 0) ||
- (Py_UNICODE_ISUPPER(*p) != 0));
+ return PyBool_FromLong((Py_UNICODE_ISTITLE(*p) != 0) ||
+ (Py_UNICODE_ISUPPER(*p) != 0));
/* Special case for empty strings */
if (PyUnicode_GET_SIZE(self) == 0)
- return PyBool_FromLong(0);
+ return PyBool_FromLong(0);
e = p + PyUnicode_GET_SIZE(self);
cased = 0;
previous_is_cased = 0;
for (; p < e; p++) {
- register const Py_UNICODE ch = *p;
-
- if (Py_UNICODE_ISUPPER(ch) || Py_UNICODE_ISTITLE(ch)) {
- if (previous_is_cased)
- return PyBool_FromLong(0);
- previous_is_cased = 1;
- cased = 1;
- }
- else if (Py_UNICODE_ISLOWER(ch)) {
- if (!previous_is_cased)
- return PyBool_FromLong(0);
- previous_is_cased = 1;
- cased = 1;
- }
- else
- previous_is_cased = 0;
+ register const Py_UNICODE ch = *p;
+
+ if (Py_UNICODE_ISUPPER(ch) || Py_UNICODE_ISTITLE(ch)) {
+ if (previous_is_cased)
+ return PyBool_FromLong(0);
+ previous_is_cased = 1;
+ cased = 1;
+ }
+ else if (Py_UNICODE_ISLOWER(ch)) {
+ if (!previous_is_cased)
+ return PyBool_FromLong(0);
+ previous_is_cased = 1;
+ cased = 1;
+ }
+ else
+ previous_is_cased = 0;
}
return PyBool_FromLong(cased);
}
/* Shortcut for single character strings */
if (PyUnicode_GET_SIZE(self) == 1 &&
- Py_UNICODE_ISSPACE(*p))
- return PyBool_FromLong(1);
+ Py_UNICODE_ISSPACE(*p))
+ return PyBool_FromLong(1);
/* Special case for empty strings */
if (PyUnicode_GET_SIZE(self) == 0)
- return PyBool_FromLong(0);
+ return PyBool_FromLong(0);
e = p + PyUnicode_GET_SIZE(self);
for (; p < e; p++) {
- if (!Py_UNICODE_ISSPACE(*p))
- return PyBool_FromLong(0);
+ if (!Py_UNICODE_ISSPACE(*p))
+ return PyBool_FromLong(0);
}
return PyBool_FromLong(1);
}
/* Shortcut for single character strings */
if (PyUnicode_GET_SIZE(self) == 1 &&
- Py_UNICODE_ISALPHA(*p))
- return PyBool_FromLong(1);
+ Py_UNICODE_ISALPHA(*p))
+ return PyBool_FromLong(1);
/* Special case for empty strings */
if (PyUnicode_GET_SIZE(self) == 0)
- return PyBool_FromLong(0);
+ return PyBool_FromLong(0);
e = p + PyUnicode_GET_SIZE(self);
for (; p < e; p++) {
- if (!Py_UNICODE_ISALPHA(*p))
- return PyBool_FromLong(0);
+ if (!Py_UNICODE_ISALPHA(*p))
+ return PyBool_FromLong(0);
}
return PyBool_FromLong(1);
}
/* Shortcut for single character strings */
if (PyUnicode_GET_SIZE(self) == 1 &&
- Py_UNICODE_ISALNUM(*p))
- return PyBool_FromLong(1);
+ Py_UNICODE_ISALNUM(*p))
+ return PyBool_FromLong(1);
/* Special case for empty strings */
if (PyUnicode_GET_SIZE(self) == 0)
- return PyBool_FromLong(0);
+ return PyBool_FromLong(0);
e = p + PyUnicode_GET_SIZE(self);
for (; p < e; p++) {
- if (!Py_UNICODE_ISALNUM(*p))
- return PyBool_FromLong(0);
+ if (!Py_UNICODE_ISALNUM(*p))
+ return PyBool_FromLong(0);
}
return PyBool_FromLong(1);
}
/* Shortcut for single character strings */
if (PyUnicode_GET_SIZE(self) == 1 &&
- Py_UNICODE_ISDECIMAL(*p))
- return PyBool_FromLong(1);
+ Py_UNICODE_ISDECIMAL(*p))
+ return PyBool_FromLong(1);
/* Special case for empty strings */
if (PyUnicode_GET_SIZE(self) == 0)
- return PyBool_FromLong(0);
+ return PyBool_FromLong(0);
e = p + PyUnicode_GET_SIZE(self);
for (; p < e; p++) {
- if (!Py_UNICODE_ISDECIMAL(*p))
- return PyBool_FromLong(0);
+ if (!Py_UNICODE_ISDECIMAL(*p))
+ return PyBool_FromLong(0);
}
return PyBool_FromLong(1);
}
/* Shortcut for single character strings */
if (PyUnicode_GET_SIZE(self) == 1 &&
- Py_UNICODE_ISDIGIT(*p))
- return PyBool_FromLong(1);
+ Py_UNICODE_ISDIGIT(*p))
+ return PyBool_FromLong(1);
/* Special case for empty strings */
if (PyUnicode_GET_SIZE(self) == 0)
- return PyBool_FromLong(0);
+ return PyBool_FromLong(0);
e = p + PyUnicode_GET_SIZE(self);
for (; p < e; p++) {
- if (!Py_UNICODE_ISDIGIT(*p))
- return PyBool_FromLong(0);
+ if (!Py_UNICODE_ISDIGIT(*p))
+ return PyBool_FromLong(0);
}
return PyBool_FromLong(1);
}
/* Shortcut for single character strings */
if (PyUnicode_GET_SIZE(self) == 1 &&
- Py_UNICODE_ISNUMERIC(*p))
- return PyBool_FromLong(1);
+ Py_UNICODE_ISNUMERIC(*p))
+ return PyBool_FromLong(1);
/* Special case for empty strings */
if (PyUnicode_GET_SIZE(self) == 0)
- return PyBool_FromLong(0);
+ return PyBool_FromLong(0);
e = p + PyUnicode_GET_SIZE(self);
for (; p < e; p++) {
- if (!Py_UNICODE_ISNUMERIC(*p))
- return PyBool_FromLong(0);
+ if (!Py_UNICODE_ISNUMERIC(*p))
+ return PyBool_FromLong(0);
}
return PyBool_FromLong(1);
}
/* Special case for empty strings */
if (PyUnicode_GET_SIZE(self) == 0)
- return 0;
+ return 0;
/* PEP 3131 says that the first character must be in
XID_Start and subsequent characters in XID_Continue,
and for the ASCII range, the 2.x rules apply (i.e
- start with letters and underscore, continue with
+ start with letters and underscore, continue with
letters, digits, underscore). However, given the current
definition of XID_Start and XID_Continue, it is sufficient
to check just for these, except that _ must be allowed
e = p + PyUnicode_GET_SIZE(self);
for (p++; p < e; p++) {
- if (!_PyUnicode_IsXidContinue(*p))
- return 0;
+ if (!_PyUnicode_IsXidContinue(*p))
+ return 0;
}
return 1;
}
/* Strip characters found in `sepobj` from one or both ends of `self`.
 *
 * striptype selects the ends: the leading end is stripped unless striptype
 * is RIGHTSTRIP, and the trailing end is stripped unless striptype is
 * LEFTSTRIP.
 *
 * `sepobj` is read with the PyUnicode_AS_UNICODE / PyUnicode_GET_SIZE
 * macros without any type check here, so the caller must pass a unicode
 * object.
 *
 * Returns `self` itself (with a new reference) when nothing was stripped
 * and `self` is an exact unicode instance; otherwise returns the result of
 * PyUnicode_FromUnicode() on the remaining slice s[i:j].
 *
 * NOTE(review): BLOOM_MEMBER / make_bloom_mask are defined elsewhere;
 * presumably the mask is a cheap pre-filter for "character occurs in sep"
 * -- confirm against their definitions.
 */
PyObject *
_PyUnicode_XStrip(PyUnicodeObject *self, int striptype, PyObject *sepobj)
{
- Py_UNICODE *s = PyUnicode_AS_UNICODE(self);
- Py_ssize_t len = PyUnicode_GET_SIZE(self);
- Py_UNICODE *sep = PyUnicode_AS_UNICODE(sepobj);
- Py_ssize_t seplen = PyUnicode_GET_SIZE(sepobj);
- Py_ssize_t i, j;
+ Py_UNICODE *s = PyUnicode_AS_UNICODE(self);
+ Py_ssize_t len = PyUnicode_GET_SIZE(self);
+ Py_UNICODE *sep = PyUnicode_AS_UNICODE(sepobj);
+ Py_ssize_t seplen = PyUnicode_GET_SIZE(sepobj);
+ Py_ssize_t i, j;
BLOOM_MASK sepmask = make_bloom_mask(sep, seplen);
/* i: index of the first character to keep */
- i = 0;
- if (striptype != RIGHTSTRIP) {
+ i = 0;
+ if (striptype != RIGHTSTRIP) {
while (i < len && BLOOM_MEMBER(sepmask, s[i], sep, seplen)) {
i++;
}
- }
+ }
/* j: exclusive end index of the characters to keep */
- j = len;
- if (striptype != LEFTSTRIP) {
+ j = len;
+ if (striptype != LEFTSTRIP) {
/* Walk j backwards over trailing members, never below i; the j++ after
   the loop turns the last kept index back into an exclusive bound. */
do {
j--;
} while (j >= i && BLOOM_MEMBER(sepmask, s[j], sep, seplen));
j++;
- }
+ }
- if (i == 0 && j == len && PyUnicode_CheckExact(self)) {
+ if (i == 0 && j == len && PyUnicode_CheckExact(self)) {
Py_INCREF(self);
return (PyObject*)self;
- }
- else
+ }
+ else
return PyUnicode_FromUnicode(s+i, j-i);
}
/* Strip whitespace (as decided by Py_UNICODE_ISSPACE) from one or both
 * ends of `self`.
 *
 * The leading end is stripped unless striptype is RIGHTSTRIP; the trailing
 * end is stripped unless striptype is LEFTSTRIP.  `i` ends up as the index
 * of the first kept character and `j` as the exclusive end index; the
 * backward scan never moves below `i`.
 *
 * Returns `self` itself (with a new reference) when nothing was stripped
 * and `self` is an exact unicode instance; otherwise returns the result of
 * PyUnicode_FromUnicode() on the slice s[i:j].
 */
static PyObject *
do_strip(PyUnicodeObject *self, int striptype)
{
- Py_UNICODE *s = PyUnicode_AS_UNICODE(self);
- Py_ssize_t len = PyUnicode_GET_SIZE(self), i, j;
+ Py_UNICODE *s = PyUnicode_AS_UNICODE(self);
+ Py_ssize_t len = PyUnicode_GET_SIZE(self), i, j;
- i = 0;
- if (striptype != RIGHTSTRIP) {
- while (i < len && Py_UNICODE_ISSPACE(s[i])) {
- i++;
- }
- }
+ i = 0;
+ if (striptype != RIGHTSTRIP) {
+ while (i < len && Py_UNICODE_ISSPACE(s[i])) {
+ i++;
+ }
+ }
- j = len;
- if (striptype != LEFTSTRIP) {
- do {
- j--;
- } while (j >= i && Py_UNICODE_ISSPACE(s[j]));
- j++;
- }
+ j = len;
+ if (striptype != LEFTSTRIP) {
+ do {
+ j--;
+ } while (j >= i && Py_UNICODE_ISSPACE(s[j]));
+ j++;
+ }
- if (i == 0 && j == len && PyUnicode_CheckExact(self)) {
- Py_INCREF(self);
- return (PyObject*)self;
- }
- else
- return PyUnicode_FromUnicode(s+i, j-i);
+ if (i == 0 && j == len && PyUnicode_CheckExact(self)) {
+ Py_INCREF(self);
+ return (PyObject*)self;
+ }
+ else
+ return PyUnicode_FromUnicode(s+i, j-i);
}
/* Strip helper for calls that carry an argument tuple.
 *
 * Parses `args` with the format string stripformat[striptype] into `sep`
 * (which starts out NULL).  Returns NULL if parsing fails.
 *
 *  - sep is a unicode object: strip the characters it contains via
 *    _PyUnicode_XStrip().
 *  - sep is any other non-None object: set a TypeError naming the method
 *    through STRIPNAME(striptype) and return NULL.
 *  - sep is NULL or None: strip whitespace via do_strip().
 *
 * NOTE(review): stripformat and STRIPNAME are defined elsewhere in the
 * file; their exact contents are not visible here.
 */
static PyObject *
do_argstrip(PyUnicodeObject *self, int striptype, PyObject *args)
{
- PyObject *sep = NULL;
+ PyObject *sep = NULL;
- if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
- return NULL;
+ if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
+ return NULL;
- if (sep != NULL && sep != Py_None) {
- if (PyUnicode_Check(sep))
- return _PyUnicode_XStrip(self, striptype, sep);
- else {
- PyErr_Format(PyExc_TypeError,
- "%s arg must be None or str",
- STRIPNAME(striptype));
- return NULL;
- }
- }
+ if (sep != NULL && sep != Py_None) {
+ if (PyUnicode_Check(sep))
+ return _PyUnicode_XStrip(self, striptype, sep);
+ else {
+ PyErr_Format(PyExc_TypeError,
+ "%s arg must be None or str",
+ STRIPNAME(striptype));
+ return NULL;
+ }
+ }
- return do_strip(self, striptype);
+ return do_strip(self, striptype);
}
/* Strip both ends of `self`.  With an empty argument tuple the whitespace
   path do_strip() is called directly, skipping argument parsing; otherwise
   the arguments are handed to do_argstrip().  Both calls use BOTHSTRIP. */
static PyObject *
unicode_strip(PyUnicodeObject *self, PyObject *args)
{
- if (PyTuple_GET_SIZE(args) == 0)
- return do_strip(self, BOTHSTRIP); /* Common case */
- else
- return do_argstrip(self, BOTHSTRIP, args);
+ if (PyTuple_GET_SIZE(args) == 0)
+ return do_strip(self, BOTHSTRIP); /* Common case */
+ else
+ return do_argstrip(self, BOTHSTRIP, args);
}
/* Strip the leading end of `self`.  With an empty argument tuple the
   whitespace path do_strip() is called directly, skipping argument parsing;
   otherwise the arguments are handed to do_argstrip().  Both calls use
   LEFTSTRIP. */
static PyObject *
unicode_lstrip(PyUnicodeObject *self, PyObject *args)
{
- if (PyTuple_GET_SIZE(args) == 0)
- return do_strip(self, LEFTSTRIP); /* Common case */
- else
- return do_argstrip(self, LEFTSTRIP, args);
+ if (PyTuple_GET_SIZE(args) == 0)
+ return do_strip(self, LEFTSTRIP); /* Common case */
+ else
+ return do_argstrip(self, LEFTSTRIP, args);
}
/* Strip the trailing end of `self`.  With an empty argument tuple the
   whitespace path do_strip() is called directly, skipping argument parsing;
   otherwise the arguments are handed to do_argstrip().  Both calls use
   RIGHTSTRIP. */
static PyObject *
unicode_rstrip(PyUnicodeObject *self, PyObject *args)
{
- if (PyTuple_GET_SIZE(args) == 0)
- return do_strip(self, RIGHTSTRIP); /* Common case */
- else
- return do_argstrip(self, RIGHTSTRIP, args);
+ if (PyTuple_GET_SIZE(args) == 0)
+ return do_strip(self, RIGHTSTRIP); /* Common case */
+ else
+ return do_argstrip(self, RIGHTSTRIP, args);
}
if (str->length == 1 && len > 0) {
Py_UNICODE_FILL(p, str->str[0], len);
} else {
- Py_ssize_t done = 0; /* number of characters copied this far */
- if (done < nchars) {
+ Py_ssize_t done = 0; /* number of characters copied this far */
+ if (done < nchars) {
Py_UNICODE_COPY(p, str->str, str->length);
done = str->length;
- }
- while (done < nchars) {
+ }
+ while (done < nchars) {
Py_ssize_t n = (done <= nchars-done) ? done : nchars-done;
Py_UNICODE_COPY(p+done, p, n);
done += n;
- }
+ }
}
return (PyObject*) u;
}
PyObject *PyUnicode_Replace(PyObject *obj,
- PyObject *subobj,
- PyObject *replobj,
- Py_ssize_t maxcount)
+ PyObject *subobj,
+ PyObject *replobj,
+ Py_ssize_t maxcount)
{
PyObject *self;
PyObject *str1;
self = PyUnicode_FromObject(obj);
if (self == NULL)
- return NULL;
+ return NULL;
str1 = PyUnicode_FromObject(subobj);
if (str1 == NULL) {
- Py_DECREF(self);
- return NULL;
+ Py_DECREF(self);
+ return NULL;
}
str2 = PyUnicode_FromObject(replobj);
if (str2 == NULL) {
- Py_DECREF(self);
- Py_DECREF(str1);
- return NULL;
+ Py_DECREF(self);
+ Py_DECREF(str1);
+ return NULL;
}
result = replace((PyUnicodeObject *)self,
- (PyUnicodeObject *)str1,
- (PyUnicodeObject *)str2,
- maxcount);
+ (PyUnicodeObject *)str1,
+ (PyUnicodeObject *)str2,
+ maxcount);
Py_DECREF(self);
Py_DECREF(str1);
Py_DECREF(str2);
return NULL;
str1 = (PyUnicodeObject *)PyUnicode_FromObject((PyObject *)str1);
if (str1 == NULL)
- return NULL;
+ return NULL;
str2 = (PyUnicodeObject *)PyUnicode_FromObject((PyObject *)str2);
if (str2 == NULL) {
- Py_DECREF(str1);
- return NULL;
+ Py_DECREF(str1);
+ return NULL;
}
result = replace(self, str1, str2, maxcount);
continue;
}
- /* Map special whitespace to '\t', \n', '\r' */
+ /* Map special whitespace to '\t', \n', '\r' */
if (ch == '\t') {
*p++ = '\\';
*p++ = 't';
*p++ = ch;
}
- /* Non-ASCII characters */
+ /* Non-ASCII characters */
else {
Py_UCS4 ucs = ch;
ch2 = *s;
if (ch >= 0xD800 && ch < 0xDC00 && ch2 >= 0xDC00
&& ch2 <= 0xDFFF) {
- ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF))
+ ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF))
+ 0x00010000;
- s++;
+ s++;
size--;
}
}
#endif
- /* Map Unicode whitespace and control characters
+ /* Map Unicode whitespace and control characters
(categories Z* and C* except ASCII space)
*/
if (!Py_UNICODE_ISPRINTABLE(ucs)) {
Py_ssize_t result;
if (!_ParseTupleFinds(args, &substring, &start, &end))
- return NULL;
+ return NULL;
result = stringlib_rfind_slice(
PyUnicode_AS_UNICODE(self), PyUnicode_GET_SIZE(self),
Py_ssize_t result;
if (!_ParseTupleFinds(args, &substring, &start, &end))
- return NULL;
+ return NULL;
result = stringlib_rfind_slice(
PyUnicode_AS_UNICODE(self), PyUnicode_GET_SIZE(self),
}
PyObject *PyUnicode_Split(PyObject *s,
- PyObject *sep,
- Py_ssize_t maxsplit)
+ PyObject *sep,
+ Py_ssize_t maxsplit)
{
PyObject *result;
s = PyUnicode_FromObject(s);
if (s == NULL)
- return NULL;
+ return NULL;
if (sep != NULL) {
- sep = PyUnicode_FromObject(sep);
- if (sep == NULL) {
- Py_DECREF(s);
- return NULL;
- }
+ sep = PyUnicode_FromObject(sep);
+ if (sep == NULL) {
+ Py_DECREF(s);
+ return NULL;
+ }
}
result = split((PyUnicodeObject *)s, (PyUnicodeObject *)sep, maxsplit);
return NULL;
if (substring == Py_None)
- return split(self, NULL, maxcount);
+ return split(self, NULL, maxcount);
else if (PyUnicode_Check(substring))
- return split(self, (PyUnicodeObject *)substring, maxcount);
+ return split(self, (PyUnicodeObject *)substring, maxcount);
else
- return PyUnicode_Split((PyObject *)self, substring, maxcount);
+ return PyUnicode_Split((PyObject *)self, substring, maxcount);
}
PyObject *
str_obj = PyUnicode_FromObject(str_in);
if (!str_obj)
- return NULL;
+ return NULL;
sep_obj = PyUnicode_FromObject(sep_in);
if (!sep_obj) {
Py_DECREF(str_obj);
str_obj = PyUnicode_FromObject(str_in);
if (!str_obj)
- return NULL;
+ return NULL;
sep_obj = PyUnicode_FromObject(sep_in);
if (!sep_obj) {
Py_DECREF(str_obj);
}
PyObject *PyUnicode_RSplit(PyObject *s,
- PyObject *sep,
- Py_ssize_t maxsplit)
+ PyObject *sep,
+ Py_ssize_t maxsplit)
{
PyObject *result;
-
+
s = PyUnicode_FromObject(s);
if (s == NULL)
- return NULL;
+ return NULL;
if (sep != NULL) {
- sep = PyUnicode_FromObject(sep);
- if (sep == NULL) {
- Py_DECREF(s);
- return NULL;
- }
+ sep = PyUnicode_FromObject(sep);
+ if (sep == NULL) {
+ Py_DECREF(s);
+ return NULL;
+ }
}
result = rsplit((PyUnicodeObject *)s, (PyUnicodeObject *)sep, maxsplit);
return NULL;
if (substring == Py_None)
- return rsplit(self, NULL, maxcount);
+ return rsplit(self, NULL, maxcount);
else if (PyUnicode_Check(substring))
- return rsplit(self, (PyUnicodeObject *)substring, maxcount);
+ return rsplit(self, (PyUnicodeObject *)substring, maxcount);
else
- return PyUnicode_RSplit((PyObject *)self, substring, maxcount);
+ return PyUnicode_RSplit((PyObject *)self, substring, maxcount);
}
PyDoc_STRVAR(splitlines__doc__,
PyObject *new = NULL, *key, *value;
Py_ssize_t i = 0;
int res;
-
+
if (!PyArg_ParseTuple(args, "O|UU:maketrans", &x, &y, &z))
return NULL;
new = PyDict_New();
static PyObject *
unicode_startswith(PyUnicodeObject *self,
- PyObject *args)
+ PyObject *args)
{
PyObject *subobj;
PyUnicodeObject *substring;
int result;
if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
- _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
- return NULL;
+ _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
+ return NULL;
if (PyTuple_Check(subobj)) {
Py_ssize_t i;
for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
static PyObject *
unicode_endswith(PyUnicodeObject *self,
- PyObject *args)
+ PyObject *args)
{
PyObject *subobj;
PyUnicodeObject *substring;
if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
- return NULL;
+ return NULL;
if (PyTuple_Check(subobj)) {
Py_ssize_t i;
for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
static PyObject *
unicode_getnewargs(PyUnicodeObject *v)
{
- return Py_BuildValue("(u#)", v->str, v->length);
+ return Py_BuildValue("(u#)", v->str, v->length);
}
{"freelistsize", (PyCFunction) unicode_freelistsize, METH_NOARGS},
#endif
- {"__getnewargs__", (PyCFunction)unicode_getnewargs, METH_NOARGS},
+ {"__getnewargs__", (PyCFunction)unicode_getnewargs, METH_NOARGS},
{NULL, NULL}
};
}
static PyNumberMethods unicode_as_number = {
- 0, /*nb_add*/
- 0, /*nb_subtract*/
- 0, /*nb_multiply*/
- unicode_mod, /*nb_remainder*/
+ 0, /*nb_add*/
+ 0, /*nb_subtract*/
+ 0, /*nb_multiply*/
+ unicode_mod, /*nb_remainder*/
};
static PySequenceMethods unicode_as_sequence = {
- (lenfunc) unicode_length, /* sq_length */
- PyUnicode_Concat, /* sq_concat */
- (ssizeargfunc) unicode_repeat, /* sq_repeat */
- (ssizeargfunc) unicode_getitem, /* sq_item */
- 0, /* sq_slice */
- 0, /* sq_ass_item */
- 0, /* sq_ass_slice */
- PyUnicode_Contains, /* sq_contains */
+ (lenfunc) unicode_length, /* sq_length */
+ PyUnicode_Concat, /* sq_concat */
+ (ssizeargfunc) unicode_repeat, /* sq_repeat */
+ (ssizeargfunc) unicode_getitem, /* sq_item */
+ 0, /* sq_slice */
+ 0, /* sq_ass_item */
+ 0, /* sq_ass_slice */
+ PyUnicode_Contains, /* sq_contains */
};
static PyObject*
PyObject* result;
if (PySlice_GetIndicesEx((PySliceObject*)item, PyUnicode_GET_SIZE(self),
- &start, &stop, &step, &slicelength) < 0) {
+ &start, &stop, &step, &slicelength) < 0) {
return NULL;
}
source_buf = PyUnicode_AS_UNICODE((PyObject*)self);
result_buf = (Py_UNICODE *)PyObject_MALLOC(slicelength*
sizeof(Py_UNICODE));
-
- if (result_buf == NULL)
- return PyErr_NoMemory();
+
+ if (result_buf == NULL)
+ return PyErr_NoMemory();
for (cur = start, i = 0; i < slicelength; cur += step, i++) {
result_buf[i] = source_buf[cur];
}
static PyMappingMethods unicode_as_mapping = {
- (lenfunc)unicode_length, /* mp_length */
- (binaryfunc)unicode_subscript, /* mp_subscript */
- (objobjargproc)0, /* mp_ass_subscript */
+ (lenfunc)unicode_length, /* mp_length */
+ (binaryfunc)unicode_subscript, /* mp_subscript */
+ (objobjargproc)0, /* mp_ass_subscript */
};
{
Py_ssize_t argidx = *p_argidx;
if (argidx < arglen) {
- (*p_argidx)++;
- if (arglen < 0)
- return args;
- else
- return PyTuple_GetItem(args, argidx);
+ (*p_argidx)++;
+ if (arglen < 0)
+ return args;
+ else
+ return PyTuple_GetItem(args, argidx);
}
PyErr_SetString(PyExc_TypeError,
- "not enough arguments for format string");
+ "not enough arguments for format string");
return NULL;
}
register Py_ssize_t i;
Py_ssize_t len = strlen(charbuffer);
for (i = len - 1; i >= 0; i--)
- buffer[i] = (Py_UNICODE) charbuffer[i];
+ buffer[i] = (Py_UNICODE) charbuffer[i];
return len;
}
return Py_SAFE_DOWNCAST(result, Py_ssize_t, int);
}
-#if 0
+#if 0
static int
longtounicode(Py_UNICODE *buffer, size_t len, const char *format, long x)
{
static int
formatfloat(Py_UNICODE *buf,
- size_t buflen,
- int flags,
- int prec,
- int type,
- PyObject *v)
+ size_t buflen,
+ int flags,
+ int prec,
+ int type,
+ PyObject *v)
{
/* fmt = '%#.' + `prec` + `type`
worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
x = PyFloat_AsDouble(v);
if (x == -1.0 && PyErr_Occurred())
- return -1;
+ return -1;
if (prec < 0)
- prec = 6;
+ prec = 6;
if (type == 'f' && (fabs(x) / 1e25) >= 1e25)
- type = 'g';
+ type = 'g';
/* Worst case length calc to ensure no buffer overrun:
'g' formats:
- fmt = %#.<prec>g
- buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
- for any double rep.)
- len = 1 + prec + 1 + 2 + 5 = 9 + prec
+ fmt = %#.<prec>g
+ buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
+ for any double rep.)
+ len = 1 + prec + 1 + 2 + 5 = 9 + prec
'f' formats:
- buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
- len = 1 + 50 + 1 + prec = 52 + prec
+ buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
+ len = 1 + 50 + 1 + prec = 52 + prec
If prec=0 the effective precision is 1 (the leading digit is
always given), therefore increase the length by one.
*/
- if (((type == 'g' || type == 'G') &&
+ if (((type == 'g' || type == 'G') &&
buflen <= (size_t)10 + (size_t)prec) ||
- (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
- PyErr_SetString(PyExc_OverflowError,
- "formatted float is too long (precision too large?)");
- return -1;
+ (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
+ PyErr_SetString(PyExc_OverflowError,
+ "formatted float is too long (precision too large?)");
+ return -1;
}
PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
- (flags&F_ALT) ? "#" : "",
- prec, type);
+ (flags&F_ALT) ? "#" : "",
+ prec, type);
return doubletounicode(buf, buflen, fmt, x);
}
static PyObject*
formatlong(PyObject *val, int flags, int prec, int type)
{
- char *buf;
- int len;
- PyObject *str; /* temporary string object. */
- PyObject *result;
+ char *buf;
+ int len;
+ PyObject *str; /* temporary string object. */
+ PyObject *result;
- str = _PyBytes_FormatLong(val, flags, prec, type, &buf, &len);
- if (!str)
- return NULL;
- result = PyUnicode_FromStringAndSize(buf, len);
- Py_DECREF(str);
- return result;
+ str = _PyBytes_FormatLong(val, flags, prec, type, &buf, &len);
+ if (!str)
+ return NULL;
+ result = PyUnicode_FromStringAndSize(buf, len);
+ Py_DECREF(str);
+ return result;
}
#if 0
static int
formatint(Py_UNICODE *buf,
- size_t buflen,
- int flags,
- int prec,
- int type,
- PyObject *v)
+ size_t buflen,
+ int flags,
+ int prec,
+ int type,
+ PyObject *v)
{
/* fmt = '%#.' + `prec` + 'l' + `type`
* worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
*/
if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
PyErr_SetString(PyExc_OverflowError,
- "formatted integer is too long (precision too large?)");
+ "formatted integer is too long (precision too large?)");
return -1;
}
(type == 'x' || type == 'X' || type == 'o')) {
/* When converting under %#o, %#x or %#X, there are a number
* of issues that cause pain:
- * - for %#o, we want a different base marker than C
+ * - for %#o, we want a different base marker than C
* - when 0 is being converted, the C standard leaves off
* the '0x' or '0X', which is inconsistent with other
* %#x/%#X conversions and inconsistent with Python's
{
/* presume that the buffer is at least 3 characters long */
if (PyUnicode_Check(v)) {
- if (PyUnicode_GET_SIZE(v) == 1) {
- buf[0] = PyUnicode_AS_UNICODE(v)[0];
- buf[1] = '\0';
- return 1;
- }
+ if (PyUnicode_GET_SIZE(v) == 1) {
+ buf[0] = PyUnicode_AS_UNICODE(v)[0];
+ buf[1] = '\0';
+ return 1;
+ }
#ifndef Py_UNICODE_WIDE
- if (PyUnicode_GET_SIZE(v) == 2) {
- /* Decode a valid surrogate pair */
- int c0 = PyUnicode_AS_UNICODE(v)[0];
- int c1 = PyUnicode_AS_UNICODE(v)[1];
- if (0xD800 <= c0 && c0 <= 0xDBFF &&
- 0xDC00 <= c1 && c1 <= 0xDFFF) {
- buf[0] = c0;
- buf[1] = c1;
- buf[2] = '\0';
- return 2;
- }
- }
+ if (PyUnicode_GET_SIZE(v) == 2) {
+ /* Decode a valid surrogate pair */
+ int c0 = PyUnicode_AS_UNICODE(v)[0];
+ int c1 = PyUnicode_AS_UNICODE(v)[1];
+ if (0xD800 <= c0 && c0 <= 0xDBFF &&
+ 0xDC00 <= c1 && c1 <= 0xDFFF) {
+ buf[0] = c0;
+ buf[1] = c1;
+ buf[2] = '\0';
+ return 2;
+ }
+ }
#endif
- goto onError;
+ goto onError;
}
else {
- /* Integer input truncated to a character */
+ /* Integer input truncated to a character */
long x;
- x = PyLong_AsLong(v);
- if (x == -1 && PyErr_Occurred())
- goto onError;
+ x = PyLong_AsLong(v);
+ if (x == -1 && PyErr_Occurred())
+ goto onError;
- if (x < 0 || x > 0x10ffff) {
- PyErr_SetString(PyExc_OverflowError,
- "%c arg not in range(0x110000)");
- return -1;
- }
+ if (x < 0 || x > 0x10ffff) {
+ PyErr_SetString(PyExc_OverflowError,
+ "%c arg not in range(0x110000)");
+ return -1;
+ }
#ifndef Py_UNICODE_WIDE
- if (x > 0xffff) {
- x -= 0x10000;
- buf[0] = (Py_UNICODE)(0xD800 | (x >> 10));
- buf[1] = (Py_UNICODE)(0xDC00 | (x & 0x3FF));
- return 2;
- }
+ if (x > 0xffff) {
+ x -= 0x10000;
+ buf[0] = (Py_UNICODE)(0xD800 | (x >> 10));
+ buf[1] = (Py_UNICODE)(0xDC00 | (x & 0x3FF));
+ return 2;
+ }
#endif
- buf[0] = (Py_UNICODE) x;
- buf[1] = '\0';
- return 1;
+ buf[0] = (Py_UNICODE) x;
+ buf[1] = '\0';
+ return 1;
}
onError:
PyErr_SetString(PyExc_TypeError,
- "%c requires int or char");
+ "%c requires int or char");
return -1;
}
#define FORMATBUFLEN (size_t)120
PyObject *PyUnicode_Format(PyObject *format,
- PyObject *args)
+ PyObject *args)
{
Py_UNICODE *fmt, *res;
Py_ssize_t fmtcnt, rescnt, reslen, arglen, argidx;
PyObject *uformat;
if (format == NULL || args == NULL) {
- PyErr_BadInternalCall();
- return NULL;
+ PyErr_BadInternalCall();
+ return NULL;
}
uformat = PyUnicode_FromObject(format);
if (uformat == NULL)
- return NULL;
+ return NULL;
fmt = PyUnicode_AS_UNICODE(uformat);
fmtcnt = PyUnicode_GET_SIZE(uformat);
reslen = rescnt = fmtcnt + 100;
result = _PyUnicode_New(reslen);
if (result == NULL)
- goto onError;
+ goto onError;
res = PyUnicode_AS_UNICODE(result);
if (PyTuple_Check(args)) {
- arglen = PyTuple_Size(args);
- argidx = 0;
+ arglen = PyTuple_Size(args);
+ argidx = 0;
}
else {
- arglen = -1;
- argidx = -2;
+ arglen = -1;
+ argidx = -2;
}
if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
!PyUnicode_Check(args))
- dict = args;
+ dict = args;
while (--fmtcnt >= 0) {
- if (*fmt != '%') {
- if (--rescnt < 0) {
- rescnt = fmtcnt + 100;
- reslen += rescnt;
- if (_PyUnicode_Resize(&result, reslen) < 0)
- goto onError;
- res = PyUnicode_AS_UNICODE(result) + reslen - rescnt;
- --rescnt;
- }
- *res++ = *fmt++;
- }
- else {
- /* Got a format specifier */
- int flags = 0;
- Py_ssize_t width = -1;
- int prec = -1;
- Py_UNICODE c = '\0';
- Py_UNICODE fill;
- int isnumok;
- PyObject *v = NULL;
- PyObject *temp = NULL;
- Py_UNICODE *pbuf;
- Py_UNICODE sign;
- Py_ssize_t len;
- Py_UNICODE formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
-
- fmt++;
- if (*fmt == '(') {
- Py_UNICODE *keystart;
- Py_ssize_t keylen;
- PyObject *key;
- int pcount = 1;
-
- if (dict == NULL) {
- PyErr_SetString(PyExc_TypeError,
- "format requires a mapping");
- goto onError;
- }
- ++fmt;
- --fmtcnt;
- keystart = fmt;
- /* Skip over balanced parentheses */
- while (pcount > 0 && --fmtcnt >= 0) {
- if (*fmt == ')')
- --pcount;
- else if (*fmt == '(')
- ++pcount;
- fmt++;
- }
- keylen = fmt - keystart - 1;
- if (fmtcnt < 0 || pcount > 0) {
- PyErr_SetString(PyExc_ValueError,
- "incomplete format key");
- goto onError;
- }
+ if (*fmt != '%') {
+ if (--rescnt < 0) {
+ rescnt = fmtcnt + 100;
+ reslen += rescnt;
+ if (_PyUnicode_Resize(&result, reslen) < 0)
+ goto onError;
+ res = PyUnicode_AS_UNICODE(result) + reslen - rescnt;
+ --rescnt;
+ }
+ *res++ = *fmt++;
+ }
+ else {
+ /* Got a format specifier */
+ int flags = 0;
+ Py_ssize_t width = -1;
+ int prec = -1;
+ Py_UNICODE c = '\0';
+ Py_UNICODE fill;
+ int isnumok;
+ PyObject *v = NULL;
+ PyObject *temp = NULL;
+ Py_UNICODE *pbuf;
+ Py_UNICODE sign;
+ Py_ssize_t len;
+ Py_UNICODE formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
+
+ fmt++;
+ if (*fmt == '(') {
+ Py_UNICODE *keystart;
+ Py_ssize_t keylen;
+ PyObject *key;
+ int pcount = 1;
+
+ if (dict == NULL) {
+ PyErr_SetString(PyExc_TypeError,
+ "format requires a mapping");
+ goto onError;
+ }
+ ++fmt;
+ --fmtcnt;
+ keystart = fmt;
+ /* Skip over balanced parentheses */
+ while (pcount > 0 && --fmtcnt >= 0) {
+ if (*fmt == ')')
+ --pcount;
+ else if (*fmt == '(')
+ ++pcount;
+ fmt++;
+ }
+ keylen = fmt - keystart - 1;
+ if (fmtcnt < 0 || pcount > 0) {
+ PyErr_SetString(PyExc_ValueError,
+ "incomplete format key");
+ goto onError;
+ }
#if 0
- /* keys are converted to strings using UTF-8 and
- then looked up since Python uses strings to hold
- variables names etc. in its namespaces and we
- wouldn't want to break common idioms. */
- key = PyUnicode_EncodeUTF8(keystart,
- keylen,
- NULL);
+ /* keys are converted to strings using UTF-8 and
+ then looked up since Python uses strings to hold
+ variables names etc. in its namespaces and we
+ wouldn't want to break common idioms. */
+ key = PyUnicode_EncodeUTF8(keystart,
+ keylen,
+ NULL);
#else
- key = PyUnicode_FromUnicode(keystart, keylen);
+ key = PyUnicode_FromUnicode(keystart, keylen);
#endif
- if (key == NULL)
- goto onError;
- if (args_owned) {
- Py_DECREF(args);
- args_owned = 0;
- }
- args = PyObject_GetItem(dict, key);
- Py_DECREF(key);
- if (args == NULL) {
- goto onError;
- }
- args_owned = 1;
- arglen = -1;
- argidx = -2;
- }
- while (--fmtcnt >= 0) {
- switch (c = *fmt++) {
- case '-': flags |= F_LJUST; continue;
- case '+': flags |= F_SIGN; continue;
- case ' ': flags |= F_BLANK; continue;
- case '#': flags |= F_ALT; continue;
- case '0': flags |= F_ZERO; continue;
- }
- break;
- }
- if (c == '*') {
- v = getnextarg(args, arglen, &argidx);
- if (v == NULL)
- goto onError;
- if (!PyLong_Check(v)) {
- PyErr_SetString(PyExc_TypeError,
- "* wants int");
- goto onError;
- }
- width = PyLong_AsLong(v);
- if (width == -1 && PyErr_Occurred())
- goto onError;
- if (width < 0) {
- flags |= F_LJUST;
- width = -width;
- }
- if (--fmtcnt >= 0)
- c = *fmt++;
- }
- else if (c >= '0' && c <= '9') {
- width = c - '0';
- while (--fmtcnt >= 0) {
- c = *fmt++;
- if (c < '0' || c > '9')
- break;
- if ((width*10) / 10 != width) {
- PyErr_SetString(PyExc_ValueError,
- "width too big");
- goto onError;
- }
- width = width*10 + (c - '0');
- }
- }
- if (c == '.') {
- prec = 0;
- if (--fmtcnt >= 0)
- c = *fmt++;
- if (c == '*') {
- v = getnextarg(args, arglen, &argidx);
- if (v == NULL)
- goto onError;
- if (!PyLong_Check(v)) {
- PyErr_SetString(PyExc_TypeError,
- "* wants int");
- goto onError;
- }
- prec = PyLong_AsLong(v);
- if (prec == -1 && PyErr_Occurred())
- goto onError;
- if (prec < 0)
- prec = 0;
- if (--fmtcnt >= 0)
- c = *fmt++;
- }
- else if (c >= '0' && c <= '9') {
- prec = c - '0';
- while (--fmtcnt >= 0) {
- c = Py_CHARMASK(*fmt++);
- if (c < '0' || c > '9')
- break;
- if ((prec*10) / 10 != prec) {
- PyErr_SetString(PyExc_ValueError,
- "prec too big");
- goto onError;
- }
- prec = prec*10 + (c - '0');
- }
- }
- } /* prec */
- if (fmtcnt >= 0) {
- if (c == 'h' || c == 'l' || c == 'L') {
- if (--fmtcnt >= 0)
- c = *fmt++;
- }
- }
- if (fmtcnt < 0) {
- PyErr_SetString(PyExc_ValueError,
- "incomplete format");
- goto onError;
- }
- if (c != '%') {
- v = getnextarg(args, arglen, &argidx);
- if (v == NULL)
- goto onError;
- }
- sign = 0;
- fill = ' ';
- switch (c) {
-
- case '%':
- pbuf = formatbuf;
- /* presume that buffer length is at least 1 */
- pbuf[0] = '%';
- len = 1;
- break;
-
- case 's':
- case 'r':
- case 'a':
- if (PyUnicode_Check(v) && c == 's') {
- temp = v;
- Py_INCREF(temp);
- }
- else {
- if (c == 's')
- temp = PyObject_Str(v);
- else if (c == 'r')
- temp = PyObject_Repr(v);
- else
- temp = PyObject_ASCII(v);
- if (temp == NULL)
- goto onError;
+ if (key == NULL)
+ goto onError;
+ if (args_owned) {
+ Py_DECREF(args);
+ args_owned = 0;
+ }
+ args = PyObject_GetItem(dict, key);
+ Py_DECREF(key);
+ if (args == NULL) {
+ goto onError;
+ }
+ args_owned = 1;
+ arglen = -1;
+ argidx = -2;
+ }
+ while (--fmtcnt >= 0) {
+ switch (c = *fmt++) {
+ case '-': flags |= F_LJUST; continue;
+ case '+': flags |= F_SIGN; continue;
+ case ' ': flags |= F_BLANK; continue;
+ case '#': flags |= F_ALT; continue;
+ case '0': flags |= F_ZERO; continue;
+ }
+ break;
+ }
+ if (c == '*') {
+ v = getnextarg(args, arglen, &argidx);
+ if (v == NULL)
+ goto onError;
+ if (!PyLong_Check(v)) {
+ PyErr_SetString(PyExc_TypeError,
+ "* wants int");
+ goto onError;
+ }
+ width = PyLong_AsLong(v);
+ if (width == -1 && PyErr_Occurred())
+ goto onError;
+ if (width < 0) {
+ flags |= F_LJUST;
+ width = -width;
+ }
+ if (--fmtcnt >= 0)
+ c = *fmt++;
+ }
+ else if (c >= '0' && c <= '9') {
+ width = c - '0';
+ while (--fmtcnt >= 0) {
+ c = *fmt++;
+ if (c < '0' || c > '9')
+ break;
+ if ((width*10) / 10 != width) {
+ PyErr_SetString(PyExc_ValueError,
+ "width too big");
+ goto onError;
+ }
+ width = width*10 + (c - '0');
+ }
+ }
+ if (c == '.') {
+ prec = 0;
+ if (--fmtcnt >= 0)
+ c = *fmt++;
+ if (c == '*') {
+ v = getnextarg(args, arglen, &argidx);
+ if (v == NULL)
+ goto onError;
+ if (!PyLong_Check(v)) {
+ PyErr_SetString(PyExc_TypeError,
+ "* wants int");
+ goto onError;
+ }
+ prec = PyLong_AsLong(v);
+ if (prec == -1 && PyErr_Occurred())
+ goto onError;
+ if (prec < 0)
+ prec = 0;
+ if (--fmtcnt >= 0)
+ c = *fmt++;
+ }
+ else if (c >= '0' && c <= '9') {
+ prec = c - '0';
+ while (--fmtcnt >= 0) {
+ c = Py_CHARMASK(*fmt++);
+ if (c < '0' || c > '9')
+ break;
+ if ((prec*10) / 10 != prec) {
+ PyErr_SetString(PyExc_ValueError,
+ "prec too big");
+ goto onError;
+ }
+ prec = prec*10 + (c - '0');
+ }
+ }
+ } /* prec */
+ if (fmtcnt >= 0) {
+ if (c == 'h' || c == 'l' || c == 'L') {
+ if (--fmtcnt >= 0)
+ c = *fmt++;
+ }
+ }
+ if (fmtcnt < 0) {
+ PyErr_SetString(PyExc_ValueError,
+ "incomplete format");
+ goto onError;
+ }
+ if (c != '%') {
+ v = getnextarg(args, arglen, &argidx);
+ if (v == NULL)
+ goto onError;
+ }
+ sign = 0;
+ fill = ' ';
+ switch (c) {
+
+ case '%':
+ pbuf = formatbuf;
+ /* presume that buffer length is at least 1 */
+ pbuf[0] = '%';
+ len = 1;
+ break;
+
+ case 's':
+ case 'r':
+ case 'a':
+ if (PyUnicode_Check(v) && c == 's') {
+ temp = v;
+ Py_INCREF(temp);
+ }
+ else {
+ if (c == 's')
+ temp = PyObject_Str(v);
+ else if (c == 'r')
+ temp = PyObject_Repr(v);
+ else
+ temp = PyObject_ASCII(v);
+ if (temp == NULL)
+ goto onError;
if (PyUnicode_Check(temp))
/* nothing to do */;
- else {
- Py_DECREF(temp);
- PyErr_SetString(PyExc_TypeError,
- "%s argument has non-string str()");
- goto onError;
- }
- }
- pbuf = PyUnicode_AS_UNICODE(temp);
- len = PyUnicode_GET_SIZE(temp);
- if (prec >= 0 && len > prec)
- len = prec;
- break;
-
- case 'i':
- case 'd':
- case 'u':
- case 'o':
- case 'x':
- case 'X':
- if (c == 'i')
- c = 'd';
- isnumok = 0;
- if (PyNumber_Check(v)) {
- PyObject *iobj=NULL;
-
- if (PyLong_Check(v)) {
- iobj = v;
- Py_INCREF(iobj);
- }
- else {
- iobj = PyNumber_Long(v);
- }
- if (iobj!=NULL) {
- if (PyLong_Check(iobj)) {
- isnumok = 1;
- temp = formatlong(iobj, flags, prec, c);
- Py_DECREF(iobj);
- if (!temp)
- goto onError;
- pbuf = PyUnicode_AS_UNICODE(temp);
- len = PyUnicode_GET_SIZE(temp);
- sign = 1;
- }
- else {
- Py_DECREF(iobj);
- }
- }
- }
- if (!isnumok) {
- PyErr_Format(PyExc_TypeError,
- "%%%c format: a number is required, "
+ else {
+ Py_DECREF(temp);
+ PyErr_SetString(PyExc_TypeError,
+ "%s argument has non-string str()");
+ goto onError;
+ }
+ }
+ pbuf = PyUnicode_AS_UNICODE(temp);
+ len = PyUnicode_GET_SIZE(temp);
+ if (prec >= 0 && len > prec)
+ len = prec;
+ break;
+
+ case 'i':
+ case 'd':
+ case 'u':
+ case 'o':
+ case 'x':
+ case 'X':
+ if (c == 'i')
+ c = 'd';
+ isnumok = 0;
+ if (PyNumber_Check(v)) {
+ PyObject *iobj=NULL;
+
+ if (PyLong_Check(v)) {
+ iobj = v;
+ Py_INCREF(iobj);
+ }
+ else {
+ iobj = PyNumber_Long(v);
+ }
+ if (iobj!=NULL) {
+ if (PyLong_Check(iobj)) {
+ isnumok = 1;
+ temp = formatlong(iobj, flags, prec, c);
+ Py_DECREF(iobj);
+ if (!temp)
+ goto onError;
+ pbuf = PyUnicode_AS_UNICODE(temp);
+ len = PyUnicode_GET_SIZE(temp);
+ sign = 1;
+ }
+ else {
+ Py_DECREF(iobj);
+ }
+ }
+ }
+ if (!isnumok) {
+ PyErr_Format(PyExc_TypeError,
+ "%%%c format: a number is required, "
"not %.200s", (char)c, Py_TYPE(v)->tp_name);
- goto onError;
- }
- if (flags & F_ZERO)
- fill = '0';
- break;
-
- case 'e':
- case 'E':
- case 'f':
- case 'F':
- case 'g':
- case 'G':
- if (c == 'F')
- c = 'f';
- pbuf = formatbuf;
- len = formatfloat(pbuf, sizeof(formatbuf)/sizeof(Py_UNICODE),
- flags, prec, c, v);
- if (len < 0)
- goto onError;
- sign = 1;
- if (flags & F_ZERO)
- fill = '0';
- break;
-
- case 'c':
- pbuf = formatbuf;
- len = formatchar(pbuf, sizeof(formatbuf)/sizeof(Py_UNICODE), v);
- if (len < 0)
- goto onError;
- break;
-
- default:
- PyErr_Format(PyExc_ValueError,
- "unsupported format character '%c' (0x%x) "
- "at index %zd",
- (31<=c && c<=126) ? (char)c : '?',
+ goto onError;
+ }
+ if (flags & F_ZERO)
+ fill = '0';
+ break;
+
+ case 'e':
+ case 'E':
+ case 'f':
+ case 'F':
+ case 'g':
+ case 'G':
+ if (c == 'F')
+ c = 'f';
+ pbuf = formatbuf;
+ len = formatfloat(pbuf, sizeof(formatbuf)/sizeof(Py_UNICODE),
+ flags, prec, c, v);
+ if (len < 0)
+ goto onError;
+ sign = 1;
+ if (flags & F_ZERO)
+ fill = '0';
+ break;
+
+ case 'c':
+ pbuf = formatbuf;
+ len = formatchar(pbuf, sizeof(formatbuf)/sizeof(Py_UNICODE), v);
+ if (len < 0)
+ goto onError;
+ break;
+
+ default:
+ PyErr_Format(PyExc_ValueError,
+ "unsupported format character '%c' (0x%x) "
+ "at index %zd",
+ (31<=c && c<=126) ? (char)c : '?',
(int)c,
- (Py_ssize_t)(fmt - 1 -
- PyUnicode_AS_UNICODE(uformat)));
- goto onError;
- }
- if (sign) {
- if (*pbuf == '-' || *pbuf == '+') {
- sign = *pbuf++;
- len--;
- }
- else if (flags & F_SIGN)
- sign = '+';
- else if (flags & F_BLANK)
- sign = ' ';
- else
- sign = 0;
- }
- if (width < len)
- width = len;
- if (rescnt - (sign != 0) < width) {
- reslen -= rescnt;
- rescnt = width + fmtcnt + 100;
- reslen += rescnt;
- if (reslen < 0) {
- Py_XDECREF(temp);
- PyErr_NoMemory();
- goto onError;
- }
- if (_PyUnicode_Resize(&result, reslen) < 0) {
- Py_XDECREF(temp);
- goto onError;
- }
- res = PyUnicode_AS_UNICODE(result)
- + reslen - rescnt;
- }
- if (sign) {
- if (fill != ' ')
- *res++ = sign;
- rescnt--;
- if (width > len)
- width--;
- }
- if ((flags & F_ALT) && (c == 'x' || c == 'X' || c == 'o')) {
- assert(pbuf[0] == '0');
- assert(pbuf[1] == c);
- if (fill != ' ') {
- *res++ = *pbuf++;
- *res++ = *pbuf++;
- }
- rescnt -= 2;
- width -= 2;
- if (width < 0)
- width = 0;
- len -= 2;
- }
- if (width > len && !(flags & F_LJUST)) {
- do {
- --rescnt;
- *res++ = fill;
- } while (--width > len);
- }
- if (fill == ' ') {
- if (sign)
- *res++ = sign;
- if ((flags & F_ALT) && (c == 'x' || c == 'X' || c == 'o')) {
- assert(pbuf[0] == '0');
- assert(pbuf[1] == c);
- *res++ = *pbuf++;
- *res++ = *pbuf++;
- }
- }
- Py_UNICODE_COPY(res, pbuf, len);
- res += len;
- rescnt -= len;
- while (--width >= len) {
- --rescnt;
- *res++ = ' ';
- }
- if (dict && (argidx < arglen) && c != '%') {
- PyErr_SetString(PyExc_TypeError,
- "not all arguments converted during string formatting");
+ (Py_ssize_t)(fmt - 1 -
+ PyUnicode_AS_UNICODE(uformat)));
+ goto onError;
+ }
+ if (sign) {
+ if (*pbuf == '-' || *pbuf == '+') {
+ sign = *pbuf++;
+ len--;
+ }
+ else if (flags & F_SIGN)
+ sign = '+';
+ else if (flags & F_BLANK)
+ sign = ' ';
+ else
+ sign = 0;
+ }
+ if (width < len)
+ width = len;
+ if (rescnt - (sign != 0) < width) {
+ reslen -= rescnt;
+ rescnt = width + fmtcnt + 100;
+ reslen += rescnt;
+ if (reslen < 0) {
+ Py_XDECREF(temp);
+ PyErr_NoMemory();
+ goto onError;
+ }
+ if (_PyUnicode_Resize(&result, reslen) < 0) {
+ Py_XDECREF(temp);
+ goto onError;
+ }
+ res = PyUnicode_AS_UNICODE(result)
+ + reslen - rescnt;
+ }
+ if (sign) {
+ if (fill != ' ')
+ *res++ = sign;
+ rescnt--;
+ if (width > len)
+ width--;
+ }
+ if ((flags & F_ALT) && (c == 'x' || c == 'X' || c == 'o')) {
+ assert(pbuf[0] == '0');
+ assert(pbuf[1] == c);
+ if (fill != ' ') {
+ *res++ = *pbuf++;
+ *res++ = *pbuf++;
+ }
+ rescnt -= 2;
+ width -= 2;
+ if (width < 0)
+ width = 0;
+ len -= 2;
+ }
+ if (width > len && !(flags & F_LJUST)) {
+ do {
+ --rescnt;
+ *res++ = fill;
+ } while (--width > len);
+ }
+ if (fill == ' ') {
+ if (sign)
+ *res++ = sign;
+ if ((flags & F_ALT) && (c == 'x' || c == 'X' || c == 'o')) {
+ assert(pbuf[0] == '0');
+ assert(pbuf[1] == c);
+ *res++ = *pbuf++;
+ *res++ = *pbuf++;
+ }
+ }
+ Py_UNICODE_COPY(res, pbuf, len);
+ res += len;
+ rescnt -= len;
+ while (--width >= len) {
+ --rescnt;
+ *res++ = ' ';
+ }
+ if (dict && (argidx < arglen) && c != '%') {
+ PyErr_SetString(PyExc_TypeError,
+ "not all arguments converted during string formatting");
Py_XDECREF(temp);
- goto onError;
- }
- Py_XDECREF(temp);
- } /* '%' */
+ goto onError;
+ }
+ Py_XDECREF(temp);
+ } /* '%' */
} /* until end */
if (argidx < arglen && !dict) {
- PyErr_SetString(PyExc_TypeError,
- "not all arguments converted during string formatting");
- goto onError;
+ PyErr_SetString(PyExc_TypeError,
+ "not all arguments converted during string formatting");
+ goto onError;
}
if (_PyUnicode_Resize(&result, reslen - rescnt) < 0)
- goto onError;
+ goto onError;
if (args_owned) {
- Py_DECREF(args);
+ Py_DECREF(args);
}
Py_DECREF(uformat);
return (PyObject *)result;
Py_XDECREF(result);
Py_DECREF(uformat);
if (args_owned) {
- Py_DECREF(args);
+ Py_DECREF(args);
}
return NULL;
}
unicode_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
PyObject *x = NULL;
- static char *kwlist[] = {"object", "encoding", "errors", 0};
- char *encoding = NULL;
- char *errors = NULL;
-
- if (type != &PyUnicode_Type)
- return unicode_subtype_new(type, args, kwds);
- if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:str",
- kwlist, &x, &encoding, &errors))
- return NULL;
- if (x == NULL)
- return (PyObject *)_PyUnicode_New(0);
- if (encoding == NULL && errors == NULL)
- return PyObject_Str(x);
- else
- return PyUnicode_FromEncodedObject(x, encoding, errors);
+ static char *kwlist[] = {"object", "encoding", "errors", 0};
+ char *encoding = NULL;
+ char *errors = NULL;
+
+ if (type != &PyUnicode_Type)
+ return unicode_subtype_new(type, args, kwds);
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:str",
+ kwlist, &x, &encoding, &errors))
+ return NULL;
+ if (x == NULL)
+ return (PyObject *)_PyUnicode_New(0);
+ if (encoding == NULL && errors == NULL)
+ return PyObject_Str(x);
+ else
+ return PyUnicode_FromEncodedObject(x, encoding, errors);
}
static PyObject *
unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
- PyUnicodeObject *tmp, *pnew;
- Py_ssize_t n;
-
- assert(PyType_IsSubtype(type, &PyUnicode_Type));
- tmp = (PyUnicodeObject *)unicode_new(&PyUnicode_Type, args, kwds);
- if (tmp == NULL)
- return NULL;
- assert(PyUnicode_Check(tmp));
- pnew = (PyUnicodeObject *) type->tp_alloc(type, n = tmp->length);
- if (pnew == NULL) {
- Py_DECREF(tmp);
- return NULL;
- }
- pnew->str = (Py_UNICODE*) PyObject_MALLOC(sizeof(Py_UNICODE) * (n+1));
- if (pnew->str == NULL) {
- _Py_ForgetReference((PyObject *)pnew);
- PyObject_Del(pnew);
- Py_DECREF(tmp);
- return PyErr_NoMemory();
- }
- Py_UNICODE_COPY(pnew->str, tmp->str, n+1);
- pnew->length = n;
- pnew->hash = tmp->hash;
- Py_DECREF(tmp);
- return (PyObject *)pnew;
+ PyUnicodeObject *tmp, *pnew;
+ Py_ssize_t n;
+
+ assert(PyType_IsSubtype(type, &PyUnicode_Type));
+ tmp = (PyUnicodeObject *)unicode_new(&PyUnicode_Type, args, kwds);
+ if (tmp == NULL)
+ return NULL;
+ assert(PyUnicode_Check(tmp));
+ pnew = (PyUnicodeObject *) type->tp_alloc(type, n = tmp->length);
+ if (pnew == NULL) {
+ Py_DECREF(tmp);
+ return NULL;
+ }
+ pnew->str = (Py_UNICODE*) PyObject_MALLOC(sizeof(Py_UNICODE) * (n+1));
+ if (pnew->str == NULL) {
+ _Py_ForgetReference((PyObject *)pnew);
+ PyObject_Del(pnew);
+ Py_DECREF(tmp);
+ return PyErr_NoMemory();
+ }
+ Py_UNICODE_COPY(pnew->str, tmp->str, n+1);
+ pnew->length = n;
+ pnew->hash = tmp->hash;
+ Py_DECREF(tmp);
+ return (PyObject *)pnew;
}
PyDoc_STRVAR(unicode_doc,
PyTypeObject PyUnicode_Type = {
PyVarObject_HEAD_INIT(&PyType_Type, 0)
- "str", /* tp_name */
- sizeof(PyUnicodeObject), /* tp_size */
- 0, /* tp_itemsize */
+ "str", /* tp_name */
+ sizeof(PyUnicodeObject), /* tp_size */
+ 0, /* tp_itemsize */
/* Slots */
- (destructor)unicode_dealloc, /* tp_dealloc */
- 0, /* tp_print */
- 0, /* tp_getattr */
- 0, /* tp_setattr */
- 0, /* tp_compare */
- unicode_repr, /* tp_repr */
- &unicode_as_number, /* tp_as_number */
- &unicode_as_sequence, /* tp_as_sequence */
- &unicode_as_mapping, /* tp_as_mapping */
- (hashfunc) unicode_hash, /* tp_hash*/
- 0, /* tp_call*/
- (reprfunc) unicode_str, /* tp_str */
- PyObject_GenericGetAttr, /* tp_getattro */
- 0, /* tp_setattro */
- 0, /* tp_as_buffer */
- Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
- Py_TPFLAGS_UNICODE_SUBCLASS, /* tp_flags */
- unicode_doc, /* tp_doc */
- 0, /* tp_traverse */
- 0, /* tp_clear */
- PyUnicode_RichCompare, /* tp_richcompare */
- 0, /* tp_weaklistoffset */
- unicode_iter, /* tp_iter */
- 0, /* tp_iternext */
- unicode_methods, /* tp_methods */
- 0, /* tp_members */
- 0, /* tp_getset */
- &PyBaseObject_Type, /* tp_base */
- 0, /* tp_dict */
- 0, /* tp_descr_get */
- 0, /* tp_descr_set */
- 0, /* tp_dictoffset */
- 0, /* tp_init */
- 0, /* tp_alloc */
- unicode_new, /* tp_new */
- PyObject_Del, /* tp_free */
+ (destructor)unicode_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ unicode_repr, /* tp_repr */
+ &unicode_as_number, /* tp_as_number */
+ &unicode_as_sequence, /* tp_as_sequence */
+ &unicode_as_mapping, /* tp_as_mapping */
+ (hashfunc) unicode_hash, /* tp_hash*/
+ 0, /* tp_call*/
+ (reprfunc) unicode_str, /* tp_str */
+ PyObject_GenericGetAttr, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
+ Py_TPFLAGS_UNICODE_SUBCLASS, /* tp_flags */
+ unicode_doc, /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ PyUnicode_RichCompare, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ unicode_iter, /* tp_iter */
+ 0, /* tp_iternext */
+ unicode_methods, /* tp_methods */
+ 0, /* tp_members */
+ 0, /* tp_getset */
+ &PyBaseObject_Type, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ 0, /* tp_init */
+ 0, /* tp_alloc */
+ unicode_new, /* tp_new */
+ PyObject_Del, /* tp_free */
};
/* Initialize the Unicode implementation */
numfree = 0;
unicode_empty = _PyUnicode_New(0);
if (!unicode_empty)
- return;
+ return;
for (i = 0; i < 256; i++)
- unicode_latin1[i] = NULL;
+ unicode_latin1[i] = NULL;
if (PyType_Ready(&PyUnicode_Type) < 0)
- Py_FatalError("Can't initialize 'unicode'");
+ Py_FatalError("Can't initialize 'unicode'");
/* initialize the linebreak bloom filter */
bloom_linebreak = make_bloom_mask(
PyUnicodeObject *u;
for (u = free_list; u != NULL;) {
- PyUnicodeObject *v = u;
- u = *(PyUnicodeObject **)u;
- if (v->str)
- PyObject_DEL(v->str);
- Py_XDECREF(v->defenc);
- PyObject_Del(v);
- numfree--;
+ PyUnicodeObject *v = u;
+ u = *(PyUnicodeObject **)u;
+ if (v->str)
+ PyObject_DEL(v->str);
+ Py_XDECREF(v->defenc);
+ PyObject_Del(v);
+ numfree--;
}
free_list = NULL;
assert(numfree == 0);
unicode_empty = NULL;
for (i = 0; i < 256; i++) {
- if (unicode_latin1[i]) {
- Py_DECREF(unicode_latin1[i]);
- unicode_latin1[i] = NULL;
- }
+ if (unicode_latin1[i]) {
+ Py_DECREF(unicode_latin1[i]);
+ unicode_latin1[i] = NULL;
+ }
}
(void)PyUnicode_ClearFreeList();
}
void
PyUnicode_InternInPlace(PyObject **p)
{
- register PyUnicodeObject *s = (PyUnicodeObject *)(*p);
- PyObject *t;
- if (s == NULL || !PyUnicode_Check(s))
- Py_FatalError(
- "PyUnicode_InternInPlace: unicode strings only please!");
- /* If it's a subclass, we don't really know what putting
- it in the interned dict might do. */
- if (!PyUnicode_CheckExact(s))
- return;
- if (PyUnicode_CHECK_INTERNED(s))
- return;
- if (interned == NULL) {
- interned = PyDict_New();
- if (interned == NULL) {
- PyErr_Clear(); /* Don't leave an exception */
- return;
- }
- }
- /* It might be that the GetItem call fails even
- though the key is present in the dictionary,
- namely when this happens during a stack overflow. */
- Py_ALLOW_RECURSION
- t = PyDict_GetItem(interned, (PyObject *)s);
- Py_END_ALLOW_RECURSION
-
- if (t) {
- Py_INCREF(t);
- Py_DECREF(*p);
- *p = t;
- return;
- }
-
- PyThreadState_GET()->recursion_critical = 1;
- if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
- PyErr_Clear();
- PyThreadState_GET()->recursion_critical = 0;
- return;
- }
- PyThreadState_GET()->recursion_critical = 0;
- /* The two references in interned are not counted by refcnt.
- The deallocator will take care of this */
- Py_REFCNT(s) -= 2;
- PyUnicode_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
+ register PyUnicodeObject *s = (PyUnicodeObject *)(*p);
+ PyObject *t;
+ if (s == NULL || !PyUnicode_Check(s))
+ Py_FatalError(
+ "PyUnicode_InternInPlace: unicode strings only please!");
+ /* If it's a subclass, we don't really know what putting
+ it in the interned dict might do. */
+ if (!PyUnicode_CheckExact(s))
+ return;
+ if (PyUnicode_CHECK_INTERNED(s))
+ return;
+ if (interned == NULL) {
+ interned = PyDict_New();
+ if (interned == NULL) {
+ PyErr_Clear(); /* Don't leave an exception */
+ return;
+ }
+ }
+ /* It might be that the GetItem call fails even
+ though the key is present in the dictionary,
+ namely when this happens during a stack overflow. */
+ Py_ALLOW_RECURSION
+ t = PyDict_GetItem(interned, (PyObject *)s);
+ Py_END_ALLOW_RECURSION
+
+ if (t) {
+ Py_INCREF(t);
+ Py_DECREF(*p);
+ *p = t;
+ return;
+ }
+
+ PyThreadState_GET()->recursion_critical = 1;
+ if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
+ PyErr_Clear();
+ PyThreadState_GET()->recursion_critical = 0;
+ return;
+ }
+ PyThreadState_GET()->recursion_critical = 0;
+ /* The two references in interned are not counted by refcnt.
+ The deallocator will take care of this */
+ Py_REFCNT(s) -= 2;
+ PyUnicode_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
}
void
PyUnicode_InternImmortal(PyObject **p)
{
- PyUnicode_InternInPlace(p);
- if (PyUnicode_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
- PyUnicode_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
- Py_INCREF(*p);
- }
+ PyUnicode_InternInPlace(p);
+ if (PyUnicode_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
+ PyUnicode_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
+ Py_INCREF(*p);
+ }
}
PyObject *
PyUnicode_InternFromString(const char *cp)
{
- PyObject *s = PyUnicode_FromString(cp);
- if (s == NULL)
- return NULL;
- PyUnicode_InternInPlace(&s);
- return s;
+ PyObject *s = PyUnicode_FromString(cp);
+ if (s == NULL)
+ return NULL;
+ PyUnicode_InternInPlace(&s);
+ return s;
}
void _Py_ReleaseInternedUnicodeStrings(void)
{
- PyObject *keys;
- PyUnicodeObject *s;
- Py_ssize_t i, n;
- Py_ssize_t immortal_size = 0, mortal_size = 0;
-
- if (interned == NULL || !PyDict_Check(interned))
- return;
- keys = PyDict_Keys(interned);
- if (keys == NULL || !PyList_Check(keys)) {
- PyErr_Clear();
- return;
- }
-
- /* Since _Py_ReleaseInternedUnicodeStrings() is intended to help a leak
- detector, interned unicode strings are not forcibly deallocated;
- rather, we give them their stolen references back, and then clear
- and DECREF the interned dict. */
-
- n = PyList_GET_SIZE(keys);
- fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
- n);
- for (i = 0; i < n; i++) {
- s = (PyUnicodeObject *) PyList_GET_ITEM(keys, i);
- switch (s->state) {
- case SSTATE_NOT_INTERNED:
- /* XXX Shouldn't happen */
- break;
- case SSTATE_INTERNED_IMMORTAL:
- Py_REFCNT(s) += 1;
- immortal_size += s->length;
- break;
- case SSTATE_INTERNED_MORTAL:
- Py_REFCNT(s) += 2;
- mortal_size += s->length;
- break;
- default:
- Py_FatalError("Inconsistent interned string state.");
- }
- s->state = SSTATE_NOT_INTERNED;
- }
- fprintf(stderr, "total size of all interned strings: "
- "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
- "mortal/immortal\n", mortal_size, immortal_size);
- Py_DECREF(keys);
- PyDict_Clear(interned);
- Py_DECREF(interned);
- interned = NULL;
+ PyObject *keys;
+ PyUnicodeObject *s;
+ Py_ssize_t i, n;
+ Py_ssize_t immortal_size = 0, mortal_size = 0;
+
+ if (interned == NULL || !PyDict_Check(interned))
+ return;
+ keys = PyDict_Keys(interned);
+ if (keys == NULL || !PyList_Check(keys)) {
+ PyErr_Clear();
+ return;
+ }
+
+ /* Since _Py_ReleaseInternedUnicodeStrings() is intended to help a leak
+ detector, interned unicode strings are not forcibly deallocated;
+ rather, we give them their stolen references back, and then clear
+ and DECREF the interned dict. */
+
+ n = PyList_GET_SIZE(keys);
+ fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
+ n);
+ for (i = 0; i < n; i++) {
+ s = (PyUnicodeObject *) PyList_GET_ITEM(keys, i);
+ switch (s->state) {
+ case SSTATE_NOT_INTERNED:
+ /* XXX Shouldn't happen */
+ break;
+ case SSTATE_INTERNED_IMMORTAL:
+ Py_REFCNT(s) += 1;
+ immortal_size += s->length;
+ break;
+ case SSTATE_INTERNED_MORTAL:
+ Py_REFCNT(s) += 2;
+ mortal_size += s->length;
+ break;
+ default:
+ Py_FatalError("Inconsistent interned string state.");
+ }
+ s->state = SSTATE_NOT_INTERNED;
+ }
+ fprintf(stderr, "total size of all interned strings: "
+ "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
+ "mortal/immortal\n", mortal_size, immortal_size);
+ Py_DECREF(keys);
+ PyDict_Clear(interned);
+ Py_DECREF(interned);
+ interned = NULL;
}
/********************* Unicode Iterator **************************/
typedef struct {
- PyObject_HEAD
- Py_ssize_t it_index;
- PyUnicodeObject *it_seq; /* Set to NULL when iterator is exhausted */
+ PyObject_HEAD
+ Py_ssize_t it_index;
+ PyUnicodeObject *it_seq; /* Set to NULL when iterator is exhausted */
} unicodeiterobject;
static void
unicodeiter_dealloc(unicodeiterobject *it)
{
- _PyObject_GC_UNTRACK(it);
- Py_XDECREF(it->it_seq);
- PyObject_GC_Del(it);
+ _PyObject_GC_UNTRACK(it);
+ Py_XDECREF(it->it_seq);
+ PyObject_GC_Del(it);
}
static int
unicodeiter_traverse(unicodeiterobject *it, visitproc visit, void *arg)
{
- Py_VISIT(it->it_seq);
- return 0;
+ Py_VISIT(it->it_seq);
+ return 0;
}
static PyObject *
unicodeiter_next(unicodeiterobject *it)
{
- PyUnicodeObject *seq;
- PyObject *item;
+ PyUnicodeObject *seq;
+ PyObject *item;
- assert(it != NULL);
- seq = it->it_seq;
- if (seq == NULL)
- return NULL;
- assert(PyUnicode_Check(seq));
+ assert(it != NULL);
+ seq = it->it_seq;
+ if (seq == NULL)
+ return NULL;
+ assert(PyUnicode_Check(seq));
- if (it->it_index < PyUnicode_GET_SIZE(seq)) {
- item = PyUnicode_FromUnicode(
+ if (it->it_index < PyUnicode_GET_SIZE(seq)) {
+ item = PyUnicode_FromUnicode(
PyUnicode_AS_UNICODE(seq)+it->it_index, 1);
- if (item != NULL)
- ++it->it_index;
- return item;
- }
+ if (item != NULL)
+ ++it->it_index;
+ return item;
+ }
- Py_DECREF(seq);
- it->it_seq = NULL;
- return NULL;
+ Py_DECREF(seq);
+ it->it_seq = NULL;
+ return NULL;
}
static PyObject *
unicodeiter_len(unicodeiterobject *it)
{
- Py_ssize_t len = 0;
- if (it->it_seq)
- len = PyUnicode_GET_SIZE(it->it_seq) - it->it_index;
- return PyLong_FromSsize_t(len);
+ Py_ssize_t len = 0;
+ if (it->it_seq)
+ len = PyUnicode_GET_SIZE(it->it_seq) - it->it_index;
+ return PyLong_FromSsize_t(len);
}
PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it)).");
static PyMethodDef unicodeiter_methods[] = {
- {"__length_hint__", (PyCFunction)unicodeiter_len, METH_NOARGS,
+ {"__length_hint__", (PyCFunction)unicodeiter_len, METH_NOARGS,
length_hint_doc},
- {NULL, NULL} /* sentinel */
+ {NULL, NULL} /* sentinel */
};
PyTypeObject PyUnicodeIter_Type = {
- PyVarObject_HEAD_INIT(&PyType_Type, 0)
- "str_iterator", /* tp_name */
- sizeof(unicodeiterobject), /* tp_basicsize */
- 0, /* tp_itemsize */
- /* methods */
- (destructor)unicodeiter_dealloc, /* tp_dealloc */
- 0, /* tp_print */
- 0, /* tp_getattr */
- 0, /* tp_setattr */
- 0, /* tp_compare */
- 0, /* tp_repr */
- 0, /* tp_as_number */
- 0, /* tp_as_sequence */
- 0, /* tp_as_mapping */
- 0, /* tp_hash */
- 0, /* tp_call */
- 0, /* tp_str */
- PyObject_GenericGetAttr, /* tp_getattro */
- 0, /* tp_setattro */
- 0, /* tp_as_buffer */
- Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
- 0, /* tp_doc */
- (traverseproc)unicodeiter_traverse, /* tp_traverse */
- 0, /* tp_clear */
- 0, /* tp_richcompare */
- 0, /* tp_weaklistoffset */
- PyObject_SelfIter, /* tp_iter */
- (iternextfunc)unicodeiter_next, /* tp_iternext */
- unicodeiter_methods, /* tp_methods */
- 0,
+ PyVarObject_HEAD_INIT(&PyType_Type, 0)
+ "str_iterator", /* tp_name */
+ sizeof(unicodeiterobject), /* tp_basicsize */
+ 0, /* tp_itemsize */
+ /* methods */
+ (destructor)unicodeiter_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ PyObject_GenericGetAttr, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
+ 0, /* tp_doc */
+ (traverseproc)unicodeiter_traverse, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ PyObject_SelfIter, /* tp_iter */
+ (iternextfunc)unicodeiter_next, /* tp_iternext */
+ unicodeiter_methods, /* tp_methods */
+ 0,
};
static PyObject *
unicode_iter(PyObject *seq)
{
- unicodeiterobject *it;
-
- if (!PyUnicode_Check(seq)) {
- PyErr_BadInternalCall();
- return NULL;
- }
- it = PyObject_GC_New(unicodeiterobject, &PyUnicodeIter_Type);
- if (it == NULL)
- return NULL;
- it->it_index = 0;
- Py_INCREF(seq);
- it->it_seq = (PyUnicodeObject *)seq;
- _PyObject_GC_TRACK(it);
- return (PyObject *)it;
+ unicodeiterobject *it;
+
+ if (!PyUnicode_Check(seq)) {
+ PyErr_BadInternalCall();
+ return NULL;
+ }
+ it = PyObject_GC_New(unicodeiterobject, &PyUnicodeIter_Type);
+ if (it == NULL)
+ return NULL;
+ it->it_index = 0;
+ Py_INCREF(seq);
+ it->it_seq = (PyUnicodeObject *)seq;
+ _PyObject_GC_TRACK(it);
+ return (PyObject *)it;
}
size_t