s += "4"
self.assertEqual(s, "3")
+    def test_encode_decimal(self):
+        # Exercise the _testcapi.unicode_encodedecimal() wrapper with and
+        # without an explicit error handler.
+        from _testcapi import unicode_encodedecimal
+        # ASCII digits are returned unchanged, as bytes.
+        self.assertEqual(unicode_encodedecimal('123'),
+                         b'123')
+        # Non-ASCII decimal digits are mapped to their ASCII equivalents.
+        self.assertEqual(unicode_encodedecimal('\u0663.\u0661\u0664'),
+                         b'3.14')
+        # Unicode whitespace becomes a plain ASCII space.
+        self.assertEqual(unicode_encodedecimal("\N{EM SPACE}3.14\N{EN SPACE}"),
+                         b' 3.14 ')
+        # The euro sign is neither a digit, whitespace nor Latin-1, so the
+        # result depends on the error handler.
+        self.assertRaises(UnicodeEncodeError,
+                          unicode_encodedecimal, "123\u20ac", "strict")
+        self.assertEqual(unicode_encodedecimal("123\u20ac", "replace"),
+                         b'123?')
+        self.assertEqual(unicode_encodedecimal("123\u20ac", "ignore"),
+                         b'123')
+        # xmlcharrefreplace emits a decimal character reference; the bytes
+        # literal has to stay pure ASCII (U+20AC == 8364).
+        self.assertEqual(unicode_encodedecimal("123\u20ac", "xmlcharrefreplace"),
+                         b'123&#8364;')
+        self.assertEqual(unicode_encodedecimal("123\u20ac", "backslashreplace"),
+                         b'123\\u20ac')
+
+    def test_transform_decimal(self):
+        # Each pair is (input, expected result).  Per these cases, decimal
+        # digits come back as ASCII digits while whitespace and the euro
+        # sign are returned untouched.
+        from _testcapi import unicode_transformdecimaltoascii as transform_decimal
+        cases = (
+            ('123', '123'),
+            ('\u0663.\u0661\u0664', '3.14'),
+            ("\N{EM SPACE}3.14\N{EN SPACE}", "\N{EM SPACE}3.14\N{EN SPACE}"),
+            ('123\u20ac', '123\u20ac'),
+        )
+        for text, expected in cases:
+            self.assertEqual(transform_decimal(text), expected)
+
class StringModuleTest(unittest.TestCase):
def test_formatter_parser(self):
return Py_BuildValue("(Nn)", result, size);
}
+/* Test wrapper around PyUnicode_EncodeDecimal().
+ *
+ * Python-level arguments: a str and an optional error-handler name.
+ * Returns a new bytes object holding the encoded output, or NULL with an
+ * exception set on failure.
+ */
+static PyObject *
+unicode_encodedecimal(PyObject *self, PyObject *args)
+{
+    Py_UNICODE *unicode;
+    Py_ssize_t length;
+    char *errors = NULL;
+    PyObject *decimal;
+    Py_ssize_t decimal_length, new_length;
+    int res;
+
+    if (!PyArg_ParseTuple(args, "u#|s", &unicode, &length, &errors))
+        return NULL;
+
+    /* PyUnicode_EncodeDecimal() writes into the buffer without knowing its
+       size, so reserve the longest replacement the stock error handlers
+       produce for a single input unit: 10 bytes, i.e. len('&#1114111;')
+       for xmlcharrefreplace or len('\U0010ffff') for backslashreplace.
+       A bound of 7 (len('&#8364;')) only covers BMP characters.
+       NOTE(review): a custom error handler may still return a longer
+       replacement; this wrapper cannot guard against that. */
+    if (length > PY_SSIZE_T_MAX / 10)
+        return PyErr_NoMemory();
+    decimal_length = length * 10;
+    decimal = PyBytes_FromStringAndSize(NULL, decimal_length);
+    if (decimal == NULL)
+        return NULL;
+
+    res = PyUnicode_EncodeDecimal(unicode, length,
+                                  PyBytes_AS_STRING(decimal),
+                                  errors);
+    if (res < 0) {
+        Py_DECREF(decimal);
+        return NULL;
+    }
+
+    /* The encoder 0-terminates its output; shrink the bytes object to the
+       number of bytes actually written. */
+    new_length = strlen(PyBytes_AS_STRING(decimal));
+    assert(new_length <= decimal_length);
+    res = _PyBytes_Resize(&decimal, new_length);
+    if (res < 0)
+        return NULL;
+
+    return decimal;
+}
+
+/* Test wrapper around PyUnicode_TransformDecimalToASCII().
+ *
+ * Python-level argument: a single str.  Returns whatever
+ * PyUnicode_TransformDecimalToASCII() returns for it, or NULL with an
+ * exception set if argument parsing fails.
+ */
+static PyObject *
+unicode_transformdecimaltoascii(PyObject *self, PyObject *args)
+{
+    Py_UNICODE *unicode;
+    Py_ssize_t length;
+
+    /* Every format unit needs a matching output pointer.  This wrapper
+       takes no optional arguments, so the format is just "u#"; an extra
+       "|s" unit without a pointer would make PyArg_ParseTuple() read a
+       garbage varargs slot when a second argument is passed. */
+    if (!PyArg_ParseTuple(args, "u#", &unicode, &length))
+        return NULL;
+    return PyUnicode_TransformDecimalToASCII(unicode, length);
+}
+
static PyObject *
getargs_w_star(PyObject *self, PyObject *args)
{
{"test_u_code", (PyCFunction)test_u_code, METH_NOARGS},
{"test_Z_code", (PyCFunction)test_Z_code, METH_NOARGS},
{"test_widechar", (PyCFunction)test_widechar, METH_NOARGS},
- {"unicode_aswidechar", unicode_aswidechar, METH_VARARGS},
- {"unicode_aswidecharstring",unicode_aswidecharstring, METH_VARARGS},
+ {"unicode_aswidechar", unicode_aswidechar, METH_VARARGS},
+ {"unicode_aswidecharstring",unicode_aswidecharstring, METH_VARARGS},
+ {"unicode_encodedecimal", unicode_encodedecimal, METH_VARARGS},
+ {"unicode_transformdecimaltoascii", unicode_transformdecimaltoascii, METH_VARARGS},
#ifdef WITH_THREAD
{"_test_thread_state", test_thread_state, METH_VARARGS},
{"_pending_threadfunc", pending_threadfunc, METH_VARARGS},
char *output,
const char *errors)
{
- Py_UNICODE *p, *end;
PyObject *errorHandler = NULL;
PyObject *exc = NULL;
PyObject *unicode;
/* the following variable is used for caching string comparisons
* -1=not initialized, 0=unknown, 1=strict, 2=replace, 3=ignore, 4=xmlcharrefreplace */
int known_errorHandler = -1;
+ Py_ssize_t i, j;
+ enum PyUnicode_Kind kind;
+ void *data;
if (output == NULL) {
PyErr_BadArgument();
return -1;
}
- p = s;
- end = s + length;
- while (p < end) {
- register Py_UNICODE ch = *p;
+ unicode = PyUnicode_FromUnicode(s, length);
+ if (unicode == NULL)
+ return -1;
+
+ if (PyUnicode_READY(unicode) < 0)
+ goto onError;
+ kind = PyUnicode_KIND(unicode);
+ data = PyUnicode_DATA(unicode);
+
+ for (i=0; i < length; i++) {
+ Py_UCS4 ch = PyUnicode_READ(kind, data, i);
int decimal;
- PyObject *repunicode;
- Py_ssize_t repsize;
- Py_ssize_t newpos;
- Py_UNICODE *uni2;
- Py_UNICODE *collstart;
- Py_UNICODE *collend;
+ Py_ssize_t startpos, endpos;
if (Py_UNICODE_ISSPACE(ch)) {
*output++ = ' ';
- ++p;
continue;
}
decimal = Py_UNICODE_TODECIMAL(ch);
if (decimal >= 0) {
*output++ = '0' + decimal;
- ++p;
continue;
}
if (0 < ch && ch < 256) {
*output++ = (char)ch;
- ++p;
continue;
}
/* All other characters are considered unencodable */
- collstart = p;
- collend = p+1;
- while (collend < end) {
- if ((0 < *collend && *collend < 256) ||
- !Py_UNICODE_ISSPACE(*collend) ||
- Py_UNICODE_TODECIMAL(*collend))
+ startpos = i;
+ endpos = i+1;
+ for (; endpos < length; endpos++) {
+ ch = PyUnicode_READ(kind, data, endpos);
+ if ((0 < ch && ch < 256) ||
+ !Py_UNICODE_ISSPACE(ch) ||
+ Py_UNICODE_TODECIMAL(ch))
break;
}
/* cache callback name lookup
}
switch (known_errorHandler) {
case 1: /* strict */
- unicode = PyUnicode_FromUnicode(s, length);
- if (unicode == NULL)
- goto onError;
- raise_encode_exception(&exc, encoding, unicode, collstart-s, collend-s, reason);
- Py_DECREF(unicode);
+ raise_encode_exception(&exc, encoding, unicode, startpos, endpos, reason);
goto onError;
case 2: /* replace */
- for (p = collstart; p < collend; ++p)
+ for (j=startpos; j < endpos; j++)
*output++ = '?';
/* fall through */
case 3: /* ignore */
- p = collend;
+ i = endpos;
break;
case 4: /* xmlcharrefreplace */
- /* generate replacement (temporarily (mis)uses p) */
- for (p = collstart; p < collend; ++p)
- output += sprintf(output, "&#%d;", (int)*p);
- p = collend;
+ /* generate replacement */
+ for (j=startpos; j < endpos; j++) {
+ ch = PyUnicode_READ(kind, data, i);
+ output += sprintf(output, "&#%d;", (int)ch);
+ i++;
+ }
break;
default:
- unicode = PyUnicode_FromUnicode(s, length);
- if (unicode == NULL)
- goto onError;
+ {
+ PyObject *repunicode;
+ Py_ssize_t repsize, newpos, k;
+ enum PyUnicode_Kind repkind;
+ void *repdata;
+
repunicode = unicode_encode_call_errorhandler(errors, &errorHandler,
encoding, reason, unicode, &exc,
- collstart-s, collend-s, &newpos);
- Py_DECREF(unicode);
+ startpos, endpos, &newpos);
if (repunicode == NULL)
goto onError;
if (!PyUnicode_Check(repunicode)) {
Py_DECREF(repunicode);
goto onError;
}
+ if (PyUnicode_READY(repunicode) < 0) {
+ Py_DECREF(repunicode);
+ goto onError;
+ }
+ repkind = PyUnicode_KIND(repunicode);
+ repdata = PyUnicode_DATA(repunicode);
+
/* generate replacement */
repsize = PyUnicode_GET_SIZE(repunicode);
- for (uni2 = PyUnicode_AS_UNICODE(repunicode); repsize-->0; ++uni2) {
- Py_UNICODE ch = *uni2;
+ for (k=0; k<repsize; k++) {
+ ch = PyUnicode_READ(repkind, repdata, k);
if (Py_UNICODE_ISSPACE(ch))
*output++ = ' ';
else {
*output++ = (char)ch;
else {
Py_DECREF(repunicode);
- unicode = PyUnicode_FromUnicode(s, length);
- if (unicode == NULL)
- goto onError;
raise_encode_exception(&exc, encoding,
- unicode, collstart-s, collend-s, reason);
- Py_DECREF(unicode);
+ unicode, startpos, endpos,
+ reason);
goto onError;
}
}
}
- p = s + newpos;
+ i = newpos;
Py_DECREF(repunicode);
}
+ }
}
/* 0-terminate the output string */
*output++ = '\0';
Py_XDECREF(exc);
Py_XDECREF(errorHandler);
+ Py_DECREF(unicode);
return 0;
onError:
Py_XDECREF(exc);
Py_XDECREF(errorHandler);
+ Py_DECREF(unicode);
return -1;
}