self.assertEqual(unicode_asucs4(s, len(s), 1), s+'\0')
self.assertEqual(unicode_asucs4(s, len(s), 0), s+'\uffff')
+ # Test PyUnicode_AsUTF8()
+ @support.cpython_only
+ def test_asutf8(self):
+ from _testcapi import unicode_asutf8
+
+ bmp = '\u0100'
+ bmp2 = '\uffff'
+ nonbmp = chr(0x10ffff)
+
+ self.assertEqual(unicode_asutf8(bmp), b'\xc4\x80')
+ self.assertEqual(unicode_asutf8(bmp2), b'\xef\xbf\xbf')
+ self.assertEqual(unicode_asutf8(nonbmp), b'\xf4\x8f\xbf\xbf')
+ self.assertRaises(UnicodeEncodeError, unicode_asutf8, 'a\ud800b\udfffc')
+
+ # Test PyUnicode_AsUTF8AndSize()
+ @support.cpython_only
+ def test_asutf8andsize(self):
+ from _testcapi import unicode_asutf8andsize
+
+ bmp = '\u0100'
+ bmp2 = '\uffff'
+ nonbmp = chr(0x10ffff)
+
+ self.assertEqual(unicode_asutf8andsize(bmp), (b'\xc4\x80', 2))
+ self.assertEqual(unicode_asutf8andsize(bmp2), (b'\xef\xbf\xbf', 3))
+ self.assertEqual(unicode_asutf8andsize(nonbmp), (b'\xf4\x8f\xbf\xbf', 4))
+ self.assertRaises(UnicodeEncodeError, unicode_asutf8andsize, 'a\ud800b\udfffc')
+
# Test PyUnicode_FindChar()
@support.cpython_only
def test_findchar(self):
return result;
}
+/* Test wrapper for PyUnicode_AsUTF8(): parses a single str argument and
+   returns the resulting C string copied into a new bytes object.
+   Returns NULL if argument parsing or the conversion fails. */
+static PyObject *
+unicode_asutf8(PyObject *self, PyObject *args)
+{
+    PyObject *str;
+    const char *utf8;
+
+    if (!PyArg_ParseTuple(args, "U", &str))
+        return NULL;
+
+    utf8 = PyUnicode_AsUTF8(str);
+
+    /* A NULL buffer is passed straight back to the caller as NULL;
+       otherwise the NUL-terminated buffer is copied into a bytes object. */
+    return (utf8 != NULL) ? PyBytes_FromString(utf8) : NULL;
+}
+
+/* Test wrapper for PyUnicode_AsUTF8AndSize(): parses a single str argument
+   and returns a 2-tuple (utf8_bytes, utf8_len).
+   Returns NULL if argument parsing, the conversion, or building the
+   result fails. */
+static PyObject *
+unicode_asutf8andsize(PyObject *self, PyObject *args)
+{
+    PyObject *unicode, *result;
+    const char *buffer;
+    Py_ssize_t utf8_len;
+
+    if (!PyArg_ParseTuple(args, "U", &unicode)) {
+        return NULL;
+    }
+
+    buffer = PyUnicode_AsUTF8AndSize(unicode, &utf8_len);
+    if (buffer == NULL) {
+        return NULL;
+    }
+
+    /* Copy exactly utf8_len bytes rather than stopping at the first NUL:
+       a str may contain U+0000, and the bytes object must agree with the
+       size reported next to it. */
+    result = PyBytes_FromStringAndSize(buffer, utf8_len);
+    if (result == NULL) {
+        return NULL;
+    }
+
+    /* "N" hands the reference to result over to the new tuple. */
+    return Py_BuildValue("(Nn)", result, utf8_len);
+}
+
static PyObject *
unicode_findchar(PyObject *self, PyObject *args)
{
{"unicode_aswidechar", unicode_aswidechar, METH_VARARGS},
{"unicode_aswidecharstring",unicode_aswidecharstring, METH_VARARGS},
{"unicode_asucs4", unicode_asucs4, METH_VARARGS},
+ {"unicode_asutf8", unicode_asutf8, METH_VARARGS},
+ {"unicode_asutf8andsize", unicode_asutf8andsize, METH_VARARGS},
{"unicode_findchar", unicode_findchar, METH_VARARGS},
{"unicode_copycharacters", unicode_copycharacters, METH_VARARGS},
{"unicode_encodedecimal", unicode_encodedecimal, METH_VARARGS},