.. index:: pair: string; methods
-Below are listed the string methods which both 8-bit strings and Unicode objects
-support. Note that none of these methods take keyword arguments.
+Below are listed the string methods which both 8-bit strings and
+Unicode objects support.
In addition, Python's strings support the sequence type methods
described in the :ref:`typesseq` section. To output formatted strings
.. versionchanged:: 2.3
Support for other error handling schemes added.
+ .. versionchanged:: 2.7
+ Support for keyword arguments added.
.. method:: str.encode([encoding[,errors]])
Support for ``'xmlcharrefreplace'`` and ``'backslashreplace'`` and other error
handling schemes added.
+ .. versionchanged:: 2.7
+ Support for keyword arguments added.
.. method:: str.endswith(suffix[, start[, end]])
def test_buffer_is_readonly(self):
self.assertRaises(TypeError, sys.stdin.readinto, b"")
+ def test_encode_and_decode_kwargs(self):
+ self.assertEqual('abcde'.encode('ascii', 'replace'),
+ 'abcde'.encode('ascii', errors='replace'))
+ self.assertEqual('abcde'.encode('ascii', 'ignore'),
+ 'abcde'.encode(encoding='ascii', errors='ignore'))
+ self.assertEqual('Andr\202 x'.decode('ascii', 'ignore'),
+ 'Andr\202 x'.decode('ascii', errors='ignore'))
+ self.assertEqual('Andr\202 x'.decode('ascii', 'replace'),
+ 'Andr\202 x'.decode(encoding='ascii', errors='replace'))
+
+
def test_main():
test_support.run_unittest(StrTest)
self.assertRaises(UnicodeError, u'Andr\202 x'.encode, 'ascii','strict')
self.assertEqual(u'Andr\202 x'.encode('ascii','ignore'), "Andr x")
self.assertEqual(u'Andr\202 x'.encode('ascii','replace'), "Andr? x")
+ self.assertEqual(u'Andr\202 x'.encode('ascii', 'replace'),
+ u'Andr\202 x'.encode('ascii', errors='replace'))
+ self.assertEqual(u'Andr\202 x'.encode('ascii', 'ignore'),
+ u'Andr\202 x'.encode(encoding='ascii', errors='ignore'))
# Error handling (decoding)
self.assertRaises(UnicodeError, unicode, 'Andr\202 x', 'ascii')
self.assertRaises(UnicodeError, unicode, 'Andr\202 x', 'ascii','strict')
self.assertEqual(unicode('Andr\202 x','ascii','ignore'), u"Andr x")
self.assertEqual(unicode('Andr\202 x','ascii','replace'), u'Andr\uFFFD x')
+ self.assertEqual(u'abcde'.decode('ascii', 'ignore'),
+ u'abcde'.decode('ascii', errors='ignore'))
+ self.assertEqual(u'abcde'.decode('ascii', 'replace'),
+ u'abcde'.decode(encoding='ascii', errors='replace'))
# Error handling (unknown character names)
self.assertEqual("\\N{foo}xx".decode("unicode-escape", "ignore"), u"xx")
Eric Bouck
Thierry Bousch
Sebastian Boving
+Jeff Bradberry
Monty Brandenberg
Georg Brandl
Christopher Brannon
Core and Builtins
-----------------
+- Issue #6300: unicode.encode, unicode.docode, str.decode, and str.encode now
+ take keyword arguments.
+
- Issue #6922: Fix an infinite loop when trying to decode an invalid
UTF-32 stream with a non-raising error handler like "replace" or "ignore".
codecs.register_error that is able to handle UnicodeEncodeErrors.");
static PyObject *
-string_encode(PyStringObject *self, PyObject *args)
+string_encode(PyStringObject *self, PyObject *args, PyObject *kwargs)
{
+ static char *kwlist[] = {"encoding", "errors", 0};
char *encoding = NULL;
char *errors = NULL;
PyObject *v;
- if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",
+ kwlist, &encoding, &errors))
return NULL;
v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
if (v == NULL)
able to handle UnicodeDecodeErrors.");
static PyObject *
-string_decode(PyStringObject *self, PyObject *args)
+string_decode(PyStringObject *self, PyObject *args, PyObject *kwargs)
{
+ static char *kwlist[] = {"encoding", "errors", 0};
char *encoding = NULL;
char *errors = NULL;
PyObject *v;
- if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode",
+ kwlist, &encoding, &errors))
return NULL;
v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
if (v == NULL)
{"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
{"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
{"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
- {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
- {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
+ {"encode", (PyCFunction)string_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},
+ {"decode", (PyCFunction)string_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
{"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
expandtabs__doc__},
{"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
codecs.register_error that can handle UnicodeEncodeErrors.");
static PyObject *
-unicode_encode(PyUnicodeObject *self, PyObject *args)
+unicode_encode(PyUnicodeObject *self, PyObject *args, PyObject *kwargs)
{
+ static char *kwlist[] = {"encoding", "errors", 0};
char *encoding = NULL;
char *errors = NULL;
PyObject *v;
- if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",
+ kwlist, &encoding, &errors))
return NULL;
v = PyUnicode_AsEncodedObject((PyObject *)self, encoding, errors);
if (v == NULL)
able to handle UnicodeDecodeErrors.");
static PyObject *
-unicode_decode(PyUnicodeObject *self, PyObject *args)
+unicode_decode(PyUnicodeObject *self, PyObject *args, PyObject *kwargs)
{
+ static char *kwlist[] = {"encoding", "errors", 0};
char *encoding = NULL;
char *errors = NULL;
PyObject *v;
- if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode",
+ kwlist, &encoding, &errors))
return NULL;
v = PyUnicode_AsDecodedObject((PyObject *)self, encoding, errors);
if (v == NULL)
/* Order is according to common usage: often used methods should
appear first, since lookup is done sequentially. */
- {"encode", (PyCFunction) unicode_encode, METH_VARARGS, encode__doc__},
+ {"encode", (PyCFunction) unicode_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},
{"replace", (PyCFunction) unicode_replace, METH_VARARGS, replace__doc__},
{"split", (PyCFunction) unicode_split, METH_VARARGS, split__doc__},
{"rsplit", (PyCFunction) unicode_rsplit, METH_VARARGS, rsplit__doc__},
{"ljust", (PyCFunction) unicode_ljust, METH_VARARGS, ljust__doc__},
{"lower", (PyCFunction) unicode_lower, METH_NOARGS, lower__doc__},
{"lstrip", (PyCFunction) unicode_lstrip, METH_VARARGS, lstrip__doc__},
- {"decode", (PyCFunction) unicode_decode, METH_VARARGS, decode__doc__},
+ {"decode", (PyCFunction) unicode_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
/* {"maketrans", (PyCFunction) unicode_maketrans, METH_VARARGS, maketrans__doc__}, */
{"rfind", (PyCFunction) unicode_rfind, METH_VARARGS, rfind__doc__},
{"rindex", (PyCFunction) unicode_rindex, METH_VARARGS, rindex__doc__},