| | | operand |
+--------------------+---------+---------------------------+
-The following codecs provide bytes-to-bytes mappings. They can be used with
-:meth:`bytes.transform` and :meth:`bytes.untransform`.
+The following codecs provide bytes-to-bytes mappings.
+--------------------+---------------------------+---------------------------+
| Codec | Aliases | Purpose |
| | | using gzip |
+--------------------+---------------------------+---------------------------+
-The following codecs provide string-to-string mappings. They can be used with
-:meth:`str.transform` and :meth:`str.untransform`.
+The following codecs provide string-to-string mappings.
+--------------------+---------------------------+---------------------------+
| Codec | Aliases | Purpose |
"They're Bill's Friends."
-.. method:: str.transform(encoding, errors='strict')
-
- Return an encoded version of the string. In contrast to :meth:`encode`, this
- method works with codecs that provide string-to-string mappings, and not
- string-to-bytes mappings. :meth:`transform` therefore returns a string
- object.
-
- The codecs that can be used with this method are listed in
- :ref:`standard-encodings`.
-
- .. versionadded:: 3.2
-
-
.. method:: str.translate(map)
Return a copy of the *s* where all characters have been mapped through the
example).
-.. method:: str.untransform(encoding, errors='strict')
-
- Return a decoded version of the string. This provides the reverse operation
- of :meth:`transform`.
-
- .. versionadded:: 3.2
-
-
.. method:: str.upper()
Return a copy of the string converted to uppercase.
The maketrans and translate methods differ in semantics from the versions
available on strings:
-.. method:: bytes.transform(encoding, errors='strict')
- bytearray.transform(encoding, errors='strict')
-
- Return an encoded version of the bytes object. In contrast to
- :meth:`encode`, this method works with codecs that provide bytes-to-bytes
- mappings, and not string-to-bytes mappings. :meth:`transform` therefore
- returns a bytes or bytearray object.
-
- The codecs that can be used with this method are listed in
- :ref:`standard-encodings`.
-
- .. versionadded:: 3.2
-
-
.. method:: bytes.translate(table[, delete])
bytearray.translate(table[, delete])
b'rd ths shrt txt'
-.. method:: bytes.untransform(encoding, errors='strict')
- bytearray.untransform(encoding, errors='strict')
-
- Return an decoded version of the bytes object. This provides the reverse
- operation of :meth:`transform`.
-
- .. versionadded:: 3.2
-
-
.. staticmethod:: bytes.maketrans(from, to)
bytearray.maketrans(from, to)
Some smaller changes made to the core Python language are:
-* :class:`bytes` and :class:`str` now have two net methods, *transform* and
- *untransform*. These provide analogues to *encode* and *decode* but are used
- for general purpose str-to-str and bytes-to-bytes transformations rather than
- Unicode codecs for bytes-to-str and str-to-bytes.
-
- Along with the new methods, several non-unicode codecs been restored from Python 2.x
- including *base64*, *bz2*, *hex*, *quopri*, *rot13*, *uu*, and *zlib*.
-
- >>> t = b'which witch had which witches wrist watch'
- >>> t.transform('quopri')
- b'which=20witch=20had=20which=20witches=20wrist=20watch'
-
- >>> short = t.transform('zlib_codec')
- >>> len(t), len(short)
- (41, 38)
- >>> short.untransform('zlib_codec')
- b'which witch had which witches wrist watch'
-
- (From multiple contributors in :issue:`7475`.)
-
* String formatting for :func:`format` and :meth:`str.format` gained new
capabilities for the format character **#**. Previously, for integers in
binary, octal, or hexadecimal, it caused the output to be prefixed with '0b',
self.assertEqual(b.decode(errors="ignore", encoding="utf8"),
"Hello world\n")
- def test_transform(self):
- b1 = self.type2test(range(256))
- b2 = b1.transform("base64").untransform("base64")
- self.assertEqual(b2, b1)
-
def test_from_int(self):
b = self.type2test(0)
self.assertEqual(b, self.type2test())
bytes_transform_encodings.append("bz2_codec")
class TransformCodecTest(unittest.TestCase):
+
def test_basics(self):
binput = bytes(range(256))
- ainput = bytearray(binput)
for encoding in bytes_transform_encodings:
# generic codecs interface
(o, size) = codecs.getencoder(encoding)(binput)
self.assertEqual(size, len(o))
self.assertEqual(i, binput)
- # transform interface
- boutput = binput.transform(encoding)
- aoutput = ainput.transform(encoding)
- self.assertEqual(boutput, aoutput)
- self.assertIsInstance(boutput, bytes)
- self.assertIsInstance(aoutput, bytearray)
- bback = boutput.untransform(encoding)
- aback = aoutput.untransform(encoding)
- self.assertEqual(bback, aback)
- self.assertEqual(bback, binput)
- self.assertIsInstance(bback, bytes)
- self.assertIsInstance(aback, bytearray)
-
def test_read(self):
for encoding in bytes_transform_encodings:
- sin = b"\x80".transform(encoding)
+ sin = codecs.encode(b"\x80", encoding)
reader = codecs.getreader(encoding)(io.BytesIO(sin))
sout = reader.read()
self.assertEqual(sout, b"\x80")
for encoding in bytes_transform_encodings:
if encoding in ['uu_codec', 'zlib_codec']:
continue
- sin = b"\x80".transform(encoding)
+ sin = codecs.encode(b"\x80", encoding)
reader = codecs.getreader(encoding)(io.BytesIO(sin))
sout = reader.readline()
self.assertEqual(sout, b"\x80")
- Issue #9915: Speed up sorting with a key.
-- Issue #7475: Added transform() and untransform() methods to both bytes and
- string types. They can be used to access those codecs providing
- bytes-to-bytes and string-to-string mappings.
-
- Issue #8685: Speed up set difference ``a - b`` when source set ``a`` is much
larger than operand ``b``. Patch by Andrew Bennetts.
return PyUnicode_FromEncodedObject(self, encoding, errors);
}
-PyDoc_STRVAR(transform__doc__,
-"B.transform(encoding, errors='strict') -> bytearray\n\
-\n\
-Transform B using the codec registered for encoding. errors may be given\n\
-to set a different error handling scheme.");
-
-static PyObject *
-bytearray_transform(PyObject *self, PyObject *args, PyObject *kwargs)
-{
- const char *encoding = NULL;
- const char *errors = NULL;
- static char *kwlist[] = {"encoding", "errors", 0};
- PyObject *v, *w;
-
- if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|s:transform",
- kwlist, &encoding, &errors))
- return NULL;
-
- v = PyCodec_Encode(self, encoding, errors);
- if (v == NULL)
- return NULL;
- if (!PyBytes_Check(v)) {
- PyErr_Format(PyExc_TypeError,
- "encoder did not return a bytes object (type=%.400s)",
- Py_TYPE(v)->tp_name);
- Py_DECREF(v);
- return NULL;
- }
- w = PyByteArray_FromStringAndSize(PyBytes_AS_STRING(v),
- PyBytes_GET_SIZE(v));
- Py_DECREF(v);
- return w;
-}
-
-
-PyDoc_STRVAR(untransform__doc__,
-"B.untransform(encoding, errors='strict') -> bytearray\n\
-\n\
-Reverse-transform B using the codec registered for encoding. errors may\n\
-be given to set a different error handling scheme.");
-
-static PyObject *
-bytearray_untransform(PyObject *self, PyObject *args, PyObject *kwargs)
-{
- const char *encoding = NULL;
- const char *errors = NULL;
- static char *kwlist[] = {"encoding", "errors", 0};
- PyObject *v, *w;
-
- if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|s:untransform",
- kwlist, &encoding, &errors))
- return NULL;
-
- v = PyCodec_Decode(self, encoding, errors);
- if (v == NULL)
- return NULL;
- if (!PyBytes_Check(v)) {
- PyErr_Format(PyExc_TypeError,
- "decoder did not return a bytes object (type=%.400s)",
- Py_TYPE(v)->tp_name);
- Py_DECREF(v);
- return NULL;
- }
- w = PyByteArray_FromStringAndSize(PyBytes_AS_STRING(v),
- PyBytes_GET_SIZE(v));
- Py_DECREF(v);
- return w;
-}
-
PyDoc_STRVAR(alloc_doc,
"B.__alloc__() -> int\n\
\n\
{"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
_Py_swapcase__doc__},
{"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
- {"transform", (PyCFunction)bytearray_transform, METH_VARARGS | METH_KEYWORDS,
- transform__doc__},
{"translate", (PyCFunction)bytearray_translate, METH_VARARGS,
translate__doc__},
- {"untransform", (PyCFunction)bytearray_untransform, METH_VARARGS | METH_KEYWORDS,
- untransform__doc__},
{"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
{"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
{NULL}
return PyUnicode_FromEncodedObject(self, encoding, errors);
}
-PyDoc_STRVAR(transform__doc__,
-"B.transform(encoding, errors='strict') -> bytes\n\
-\n\
-Transform B using the codec registered for encoding. errors may be given\n\
-to set a different error handling scheme.");
-
-static PyObject *
-bytes_transform(PyObject *self, PyObject *args, PyObject *kwargs)
-{
- const char *encoding = NULL;
- const char *errors = NULL;
- static char *kwlist[] = {"encoding", "errors", 0};
- PyObject *v;
-
- if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|s:transform",
- kwlist, &encoding, &errors))
- return NULL;
-
- v = PyCodec_Encode(self, encoding, errors);
- if (v == NULL)
- return NULL;
- if (!PyBytes_Check(v)) {
- PyErr_Format(PyExc_TypeError,
- "encoder did not return a bytes object (type=%.400s)",
- Py_TYPE(v)->tp_name);
- Py_DECREF(v);
- return NULL;
- }
- return v;
-}
-
-
-PyDoc_STRVAR(untransform__doc__,
-"B.untransform(encoding, errors='strict') -> bytes\n\
-\n\
-Reverse-transform B using the codec registered for encoding. errors may\n\
-be given to set a different error handling scheme.");
-
-static PyObject *
-bytes_untransform(PyObject *self, PyObject *args, PyObject *kwargs)
-{
- const char *encoding = NULL;
- const char *errors = NULL;
- static char *kwlist[] = {"encoding", "errors", 0};
- PyObject *v;
-
- if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|s:untransform",
- kwlist, &encoding, &errors))
- return NULL;
-
- v = PyCodec_Decode(self, encoding, errors);
- if (v == NULL)
- return NULL;
- if (!PyBytes_Check(v)) {
- PyErr_Format(PyExc_TypeError,
- "decoder did not return a bytes object (type=%.400s)",
- Py_TYPE(v)->tp_name);
- Py_DECREF(v);
- return NULL;
- }
- return v;
-}
PyDoc_STRVAR(splitlines__doc__,
"B.splitlines([keepends]) -> list of lines\n\
{"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
_Py_swapcase__doc__},
{"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
- {"transform", (PyCFunction)bytes_transform, METH_VARARGS | METH_KEYWORDS, transform__doc__},
{"translate", (PyCFunction)bytes_translate, METH_VARARGS,
translate__doc__},
- {"untransform", (PyCFunction)bytes_untransform, METH_VARARGS | METH_KEYWORDS, untransform__doc__},
{"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
{"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__},
{"__sizeof__", (PyCFunction)bytes_sizeof, METH_NOARGS,
return PyUnicode_AsEncodedString((PyObject *)self, encoding, errors);
}
-PyDoc_STRVAR(transform__doc__,
- "S.transform(encoding, errors='strict') -> str\n\
-\n\
-Transform S using the codec registered for encoding. errors may be given\n\
-to set a different error handling scheme.");
-
-static PyObject *
-unicode_transform(PyUnicodeObject *self, PyObject *args, PyObject *kwargs)
-{
- static char *kwlist[] = {"encoding", "errors", 0};
- char *encoding = NULL;
- char *errors = NULL;
-
- if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|s:transform",
- kwlist, &encoding, &errors))
- return NULL;
- return PyUnicode_AsEncodedUnicode((PyObject *)self, encoding, errors);
-}
-
-PyDoc_STRVAR(untransform__doc__,
- "S.untransform(encoding, errors='strict') -> str\n\
-\n\
-Reverse-transform S using the codec registered for encoding. errors may be\n\
-given to set a different error handling scheme.");
-
-static PyObject *
-unicode_untransform(PyUnicodeObject *self, PyObject *args, PyObject *kwargs)
-{
- static char *kwlist[] = {"encoding", "errors", 0};
- char *encoding = NULL;
- char *errors = NULL;
-
- if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|s:untransform",
- kwlist, &encoding, &errors))
- return NULL;
- return PyUnicode_AsDecodedUnicode((PyObject *)self, encoding, errors);
-}
-
PyDoc_STRVAR(expandtabs__doc__,
"S.expandtabs([tabsize]) -> str\n\
\n\
/* Order is according to common usage: often used methods should
appear first, since lookup is done sequentially. */
- {"encode", (PyCFunction) unicode_encode, METH_VARARGS | METH_KEYWORDS,
- encode__doc__},
+ {"encode", (PyCFunction) unicode_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},
{"replace", (PyCFunction) unicode_replace, METH_VARARGS, replace__doc__},
{"split", (PyCFunction) unicode_split, METH_VARARGS, split__doc__},
{"rsplit", (PyCFunction) unicode_rsplit, METH_VARARGS, rsplit__doc__},
{"__format__", (PyCFunction) unicode__format__, METH_VARARGS, p_format__doc__},
{"maketrans", (PyCFunction) unicode_maketrans,
METH_VARARGS | METH_STATIC, maketrans__doc__},
- {"transform", (PyCFunction) unicode_transform, METH_VARARGS | METH_KEYWORDS,
- transform__doc__},
- {"untransform", (PyCFunction) unicode_untransform, METH_VARARGS | METH_KEYWORDS,
- untransform__doc__},
{"__sizeof__", (PyCFunction) unicode__sizeof__, METH_NOARGS, sizeof__doc__},
#if 0
{"capwords", (PyCFunction) unicode_capwords, METH_NOARGS, capwords__doc__},