]> granicus.if.org Git - python/commitdiff
Internal module _codecs -- Provides access to the codec registry and
authorGuido van Rossum <guido@python.org>
Fri, 10 Mar 2000 23:09:23 +0000 (23:09 +0000)
committerGuido van Rossum <guido@python.org>
Fri, 10 Mar 2000 23:09:23 +0000 (23:09 +0000)
the builtin codecs.  Written by Marc-Andre Lemburg.

Modules/_codecsmodule.c [new file with mode: 0644]

diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c
new file mode 100644 (file)
index 0000000..6c8a2d4
--- /dev/null
@@ -0,0 +1,529 @@
+/* ------------------------------------------------------------------------
+
+   _codecs -- Provides access to the codec registry and the builtin
+              codecs.
+
+   This module should never be imported directly. The standard library
+   module "codecs" wraps this builtin module for use within Python.
+
+   The codec registry is accessible via:
+
+     register(search_function) -> None
+
+     lookup(encoding) -> (encoder, decoder, stream_reader, stream_writer)
+
+   The builtin Unicode codecs use the following interface:
+
+     <encoding>_encode(Unicode_object[,errors='strict']) ->
+       (string object, Unicode characters consumed)
+
+     <encoding>_decode(char_buffer_obj[,errors='strict']) -> 
+        (Unicode object, bytes consumed)
+
+   These <encoding>s are available: utf_8, utf_16, utf_16_le, utf_16_be,
+   unicode_escape, raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit), charmap
+
+Written by Marc-Andre Lemburg (mal@lemburg.com).
+
+(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+
+   ------------------------------------------------------------------------ */
+
+#include "Python.h"
+
+/* --- Registry ----------------------------------------------------------- */
+
+/* register(search_function) -> None
+   Hands search_function to PyCodec_Register(); returns NULL on failure. */
+static PyObject *
+codecregister(PyObject *self, PyObject *args)
+{
+    PyObject *callback;
+
+    if (!PyArg_ParseTuple(args, "O:register", &callback))
+        return NULL;
+
+    /* Any non-zero result from the registry is treated as an error. */
+    if (PyCodec_Register(callback) != 0)
+        return NULL;
+
+    Py_INCREF(Py_None);
+    return Py_None;
+}
+
+/* lookup(encoding) -> whatever _PyCodec_Lookup(encoding) returns.
+   Returns NULL when argument parsing fails. */
+static PyObject *
+codeclookup(PyObject *self, PyObject *args)
+{
+    char *name = NULL;
+    PyObject *entry = NULL;
+
+    if (PyArg_ParseTuple(args, "s:lookup", &name))
+        entry = _PyCodec_Lookup(name);
+
+    return entry;
+}
+
+/* --- Helpers ------------------------------------------------------------ */
+
+/* Build the 2-tuple (unicode, len), consuming the reference to `unicode`. */
+static PyObject *
+codec_tuple(PyObject *unicode, int len)
+{
+    PyObject *result, *count;
+
+    if (!unicode)               /* NULL in, NULL out */
+        return NULL;
+    result = PyTuple_New(2);
+    if (!result) {
+        Py_DECREF(unicode);
+        return NULL;
+    }
+    PyTuple_SET_ITEM(result, 0, unicode);
+    count = PyInt_FromLong(len);
+    if (!count) {
+        Py_DECREF(result);
+        return NULL;
+    }
+    PyTuple_SET_ITEM(result, 1, count);
+    return result;
+}
+
+/* --- Decoder ------------------------------------------------------------ */
+
+/* unicode_internal_decode(buffer[, errors]) -> (Unicode object, len(buffer)) */
+static PyObject *
+unicode_internal_decode(PyObject *self, PyObject *args)
+{
+    const char *raw, *errors = NULL;    /* errors is parsed but unused */
+    int nbytes;
+    PyObject *decoded;
+
+    if (!PyArg_ParseTuple(args, "s#|z:unicode_internal_decode",
+                          &raw, &nbytes, &errors))
+        return NULL;
+    /* The buffer is taken as raw Py_UNICODE data; bytes -> unit count. */
+    decoded = PyUnicode_FromUnicode((Py_UNICODE *)raw,
+                                    nbytes / sizeof(Py_UNICODE));
+    return codec_tuple(decoded, nbytes);
+}
+
+/* utf_8_decode(data[, errors]) -> (Unicode object, len(data)) */
+static PyObject *
+utf_8_decode(PyObject *self, PyObject *args)
+{
+    const char *buf, *errors = NULL;
+    int buflen;
+    PyObject *decoded;
+
+    if (!PyArg_ParseTuple(args, "t#|z:utf_8_decode",
+                          &buf, &buflen, &errors))
+        return NULL;
+
+    decoded = PyUnicode_DecodeUTF8(buf, buflen, errors);
+    return codec_tuple(decoded, buflen);
+}
+
+/* utf_16_decode(data[, errors]) -> (Unicode object, len(data)) */
+static PyObject *
+utf_16_decode(PyObject *self, PyObject *args)
+{
+    const char *buf, *errors = NULL;
+    int buflen;
+    int order = 0;      /* initial byteorder handed to the decoder */
+    PyObject *decoded;
+
+    if (!PyArg_ParseTuple(args, "t#|z:utf_16_decode",
+                          &buf, &buflen, &errors))
+        return NULL;
+    decoded = PyUnicode_DecodeUTF16(buf, buflen, errors, &order);
+    return codec_tuple(decoded, buflen);
+}
+
+/* utf_16_le_decode(data[, errors]) -> (Unicode object, len(data)) */
+static PyObject *
+utf_16_le_decode(PyObject *self, PyObject *args)
+{
+    const char *buf, *errors = NULL;
+    int buflen;
+    int order = -1;     /* initial byteorder handed to the decoder */
+    PyObject *decoded;
+
+    if (!PyArg_ParseTuple(args, "t#|z:utf_16_le_decode",
+                          &buf, &buflen, &errors))
+        return NULL;
+    decoded = PyUnicode_DecodeUTF16(buf, buflen, errors, &order);
+    return codec_tuple(decoded, buflen);
+}
+
+/* utf_16_be_decode(data[, errors]) -> (Unicode object, len(data)) */
+static PyObject *
+utf_16_be_decode(PyObject *self, PyObject *args)
+{
+    const char *buf, *errors = NULL;
+    int buflen;
+    int order = 1;      /* initial byteorder handed to the decoder */
+    PyObject *decoded;
+
+    if (!PyArg_ParseTuple(args, "t#|z:utf_16_be_decode",
+                          &buf, &buflen, &errors))
+        return NULL;
+    decoded = PyUnicode_DecodeUTF16(buf, buflen, errors, &order);
+    return codec_tuple(decoded, buflen);
+}
+
+/* This non-standard version also provides access to the byteorder
+   parameter of the builtin UTF-16 codec.
+
+   It returns a tuple (unicode, bytesread, byteorder) with byteorder
+   being the value in effect at the end of data.
+
+*/
+
+static PyObject *
+utf_16_ex_decode(PyObject *self, PyObject *args)
+{
+    const char *buf, *errors = NULL;
+    int buflen;
+    int order = 0;      /* optional 3rd argument; passed by address below */
+    PyObject *decoded;
+    PyObject *result = NULL;
+
+    if (!PyArg_ParseTuple(args, "t#|zi:utf_16_ex_decode",
+                          &buf, &buflen, &errors, &order))
+        return NULL;
+
+    decoded = PyUnicode_DecodeUTF16(buf, buflen, errors, &order);
+    if (decoded != NULL) {
+        /* "O" adds its own reference, so ours is dropped afterwards. */
+        result = Py_BuildValue("Oii", decoded, buflen, order);
+        Py_DECREF(decoded);
+    }
+    return result;
+}
+
+/* unicode_escape_decode(data[, errors]) -> (Unicode object, len(data)) */
+static PyObject *
+unicode_escape_decode(PyObject *self, PyObject *args)
+{
+    const char *buf, *errors = NULL;
+    int buflen;
+    PyObject *decoded;
+
+    if (!PyArg_ParseTuple(args, "t#|z:unicode_escape_decode",
+                          &buf, &buflen, &errors))
+        return NULL;
+
+    decoded = PyUnicode_DecodeUnicodeEscape(buf, buflen, errors);
+    return codec_tuple(decoded, buflen);
+}
+
+/* raw_unicode_escape_decode(data[, errors]) -> (Unicode object, len(data)) */
+static PyObject *
+raw_unicode_escape_decode(PyObject *self, PyObject *args)
+{
+    const char *buf, *errors = NULL;
+    int buflen;
+    PyObject *decoded;
+
+    if (!PyArg_ParseTuple(args, "t#|z:raw_unicode_escape_decode",
+                          &buf, &buflen, &errors))
+        return NULL;
+
+    decoded = PyUnicode_DecodeRawUnicodeEscape(buf, buflen, errors);
+    return codec_tuple(decoded, buflen);
+}
+
+/* latin_1_decode(data[, errors]) -> (Unicode object, len(data)) */
+static PyObject *
+latin_1_decode(PyObject *self, PyObject *args)
+{
+    const char *buf, *errors = NULL;
+    int buflen;
+    PyObject *decoded;
+
+    if (!PyArg_ParseTuple(args, "t#|z:latin_1_decode",
+                          &buf, &buflen, &errors))
+        return NULL;
+
+    decoded = PyUnicode_DecodeLatin1(buf, buflen, errors);
+    return codec_tuple(decoded, buflen);
+}
+
+/* ascii_decode(data[, errors]) -> (Unicode object, len(data)) */
+static PyObject *
+ascii_decode(PyObject *self, PyObject *args)
+{
+    const char *buf, *errors = NULL;
+    int buflen;
+    PyObject *decoded;
+
+    if (!PyArg_ParseTuple(args, "t#|z:ascii_decode",
+                          &buf, &buflen, &errors))
+        return NULL;
+
+    decoded = PyUnicode_DecodeASCII(buf, buflen, errors);
+    return codec_tuple(decoded, buflen);
+}
+
+/* charmap_decode(data[, errors[, mapping]]) -> (Unicode object, len(data))
+   A mapping of None is treated the same as an omitted mapping. */
+static PyObject *
+charmap_decode(PyObject *self, PyObject *args)
+{
+    const char *buf, *errors = NULL;
+    int buflen;
+    PyObject *table = NULL;
+    PyObject *decoded;
+
+    if (!PyArg_ParseTuple(args, "t#|zO:charmap_decode",
+                          &buf, &buflen, &errors, &table))
+        return NULL;
+
+    decoded = PyUnicode_DecodeCharmap(buf, buflen,
+                                      table == Py_None ? NULL : table, errors);
+    return codec_tuple(decoded, buflen);
+}
+
+/* --- Encoder ------------------------------------------------------------ */
+
+/* readbuffer_encode(data[, errors]) -> (string copy of data, len(data)) */
+static PyObject *
+readbuffer_encode(PyObject *self, PyObject *args)
+{
+    const char *buf, *errors = NULL;    /* errors is parsed but unused */
+    int buflen;
+    PyObject *copy;
+
+    if (!PyArg_ParseTuple(args, "s#|z:readbuffer_encode",
+                          &buf, &buflen, &errors))
+        return NULL;
+
+    copy = PyString_FromStringAndSize(buf, buflen);
+    return codec_tuple(copy, buflen);
+}
+
+/* charbuffer_encode(data[, errors]) -> (string copy of data, len(data)) */
+static PyObject *
+charbuffer_encode(PyObject *self, PyObject *args)
+{
+    const char *buf, *errors = NULL;    /* errors is parsed but unused */
+    int buflen;
+    PyObject *copy;
+
+    if (!PyArg_ParseTuple(args, "t#|z:charbuffer_encode",
+                          &buf, &buflen, &errors))
+        return NULL;
+
+    copy = PyString_FromStringAndSize(buf, buflen);
+    return codec_tuple(copy, buflen);
+}
+
+/* utf_8_encode(unicode[, errors]) -> (encoded object, len(unicode)) */
+static PyObject *
+utf_8_encode(PyObject *self, PyObject *args)
+{
+    PyObject *ustr;
+    const char *errors = NULL;
+    int nchars;
+    PyObject *encoded;
+
+    if (!PyArg_ParseTuple(args, "U|z:utf_8_encode", &ustr, &errors))
+        return NULL;
+
+    nchars = PyUnicode_GET_SIZE(ustr);
+    encoded = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(ustr), nchars, errors);
+    return codec_tuple(encoded, nchars);
+}
+
+/* This version provides access to the byteorder parameter of the
+   builtin UTF-16 codecs as optional third argument. It defaults to 0
+   which means: use the native byte order and prepend the data with a
+   BOM mark.  
+
+*/
+
+static PyObject *
+utf_16_encode(PyObject *self, PyObject *args)
+{
+    PyObject *ustr;
+    const char *errors = NULL;
+    int order = 0;      /* used when the third argument is omitted */
+    int nchars;
+    PyObject *encoded;
+
+    if (!PyArg_ParseTuple(args, "U|zi:utf_16_encode",
+                          &ustr, &errors, &order))
+        return NULL;
+
+    nchars = PyUnicode_GET_SIZE(ustr);
+    encoded = PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(ustr), nchars,
+                                    errors, order);
+    return codec_tuple(encoded, nchars);
+}
+
+/* utf_16_le_encode(unicode[, errors]) -> (encoded object, len(unicode))
+   The byteorder is fixed to -1, so no third argument is accepted. */
+static PyObject *
+utf_16_le_encode(PyObject *self, PyObject *args)
+{
+    PyObject *str;
+    const char *errors = NULL;
+
+    /* Format has no "i": only two output pointers are supplied here. */
+    if (!PyArg_ParseTuple(args, "U|z:utf_16_le_encode", &str, &errors))
+        return NULL;
+
+    return codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
+                                             PyUnicode_GET_SIZE(str),
+                                             errors, -1),
+                       PyUnicode_GET_SIZE(str));
+}
+
+/* utf_16_be_encode(unicode[, errors]) -> (encoded object, len(unicode))
+   The byteorder is fixed to +1, so no third argument is accepted. */
+static PyObject *
+utf_16_be_encode(PyObject *self, PyObject *args)
+{
+    PyObject *str;
+    const char *errors = NULL;
+
+    /* Format has no "i": only two output pointers are supplied here. */
+    if (!PyArg_ParseTuple(args, "U|z:utf_16_be_encode", &str, &errors))
+        return NULL;
+
+    return codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
+                                             PyUnicode_GET_SIZE(str),
+                                             errors, +1),
+                       PyUnicode_GET_SIZE(str));
+}
+
+/* unicode_escape_encode(unicode[, errors]) -> (encoded object, len(unicode)) */
+static PyObject *
+unicode_escape_encode(PyObject *self, PyObject *args)
+{
+    PyObject *ustr;
+    const char *errors = NULL;      /* parsed but not forwarded */
+    int nchars;
+
+    if (!PyArg_ParseTuple(args, "U|z:unicode_escape_encode",
+                          &ustr, &errors))
+        return NULL;
+    nchars = PyUnicode_GET_SIZE(ustr);
+    return codec_tuple(
+        PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(ustr), nchars),
+        nchars);
+}
+
+/* raw_unicode_escape_encode(unicode[, errors]) -> (encoded, len(unicode)) */
+static PyObject *
+raw_unicode_escape_encode(PyObject *self, PyObject *args)
+{
+    PyObject *ustr;
+    const char *errors = NULL;      /* parsed but not forwarded */
+    int nchars;
+
+    if (!PyArg_ParseTuple(args, "U|z:raw_unicode_escape_encode",
+                          &ustr, &errors))
+        return NULL;
+    nchars = PyUnicode_GET_SIZE(ustr);
+    return codec_tuple(
+        PyUnicode_EncodeRawUnicodeEscape(PyUnicode_AS_UNICODE(ustr), nchars),
+        nchars);
+}
+
+/* latin_1_encode(unicode[, errors]) -> (encoded object, len(unicode)) */
+static PyObject *
+latin_1_encode(PyObject *self, PyObject *args)
+{
+    PyObject *ustr, *encoded;
+    const char *errors = NULL;
+    int nchars;
+
+    if (!PyArg_ParseTuple(args, "U|z:latin_1_encode",
+                          &ustr, &errors))
+        return NULL;
+
+    nchars = PyUnicode_GET_SIZE(ustr);
+    encoded = PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(ustr),
+                                     nchars, errors);
+    return codec_tuple(encoded, nchars);
+}
+
+/* ascii_encode(unicode[, errors]) -> (encoded object, len(unicode)) */
+static PyObject *
+ascii_encode(PyObject *self, PyObject *args)
+{
+    PyObject *ustr, *encoded;
+    const char *errors = NULL;
+    int nchars;
+
+    if (!PyArg_ParseTuple(args, "U|z:ascii_encode",
+                          &ustr, &errors))
+        return NULL;
+
+    nchars = PyUnicode_GET_SIZE(ustr);
+    encoded = PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(ustr),
+                                    nchars, errors);
+    return codec_tuple(encoded, nchars);
+}
+
+/* charmap_encode(unicode[, errors[, mapping]]) -> (encoded, len(unicode))
+   A mapping of None is treated the same as an omitted mapping. */
+static PyObject *
+charmap_encode(PyObject *self, PyObject *args)
+{
+    PyObject *ustr, *encoded;
+    PyObject *table = NULL;
+    const char *errors = NULL;
+    int nchars;
+
+    if (!PyArg_ParseTuple(args, "U|zO:charmap_encode",
+                          &ustr, &errors, &table))
+        return NULL;
+    if (table == Py_None)
+        table = NULL;
+
+    nchars = PyUnicode_GET_SIZE(ustr);
+    encoded = PyUnicode_EncodeCharmap(PyUnicode_AS_UNICODE(ustr), nchars,
+                                      table, errors);
+    return codec_tuple(encoded, nchars);
+}
+
+/* --- Module API --------------------------------------------------------- */
+
+static PyMethodDef _codecs_functions[] = {  /* Python name, C function, flags */
+    {"register",                  codecregister,             METH_VARARGS},
+    {"lookup",                    codeclookup,               METH_VARARGS},
+    {"utf_8_encode",              utf_8_encode,              METH_VARARGS},
+    {"utf_8_decode",              utf_8_decode,              METH_VARARGS},
+    {"utf_16_encode",             utf_16_encode,             METH_VARARGS},
+    {"utf_16_le_encode",          utf_16_le_encode,          METH_VARARGS},
+    {"utf_16_be_encode",          utf_16_be_encode,          METH_VARARGS},
+    {"utf_16_decode",             utf_16_decode,             METH_VARARGS},
+    {"utf_16_le_decode",          utf_16_le_decode,          METH_VARARGS},
+    {"utf_16_be_decode",          utf_16_be_decode,          METH_VARARGS},
+    {"utf_16_ex_decode",          utf_16_ex_decode,          METH_VARARGS},
+    {"unicode_escape_encode",     unicode_escape_encode,     METH_VARARGS},
+    {"unicode_escape_decode",     unicode_escape_decode,     METH_VARARGS},
+    {"unicode_internal_encode",   readbuffer_encode,         METH_VARARGS},
+    {"unicode_internal_decode",   unicode_internal_decode,   METH_VARARGS},
+    {"raw_unicode_escape_encode", raw_unicode_escape_encode, METH_VARARGS},
+    {"raw_unicode_escape_decode", raw_unicode_escape_decode, METH_VARARGS},
+    {"latin_1_encode",            latin_1_encode,            METH_VARARGS},
+    {"latin_1_decode",            latin_1_decode,            METH_VARARGS},
+    {"ascii_encode",              ascii_encode,              METH_VARARGS},
+    {"ascii_decode",              ascii_decode,              METH_VARARGS},
+    {"charmap_encode",            charmap_encode,            METH_VARARGS},
+    {"charmap_decode",            charmap_decode,            METH_VARARGS},
+    {"readbuffer_encode",         readbuffer_encode,         METH_VARARGS},
+    {"charbuffer_encode",         charbuffer_encode,         METH_VARARGS},
+    {NULL, NULL}                  /* sentinel */
+};
+
+/* Module initializer: registers _codecs_functions under the name "_codecs". */
+DL_EXPORT(void) init_codecs(void)
+{
+    (void)Py_InitModule("_codecs", _codecs_functions);
+}