]> granicus.if.org Git - python/commitdiff
Change UnicodeDecodeError objects so that the 'object' attribute
authorWalter Dörwald <walter@livinglogic.de>
Fri, 4 May 2007 19:28:21 +0000 (19:28 +0000)
committerWalter Dörwald <walter@livinglogic.de>
Fri, 4 May 2007 19:28:21 +0000 (19:28 +0000)
is a bytes object.

Add 'y' and 'y#' format specifiers that work like 's' and 's#'
but only accept bytes objects.

Doc/api/utilities.tex
Doc/ext/extending.tex
Objects/exceptions.c
Python/getargs.c
Python/modsupport.c

index 93e3796455612d23c60f6ae018ea60158208bf5d..fb9c90996998c7c4a1485183762bef5693a0f462 100644 (file)
@@ -424,6 +424,18 @@ whose address should be passed.
   compatible objects pass back a reference to the raw internal data
   representation.
 
+  \item[\samp{y} (bytes object)
+  {[const char *]}]
+  This variant on \samp{s} converts a Python bytes object to a C pointer to a
+  character string. The bytes object must not contain embedded NUL bytes;
+  if it does, a \exception{TypeError} exception is raised.
+
+  \item[\samp{y\#} (bytes object)
+  {[const char *, int]}]
+  This variant on \samp{s\#} stores into two C variables, the first one
+  a pointer to a character string, the second one its length.  This only
+  accepts bytes objects.
+
   \item[\samp{z} (string or \code{None}) {[const char *]}]
   Like \samp{s}, but the Python object may also be \code{None}, in
   which case the C pointer is set to \NULL.
index 2af88b531566edb09921e7a16f6f894dc3efce73..1f3e2d5b4fceee0f254bcbbbc10dc1058c988c0e 100644 (file)
@@ -802,8 +802,10 @@ Examples (to the left the call, to the right the resulting Python value):
     Py_BuildValue("i", 123)                  123
     Py_BuildValue("iii", 123, 456, 789)      (123, 456, 789)
     Py_BuildValue("s", "hello")              'hello'
+    Py_BuildValue("y", "hello")              b'hello'
     Py_BuildValue("ss", "hello", "world")    ('hello', 'world')
     Py_BuildValue("s#", "hello", 4)          'hell'
+    Py_BuildValue("y#", "hello", 4)          b'hell'
     Py_BuildValue("()")                      ()
     Py_BuildValue("(i)", 123)                (123,)
     Py_BuildValue("(ii)", 123, 456)          (123, 456)
index e30e9df655279a326bb7551fe17288da3e58ae27..1096bace5a67801b9cb4d0b9e0015f9388823f89 100644 (file)
@@ -1242,6 +1242,22 @@ set_string(PyObject **attr, const char *value)
 }
 
 
+static PyObject *  /* returns attr with an added reference, or NULL with TypeError set */
+get_bytes(PyObject *attr, const char *name)  /* name is only used in the error messages */
+{
+    if (!attr) {  /* the attribute pointer is NULL: nothing to hand back */
+        PyErr_Format(PyExc_TypeError, "%.200s attribute not set", name);
+        return NULL;
+    }
+
+    if (!PyBytes_Check(attr)) {  /* reject anything that is not a bytes object */
+        PyErr_Format(PyExc_TypeError, "%.200s attribute must be bytes", name);
+        return NULL;
+    }
+    Py_INCREF(attr);  /* the caller receives its own reference */
+    return attr;
+}
+
 static PyObject *
 get_unicode(PyObject *attr, const char *name)
 {
@@ -1280,7 +1296,7 @@ PyUnicodeEncodeError_GetObject(PyObject *exc)
 PyObject *
 PyUnicodeDecodeError_GetObject(PyObject *exc)
 {
-    return get_string(((PyUnicodeErrorObject *)exc)->object, "object");
+    return get_bytes(((PyUnicodeErrorObject *)exc)->object, "object");  /* bytes 'object' as a new reference, or NULL with TypeError set */
 }
 
 PyObject *
@@ -1314,10 +1330,10 @@ PyUnicodeDecodeError_GetStart(PyObject *exc, Py_ssize_t *start)
 {
     if (!get_int(((PyUnicodeErrorObject *)exc)->start, start, "start")) {
         Py_ssize_t size;
-        PyObject *obj = get_string(((PyUnicodeErrorObject *)exc)->object,
+        PyObject *obj = get_bytes(((PyUnicodeErrorObject *)exc)->object,
                                    "object");
         if (!obj) return -1;
-        size = PyString_GET_SIZE(obj);
+        size = PyBytes_GET_SIZE(obj);
         if (*start<0)
             *start = 0;
         if (*start>=size)
@@ -1382,10 +1398,10 @@ PyUnicodeDecodeError_GetEnd(PyObject *exc, Py_ssize_t *end)
 {
     if (!get_int(((PyUnicodeErrorObject *)exc)->end, end, "end")) {
         Py_ssize_t size;
-        PyObject *obj = get_string(((PyUnicodeErrorObject *)exc)->object,
+        PyObject *obj = get_bytes(((PyUnicodeErrorObject *)exc)->object,
                                    "object");
         if (!obj) return -1;
-        size = PyString_GET_SIZE(obj);
+        size = PyBytes_GET_SIZE(obj);
         if (*end<1)
             *end = 1;
         if (*end>size)
@@ -1629,7 +1645,7 @@ UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
     if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1)
         return -1;
     return UnicodeError_init((PyUnicodeErrorObject *)self, args,
-                             kwds, &PyString_Type);
+                             kwds, &PyBytes_Type);
 }
 
 static PyObject *
@@ -1648,7 +1664,7 @@ UnicodeDecodeError_str(PyObject *self)
         /* FromFormat does not support %02x, so format that separately */
         char byte[4];
         PyOS_snprintf(byte, sizeof(byte), "%02x",
-                      ((int)PyString_AS_STRING(((PyUnicodeErrorObject *)self)->object)[start])&0xff);
+                      ((int)PyBytes_AS_STRING(((PyUnicodeErrorObject *)self)->object)[start])&0xff);
         return PyString_FromFormat(
             "'%.400s' codec can't decode byte 0x%s in position %zd: %.400s",
             PyString_AS_STRING(((PyUnicodeErrorObject *)self)->encoding),
@@ -1689,7 +1705,7 @@ PyUnicodeDecodeError_Create(
     assert(length < INT_MAX);
     assert(start < INT_MAX);
     assert(end < INT_MAX);
-    return PyObject_CallFunction(PyExc_UnicodeDecodeError, "ss#nns",
+    return PyObject_CallFunction(PyExc_UnicodeDecodeError, "sy#nns",
                                  encoding, object, length, start, end, reason);
 }
 
index f7a66048fbeeca2cf5ecff083f516758ae084d2a..8331a18965a3ff4e135ee5df03cc7c0bec7ce1f7 100644 (file)
@@ -819,6 +819,32 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
                break;
        }
 
+       case 'y': {/* bytes */
+               if (*format == '#') {
+                       void **p = (void **)va_arg(*p_va, char **);
+                       FETCH_SIZE;
+                       
+                       if (PyBytes_Check(arg)) {
+                               *p = PyBytes_AS_STRING(arg);
+                               STORE_SIZE(PyBytes_GET_SIZE(arg));
+                       }
+                       else
+                               return converterr("bytes", arg, msgbuf, bufsize);
+                       format++;
+               } else {
+                       char **p = va_arg(*p_va, char **);
+                       
+                       if (PyBytes_Check(arg))
+                               *p = PyBytes_AS_STRING(arg);
+                       else
+                               return converterr("bytes", arg, msgbuf, bufsize);
+                       if ((Py_ssize_t)strlen(*p) != PyBytes_Size(arg))
+                               return converterr("bytes without null bytes",
+                                                 arg, msgbuf, bufsize);
+               }
+               break;
+       }
+
        case 'z': {/* string, may be NULL (None) */
                if (*format == '#') { /* any buffer-like object */
                        void **p = (void **)va_arg(*p_va, char **);
@@ -1595,6 +1621,7 @@ skipitem(const char **p_format, va_list *p_va, int flags)
        
        case 's': /* string */
        case 'z': /* string or None */
+       case 'y': /* bytes */
        case 'u': /* unicode string */
        case 't': /* buffer, read-only */
        case 'w': /* buffer, read-write */
index af774f0b602284fddad470575c2984d2f3b18a9e..8f600dc3459629e2a4b989e82d22cd1d9c645ba0 100644 (file)
@@ -424,6 +424,39 @@ do_mkvalue(const char **p_format, va_list *p_va, int flags)
                        return v;
                }
 
+               case 'y':
+               {
+                       PyObject *v;
+                       char *str = va_arg(*p_va, char *);
+                       Py_ssize_t n;
+                       if (**p_format == '#') {
+                               ++*p_format;
+                               if (flags & FLAG_SIZE_T)
+                                       n = va_arg(*p_va, Py_ssize_t);
+                               else
+                                       n = va_arg(*p_va, int);
+                       }
+                       else
+                               n = -1;
+                       if (str == NULL) {
+                               v = Py_None;
+                               Py_INCREF(v);
+                       }
+                       else {
+                               if (n < 0) {
+                                       size_t m = strlen(str);
+                                       if (m > PY_SSIZE_T_MAX) {
+                                               PyErr_SetString(PyExc_OverflowError,
+                                                       "string too long for Python bytes");
+                                               return NULL;
+                                       }
+                                       n = (Py_ssize_t)m;
+                               }
+                               v = PyBytes_FromStringAndSize(str, n);
+                       }
+                       return v;
+               }
+
                case 'N':
                case 'S':
                case 'O':