granicus.if.org Git - python/commitdiff
Change Py_BuildValue to generate Unicode objects for
authorMartin v. Löwis <martin@v.loewis.de>
Wed, 18 Jul 2007 02:28:27 +0000 (02:28 +0000)
committerMartin v. Löwis <martin@v.loewis.de>
Wed, 18 Jul 2007 02:28:27 +0000 (02:28 +0000)
's' and 'c' codes.
Change pickle to dump bytes objects using the 'S'
code, and to load the 'S' code as byte objects.
Change datetime and array to generate and expect
bytes objects in reduce/unreduce.

Lib/pickle.py
Lib/test/test_datetime.py
Modules/arraymodule.c
Modules/cPickle.c
Modules/datetimemodule.c
Objects/bytesobject.c
Objects/exceptions.c
Python/modsupport.c

index 27f7ecaffa6e9d187cf96799420f12b0e74fab90..c158b8da0f630af8ebedd691b8c03c72d6257ecb 100644 (file)
@@ -506,6 +506,20 @@ class Pickler:
         self.memoize(obj)
     dispatch[str8] = save_string
 
+    def save_bytes(self, obj):
+        # Like save_string
+        if self.bin:
+            n = len(obj)
+            if n < 256:
+                self.write(SHORT_BINSTRING + bytes([n]) + bytes(obj))
+            else:
+                self.write(BINSTRING + pack("<i", n) + bytes(obj))
+        else:
+            # Strip leading 'b'
+            self.write(STRING + bytes(repr(obj).lstrip("b")) + b'\n')
+        self.memoize(obj)
+    dispatch[bytes] = save_bytes
+
     def save_unicode(self, obj, pack=struct.pack):
         if self.bin:
             encoded = obj.encode('utf-8')
@@ -931,12 +945,12 @@ class Unpickler:
                 break
         else:
             raise ValueError, "insecure string pickle"
-        self.append(str8(codecs.escape_decode(rep)[0]))
+        self.append(bytes(codecs.escape_decode(rep)[0]))
     dispatch[STRING[0]] = load_string
 
     def load_binstring(self):
         len = mloads(b'i' + self.read(4))
-        self.append(str8(self.read(len)))
+        self.append(self.read(len))
     dispatch[BINSTRING[0]] = load_binstring
 
     def load_unicode(self):
@@ -950,7 +964,7 @@ class Unpickler:
 
     def load_short_binstring(self):
         len = ord(self.read(1))
-        self.append(str8(self.read(len)))
+        self.append(self.read(len))
     dispatch[SHORT_BINSTRING[0]] = load_short_binstring
 
     def load_tuple(self):
index ac6d13004769a959edca8d9a1a799e830d4b135e..cf7051bc0df8bca4f1990bdbeaf0f29a43ae2491 100644 (file)
@@ -1097,8 +1097,7 @@ class TestDate(HarmlessMixedComparison, unittest.TestCase):
             # This shouldn't blow up because of the month byte alone.  If
             # the implementation changes to do more-careful checking, it may
             # blow up because other fields are insane.
-            # XXX Maybe this will have to become bytes?
-            self.theclass(str8(base[:2] + chr(ord_byte) + base[3:]))
+            self.theclass(bytes(base[:2] + chr(ord_byte) + base[3:]))
 
 #############################################################################
 # datetime tests
index 11819e271f82137708952546c8387656eb67020f..7ddc7e8aace40552511e696cb216f8aaaf01cc5e 100644 (file)
@@ -1126,7 +1126,7 @@ array_reduce(arrayobject *array)
                Py_INCREF(dict);
        }
        if (array->ob_size > 0) {
-               result = Py_BuildValue("O(cs#)O", 
+               result = Py_BuildValue("O(cy#)O", 
                        array->ob_type, 
                        array->ob_descr->typecode,
                        array->ob_item,
index d50c74358ac285bc4d8091bfc09074184a3850b8..ff29b6731dce0b135f3a32e48c7e984190949b22 100644 (file)
@@ -1151,6 +1151,92 @@ save_string(Picklerobject *self, PyObject *args, int doput)
 }
 
 
+static int
+save_bytes(Picklerobject *self, PyObject *args, int doput)
+{
+       int size, len;
+       PyObject *repr=0;
+
+       if ((size = PyBytes_Size(args)) < 0)
+               return -1;
+
+       if (!self->bin) {
+               char *repr_str;
+
+               static char string = STRING;
+
+               if (!( repr = PyObject_ReprStr8(args)))
+                       return -1;
+
+               if ((len = PyString_Size(repr)) < 0)
+                       goto err;
+               repr_str = PyString_AS_STRING((PyStringObject *)repr);
+
+                /* Strip leading 'b' due to repr() of bytes() returning b'...' */
+                if (repr_str[0] == 'b') {
+                       repr_str++;
+                       len--;
+               }
+
+               if (self->write_func(self, &string, 1) < 0)
+                       goto err;
+
+               if (self->write_func(self, repr_str, len) < 0)
+                       goto err;
+
+               if (self->write_func(self, "\n", 1) < 0)
+                       goto err;
+
+               Py_XDECREF(repr);
+       }
+       else {
+               int i;
+               char c_str[5];
+
+               if ((size = PyBytes_Size(args)) < 0)
+                       return -1;
+
+               if (size < 256) {
+                       c_str[0] = SHORT_BINSTRING;
+                       c_str[1] = size;
+                       len = 2;
+               }
+               else if (size <= INT_MAX) {
+                       c_str[0] = BINSTRING;
+                       for (i = 1; i < 5; i++)
+                               c_str[i] = (int)(size >> ((i - 1) * 8));
+                       len = 5;
+               }
+               else
+                       return -1;    /* string too large */
+
+               if (self->write_func(self, c_str, len) < 0)
+                       return -1;
+
+               if (size > 128 && Pdata_Check(self->file)) {
+                       if (write_other(self, NULL, 0) < 0) return -1;
+                       PDATA_APPEND(self->file, args, -1);
+               }
+               else {
+                       if (self->write_func(self,
+                                            PyBytes_AsString(args),
+                                            size) < 0)
+                               return -1;
+               }
+       }
+
+       if (doput)
+               if (put(self, args) < 0)
+                       return -1;
+
+       return 0;
+
+  err:
+       Py_XDECREF(repr);
+       return -1;
+}
+
+
 /* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
    backslash and newline characters to \uXXXX escapes. */
 static PyObject *
@@ -2086,11 +2172,11 @@ save(Picklerobject *self, PyObject *args, int pers_save)
        type = args->ob_type;
 
        switch (type->tp_name[0]) {
-       case 'b':
+       case 'b': /* XXX may want to save short byte strings here. */
                if (args == Py_False || args == Py_True) {
                        res = save_bool(self, args);
                        goto finally;
-               }
+               } 
                break;
         case 'i':
                if (type == &PyLong_Type) {
@@ -2197,6 +2283,11 @@ save(Picklerobject *self, PyObject *args, int pers_save)
                        res = save_global(self, args, NULL);
                        goto finally;
                }
+               else if (type == &PyBytes_Type) {
+                       res = save_bytes(self, args, 1);
+                       goto finally;
+               }
+               break;
        }
 
        if (!pers_save && self->inst_pers_func) {
@@ -3131,11 +3222,17 @@ load_string(Unpicklerobject *self)
                goto insecure;
        /********************************************/
 
+       /* XXX avoid going through str8 here. */
        str = PyString_DecodeEscape(p, len, NULL, 0, NULL);
        free(s);
        if (str) {
-               PDATA_PUSH(self->stack, str, -1);
-               res = 0;
+               PyObject *str2 = PyBytes_FromStringAndSize(
+                       PyString_AsString(str), PyString_Size(str));
+               Py_DECREF(str);
+               if (str2) {
+                       PDATA_PUSH(self->stack, str2, -1);
+                       res = 0;
+               }
        }
        return res;
 
@@ -3160,7 +3257,7 @@ load_binstring(Unpicklerobject *self)
        if (self->read_func(self, &s, l) < 0)
                return -1;
 
-       if (!( py_string = PyString_FromStringAndSize(s, l)))
+       if (!( py_string = PyBytes_FromStringAndSize(s, l)))
                return -1;
 
        PDATA_PUSH(self->stack, py_string, -1);
@@ -3182,7 +3279,7 @@ load_short_binstring(Unpicklerobject *self)
 
        if (self->read_func(self, &s, l) < 0) return -1;
 
-       if (!( py_string = PyString_FromStringAndSize(s, l)))  return -1;
+       if (!( py_string = PyBytes_FromStringAndSize(s, l)))  return -1;
 
        PDATA_PUSH(self->stack, py_string, -1);
        return 0;
index 61f9ab7d0ab39729d2f1d9e408b8e52a605a6d5d..aa037c27d7bca05ac102ec0a6fa706737631e1b2 100644 (file)
@@ -2183,15 +2183,15 @@ date_new(PyTypeObject *type, PyObject *args, PyObject *kw)
 
        /* Check for invocation from pickle with __getstate__ state */
        if (PyTuple_GET_SIZE(args) == 1 &&
-           PyString_Check(state = PyTuple_GET_ITEM(args, 0)) &&
-           PyString_GET_SIZE(state) == _PyDateTime_DATE_DATASIZE &&
-           MONTH_IS_SANE(PyString_AS_STRING(state)[2]))
+           PyBytes_Check(state = PyTuple_GET_ITEM(args, 0)) &&
+           PyBytes_GET_SIZE(state) == _PyDateTime_DATE_DATASIZE &&
+           MONTH_IS_SANE(PyBytes_AS_STRING(state)[2]))
        {
                PyDateTime_Date *me;
 
                me = (PyDateTime_Date *) (type->tp_alloc(type, 0));
                if (me != NULL) {
-                       char *pdata = PyString_AS_STRING(state);
+                       char *pdata = PyBytes_AS_STRING(state);
                        memcpy(me->data, pdata, _PyDateTime_DATE_DATASIZE);
                        me->hashcode = -1;
                }
@@ -2509,13 +2509,13 @@ date_replace(PyDateTime_Date *self, PyObject *args, PyObject *kw)
        return clone;
 }
 
-static PyObject *date_getstate(PyDateTime_Date *self);
+static PyObject *date_getstate(PyDateTime_Date *self, int hashable);
 
 static long
 date_hash(PyDateTime_Date *self)
 {
        if (self->hashcode == -1) {
-               PyObject *temp = date_getstate(self);
+               PyObject *temp = date_getstate(self, 1);
                if (temp != NULL) {
                        self->hashcode = PyObject_Hash(temp);
                        Py_DECREF(temp);
@@ -2543,18 +2543,22 @@ date_weekday(PyDateTime_Date *self)
 
 /* __getstate__ isn't exposed */
 static PyObject *
-date_getstate(PyDateTime_Date *self)
+date_getstate(PyDateTime_Date *self, int hashable)
 {
-       return Py_BuildValue(
-               "(N)",
-               PyString_FromStringAndSize((char *)self->data,
-                                          _PyDateTime_DATE_DATASIZE));
+       PyObject* field;
+       if (hashable)
+               field = PyString_FromStringAndSize(
+                       (char*)self->data, _PyDateTime_DATE_DATASIZE);
+       else
+               field = PyBytes_FromStringAndSize(
+                       (char*)self->data, _PyDateTime_DATE_DATASIZE);
+       return Py_BuildValue("(N)", field);
 }
 
 static PyObject *
 date_reduce(PyDateTime_Date *self, PyObject *arg)
 {
-       return Py_BuildValue("(ON)", self->ob_type, date_getstate(self));
+       return Py_BuildValue("(ON)", self->ob_type, date_getstate(self, 0));
 }
 
 static PyMethodDef date_methods[] = {
@@ -2998,9 +3002,9 @@ time_new(PyTypeObject *type, PyObject *args, PyObject *kw)
        /* Check for invocation from pickle with __getstate__ state */
        if (PyTuple_GET_SIZE(args) >= 1 &&
            PyTuple_GET_SIZE(args) <= 2 &&
-           PyString_Check(state = PyTuple_GET_ITEM(args, 0)) &&
-           PyString_GET_SIZE(state) == _PyDateTime_TIME_DATASIZE &&
-           ((unsigned char) (PyString_AS_STRING(state)[0])) < 24)
+           PyBytes_Check(state = PyTuple_GET_ITEM(args, 0)) &&
+           PyBytes_GET_SIZE(state) == _PyDateTime_TIME_DATASIZE &&
+           ((unsigned char) (PyBytes_AS_STRING(state)[0])) < 24)
        {
                PyDateTime_Time *me;
                char aware;
@@ -3016,7 +3020,7 @@ time_new(PyTypeObject *type, PyObject *args, PyObject *kw)
                aware = (char)(tzinfo != Py_None);
                me = (PyDateTime_Time *) (type->tp_alloc(type, aware));
                if (me != NULL) {
-                       char *pdata = PyString_AS_STRING(state);
+                       char *pdata = PyBytes_AS_STRING(state);
 
                        memcpy(me->data, pdata, _PyDateTime_TIME_DATASIZE);
                        me->hashcode = -1;
@@ -3331,7 +3335,7 @@ time_getstate(PyDateTime_Time *self)
        PyObject *basestate;
        PyObject *result = NULL;
 
-       basestate =  PyString_FromStringAndSize((char *)self->data,
+       basestate =  PyBytes_FromStringAndSize((char *)self->data,
                                                _PyDateTime_TIME_DATASIZE);
        if (basestate != NULL) {
                if (! HASTZINFO(self) || self->tzinfo == Py_None)
@@ -3513,9 +3517,9 @@ datetime_new(PyTypeObject *type, PyObject *args, PyObject *kw)
        /* Check for invocation from pickle with __getstate__ state */
        if (PyTuple_GET_SIZE(args) >= 1 &&
            PyTuple_GET_SIZE(args) <= 2 &&
-           PyString_Check(state = PyTuple_GET_ITEM(args, 0)) &&
-           PyString_GET_SIZE(state) == _PyDateTime_DATETIME_DATASIZE &&
-           MONTH_IS_SANE(PyString_AS_STRING(state)[2]))
+           PyBytes_Check(state = PyTuple_GET_ITEM(args, 0)) &&
+           PyBytes_GET_SIZE(state) == _PyDateTime_DATETIME_DATASIZE &&
+           MONTH_IS_SANE(PyBytes_AS_STRING(state)[2]))
        {
                PyDateTime_DateTime *me;
                char aware;
@@ -3531,7 +3535,7 @@ datetime_new(PyTypeObject *type, PyObject *args, PyObject *kw)
                aware = (char)(tzinfo != Py_None);
                me = (PyDateTime_DateTime *) (type->tp_alloc(type , aware));
                if (me != NULL) {
-                       char *pdata = PyString_AS_STRING(state);
+                       char *pdata = PyBytes_AS_STRING(state);
 
                        memcpy(me->data, pdata, _PyDateTime_DATETIME_DATASIZE);
                        me->hashcode = -1;
@@ -4375,8 +4379,8 @@ datetime_getstate(PyDateTime_DateTime *self)
        PyObject *basestate;
        PyObject *result = NULL;
 
-       basestate = PyString_FromStringAndSize((char *)self->data,
-                                         _PyDateTime_DATETIME_DATASIZE);
+       basestate = PyBytes_FromStringAndSize((char *)self->data,
+                                             _PyDateTime_DATETIME_DATASIZE);
        if (basestate != NULL) {
                if (! HASTZINFO(self) || self->tzinfo == Py_None)
                        result = PyTuple_Pack(1, basestate);
index 532e63777c5937561a53f5f4a6a60c05c0cf051d..6340b46e846a531a006b3a1128c179224bc26072 100644 (file)
@@ -2724,6 +2724,9 @@ PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
 static PyObject *
 bytes_reduce(PyBytesObject *self)
 {
+    /* XXX: This currently returns a Py_UNICODE-widened string
+       in the tuple which is completely useless. Pickle stopped
+       using it for that reason. */
     return Py_BuildValue("(O(s#))",
                          self->ob_type,
                          self->ob_bytes == NULL ? "" : self->ob_bytes,
index 1df0ea0852db27622efad8c2533edc1fbbec8ce4..a4018068ab59a43724e62a92ee740841eb00a5c9 100644 (file)
@@ -831,28 +831,32 @@ my_basename(char *name)
 static PyObject *
 SyntaxError_str(PySyntaxErrorObject *self)
 {
-    int have_filename = 0;
     int have_lineno = 0;
+    char *filename = 0;
 
     /* XXX -- do all the additional formatting with filename and
        lineno here */
 
-    have_filename = (self->filename != NULL) &&
-        PyString_Check(self->filename);
+    if (self->filename) {
+       if (PyString_Check(self->filename))
+           filename = PyString_AsString(self->filename);
+       else if (PyUnicode_Check(self->filename))
+           filename = PyUnicode_AsString(self->filename);
+    }
     have_lineno = (self->lineno != NULL) && PyInt_CheckExact(self->lineno);
 
-    if (!have_filename && !have_lineno)
+    if (!filename && !have_lineno)
         return PyObject_Unicode(self->msg ? self->msg : Py_None);
 
-    if (have_filename && have_lineno)
+    if (filename && have_lineno)
         return PyUnicode_FromFormat("%S (%s, line %ld)",
                    self->msg ? self->msg : Py_None,
-                   my_basename(PyString_AS_STRING(self->filename)),
+                   my_basename(filename),
                    PyInt_AsLong(self->lineno));
-    else if (have_filename)
+    else if (filename)
         return PyUnicode_FromFormat("%S (%s)",
                    self->msg ? self->msg : Py_None,
-                   my_basename(PyString_AS_STRING(self->filename)));
+                   my_basename(filename));
     else /* only have_lineno */
         return PyUnicode_FromFormat("%S (line %ld)",
                    self->msg ? self->msg : Py_None,
index 330da5fe800005f7a50687164d7482944b48320a..d29fe9b0a74554dea328159738659690c77262d6 100644 (file)
@@ -387,7 +387,7 @@ do_mkvalue(const char **p_format, va_list *p_va, int flags)
                {
                        char p[1];
                        p[0] = (char)va_arg(*p_va, int);
-                       return PyString_FromStringAndSize(p, 1);
+                       return PyUnicode_FromStringAndSize(p, 1);
                }
                case 'C':
                {
@@ -438,7 +438,7 @@ do_mkvalue(const char **p_format, va_list *p_va, int flags)
                                        }
                                        n = (Py_ssize_t)m;
                                }
-                               v = PyString_FromStringAndSize(str, n);
+                               v = PyUnicode_FromStringAndSize(str, n);
                        }
                        return v;
                }