]> granicus.if.org Git - python/commitdiff
Issue #25761: Improved detecting errors in broken pickle data.
authorSerhiy Storchaka <storchaka@gmail.com>
Sun, 6 Dec 2015 20:01:35 +0000 (22:01 +0200)
committerSerhiy Storchaka <storchaka@gmail.com>
Sun, 6 Dec 2015 20:01:35 +0000 (22:01 +0200)
Lib/pickle.py
Lib/test/pickletester.py
Lib/test/test_pickle.py
Misc/NEWS
Modules/_pickle.c

index 53978fbd65d9e04eb7a0a141459b55dc4542b283..a60b1b74c5ce6594e408339fa4d1a31320a4bacd 100644 (file)
@@ -1031,7 +1031,7 @@ class _Unpickler:
         self._unframer = _Unframer(self._file_read, self._file_readline)
         self.read = self._unframer.read
         self.readline = self._unframer.readline
-        self.mark = object() # any new unique object
+        self.metastack = []
         self.stack = []
         self.append = self.stack.append
         self.proto = 0
@@ -1047,20 +1047,12 @@ class _Unpickler:
         except _Stop as stopinst:
             return stopinst.value
 
-    # Return largest index k such that self.stack[k] is self.mark.
-    # If the stack doesn't contain a mark, eventually raises IndexError.
-    # This could be sped by maintaining another stack, of indices at which
-    # the mark appears.  For that matter, the latter stack would suffice,
-    # and we wouldn't need to push mark objects on self.stack at all.
-    # Doing so is probably a good thing, though, since if the pickle is
-    # corrupt (or hostile) we may get a clue from finding self.mark embedded
-    # in unpickled objects.
-    def marker(self):
-        stack = self.stack
-        mark = self.mark
-        k = len(stack)-1
-        while stack[k] is not mark: k = k-1
-        return k
+    # Return a list of items pushed in the stack after last MARK instruction.
+    def pop_mark(self):
+        items = self.stack
+        self.stack = self.metastack.pop()
+        self.append = self.stack.append
+        return items
 
     def persistent_load(self, pid):
         raise UnpicklingError("unsupported persistent id encountered")
@@ -1237,8 +1229,8 @@ class _Unpickler:
     dispatch[SHORT_BINUNICODE[0]] = load_short_binunicode
 
     def load_tuple(self):
-        k = self.marker()
-        self.stack[k:] = [tuple(self.stack[k+1:])]
+        items = self.pop_mark()
+        self.append(tuple(items))
     dispatch[TUPLE[0]] = load_tuple
 
     def load_empty_tuple(self):
@@ -1270,21 +1262,20 @@ class _Unpickler:
     dispatch[EMPTY_SET[0]] = load_empty_set
 
     def load_frozenset(self):
-        k = self.marker()
-        self.stack[k:] = [frozenset(self.stack[k+1:])]
+        items = self.pop_mark()
+        self.append(frozenset(items))
     dispatch[FROZENSET[0]] = load_frozenset
 
     def load_list(self):
-        k = self.marker()
-        self.stack[k:] = [self.stack[k+1:]]
+        items = self.pop_mark()
+        self.append(items)
     dispatch[LIST[0]] = load_list
 
     def load_dict(self):
-        k = self.marker()
-        items = self.stack[k+1:]
+        items = self.pop_mark()
         d = {items[i]: items[i+1]
              for i in range(0, len(items), 2)}
-        self.stack[k:] = [d]
+        self.append(d)
     dispatch[DICT[0]] = load_dict
 
     # INST and OBJ differ only in how they get a class object.  It's not
@@ -1292,9 +1283,7 @@ class _Unpickler:
     # previously diverged and grew different bugs.
     # klass is the class to instantiate, and k points to the topmost mark
     # object, following which are the arguments for klass.__init__.
-    def _instantiate(self, klass, k):
-        args = tuple(self.stack[k+1:])
-        del self.stack[k:]
+    def _instantiate(self, klass, args):
         if (args or not isinstance(klass, type) or
             hasattr(klass, "__getinitargs__")):
             try:
@@ -1310,14 +1299,14 @@ class _Unpickler:
         module = self.readline()[:-1].decode("ascii")
         name = self.readline()[:-1].decode("ascii")
         klass = self.find_class(module, name)
-        self._instantiate(klass, self.marker())
+        self._instantiate(klass, self.pop_mark())
     dispatch[INST[0]] = load_inst
 
     def load_obj(self):
         # Stack is ... markobject classobject arg1 arg2 ...
-        k = self.marker()
-        klass = self.stack.pop(k+1)
-        self._instantiate(klass, k)
+        args = self.pop_mark()
+        cls = args.pop(0)
+        self._instantiate(cls, args)
     dispatch[OBJ[0]] = load_obj
 
     def load_newobj(self):
@@ -1402,12 +1391,14 @@ class _Unpickler:
     dispatch[REDUCE[0]] = load_reduce
 
     def load_pop(self):
-        del self.stack[-1]
+        if self.stack:
+            del self.stack[-1]
+        else:
+            self.pop_mark()
     dispatch[POP[0]] = load_pop
 
     def load_pop_mark(self):
-        k = self.marker()
-        del self.stack[k:]
+        self.pop_mark()
     dispatch[POP_MARK[0]] = load_pop_mark
 
     def load_dup(self):
@@ -1463,17 +1454,14 @@ class _Unpickler:
     dispatch[APPEND[0]] = load_append
 
     def load_appends(self):
-        stack = self.stack
-        mark = self.marker()
-        list_obj = stack[mark - 1]
-        items = stack[mark + 1:]
+        items = self.pop_mark()
+        list_obj = self.stack[-1]
         if isinstance(list_obj, list):
             list_obj.extend(items)
         else:
             append = list_obj.append
             for item in items:
                 append(item)
-        del stack[mark:]
     dispatch[APPENDS[0]] = load_appends
 
     def load_setitem(self):
@@ -1485,27 +1473,21 @@ class _Unpickler:
     dispatch[SETITEM[0]] = load_setitem
 
     def load_setitems(self):
-        stack = self.stack
-        mark = self.marker()
-        dict = stack[mark - 1]
-        for i in range(mark + 1, len(stack), 2):
-            dict[stack[i]] = stack[i + 1]
-
-        del stack[mark:]
+        items = self.pop_mark()
+        dict = self.stack[-1]
+        for i in range(0, len(items), 2):
+            dict[items[i]] = items[i + 1]
     dispatch[SETITEMS[0]] = load_setitems
 
     def load_additems(self):
-        stack = self.stack
-        mark = self.marker()
-        set_obj = stack[mark - 1]
-        items = stack[mark + 1:]
+        items = self.pop_mark()
+        set_obj = self.stack[-1]
         if isinstance(set_obj, set):
             set_obj.update(items)
         else:
             add = set_obj.add
             for item in items:
                 add(item)
-        del stack[mark:]
     dispatch[ADDITEMS[0]] = load_additems
 
     def load_build(self):
@@ -1533,7 +1515,9 @@ class _Unpickler:
     dispatch[BUILD[0]] = load_build
 
     def load_mark(self):
-        self.append(self.mark)
+        self.metastack.append(self.stack)
+        self.stack = []
+        self.append = self.stack.append
     dispatch[MARK[0]] = load_mark
 
     def load_stop(self):
index 608c35ac723c20bd6da2e74b7c505cfd5574373f..217aa3db0c5436c8508ed0d23c72ecba2e4d2e63 100644 (file)
@@ -1000,7 +1000,7 @@ class AbstractUnpickleTests(unittest.TestCase):
             b'0',                       # POP
             b'1',                       # POP_MARK
             b'2',                       # DUP
-            # b'(2',                    # PyUnpickler doesn't raise
+            b'(2',
             b'R',                       # REDUCE
             b')R',
             b'a',                       # APPEND
@@ -1009,7 +1009,7 @@ class AbstractUnpickleTests(unittest.TestCase):
             b'Nb',
             b'd',                       # DICT
             b'e',                       # APPENDS
-            # b'(e',                    # PyUnpickler raises AttributeError
+            b'(e',
             b'ibuiltins\nlist\n',       # INST
             b'l',                       # LIST
             b'o',                       # OBJ
@@ -1022,7 +1022,7 @@ class AbstractUnpickleTests(unittest.TestCase):
             b'NNs',
             b't',                       # TUPLE
             b'u',                       # SETITEMS
-            # b'(u',                    # PyUnpickler doesn't raise
+            b'(u',
             b'}(Nu',
             b'\x81',                    # NEWOBJ
             b')\x81',
@@ -1033,7 +1033,7 @@ class AbstractUnpickleTests(unittest.TestCase):
             b'N\x87',
             b'NN\x87',
             b'\x90',                    # ADDITEMS
-            # b'(\x90',                 # PyUnpickler raises AttributeError
+            b'(\x90',
             b'\x91',                    # FROZENSET
             b'\x92',                    # NEWOBJ_EX
             b')}\x92',
@@ -1046,7 +1046,7 @@ class AbstractUnpickleTests(unittest.TestCase):
 
     def test_bad_mark(self):
         badpickles = [
-            b'N(.',                     # STOP
+            b'N(.',                     # STOP
             b'N(2',                     # DUP
             b'cbuiltins\nlist\n)(R',    # REDUCE
             b'cbuiltins\nlist\n()R',
@@ -1081,7 +1081,7 @@ class AbstractUnpickleTests(unittest.TestCase):
             b'N(\x94',                  # MEMOIZE
         ]
         for p in badpickles:
-            self.check_unpickling_error(self.bad_mark_errors, p)
+            self.check_unpickling_error(self.bad_stack_errors, p)
 
     def test_truncated_data(self):
         self.check_unpickling_error(EOFError, b'')
@@ -2581,11 +2581,6 @@ class AbstractPickleModuleTests(unittest.TestCase):
         self.assertRaises(pickle.PicklingError, BadPickler().dump, 0)
         self.assertRaises(pickle.UnpicklingError, BadUnpickler().load)
 
-    def test_bad_input(self):
-        # Test issue4298
-        s = bytes([0x58, 0, 0, 0, 0x54])
-        self.assertRaises(EOFError, pickle.loads, s)
-
 
 class AbstractPersistentPicklerTests(unittest.TestCase):
 
index bd38cfbea9380b7d1122d817d42f3283ef391302..6b9731543885b409d03892fd37a45fb5a5674567 100644 (file)
@@ -33,8 +33,6 @@ class PyUnpicklerTests(AbstractUnpickleTests):
 
     unpickler = pickle._Unpickler
     bad_stack_errors = (IndexError,)
-    bad_mark_errors = (IndexError, pickle.UnpicklingError,
-                       TypeError, AttributeError, EOFError)
     truncated_errors = (pickle.UnpicklingError, EOFError,
                         AttributeError, ValueError,
                         struct.error, IndexError, ImportError)
@@ -69,8 +67,6 @@ class InMemoryPickleTests(AbstractPickleTests, AbstractUnpickleTests,
     pickler = pickle._Pickler
     unpickler = pickle._Unpickler
     bad_stack_errors = (pickle.UnpicklingError, IndexError)
-    bad_mark_errors = (pickle.UnpicklingError, IndexError,
-                       TypeError, AttributeError, EOFError)
     truncated_errors = (pickle.UnpicklingError, EOFError,
                         AttributeError, ValueError,
                         struct.error, IndexError, ImportError)
@@ -132,7 +128,6 @@ if has_c_implementation:
     class CUnpicklerTests(PyUnpicklerTests):
         unpickler = _pickle.Unpickler
         bad_stack_errors = (pickle.UnpicklingError,)
-        bad_mark_errors = (EOFError,)
         truncated_errors = (pickle.UnpicklingError, EOFError,
                             AttributeError, ValueError)
 
index 026a19af757e23c3bbec74fab77f3bee925b3589..578d2a371a2e93941774ebf56c30e0b40c5c09be 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -109,6 +109,8 @@ Core and Builtins
 Library
 -------
 
+- Issue #25761: Improved detecting errors in broken pickle data.
+
 - Issue #25717: Restore the previous behaviour of tolerating most fstat()
   errors when opening files.  This was a regression in 3.5a1, and stopped
   anonymous temporary files from working in special cases.
index 3ddf6a0281d546a637bac4c2148a1a411daf10b4..38598c52922a3845ec6aa5d557540e6ee7e2f3d6 100644 (file)
@@ -370,18 +370,12 @@ _Pickle_FastCall(PyObject *func, PyObject *obj)
 
 /*************************************************************************/
 
-static int
-stack_underflow(void)
-{
-    PickleState *st = _Pickle_GetGlobalState();
-    PyErr_SetString(st->UnpicklingError, "unpickling stack underflow");
-    return -1;
-}
-
 /* Internal data type used as the unpickling stack. */
 typedef struct {
     PyObject_VAR_HEAD
     PyObject **data;
+    int mark_set;          /* is MARK set? */
+    Py_ssize_t fence;      /* position of top MARK or 0 */
     Py_ssize_t allocated;  /* number of slots in data allocated */
 } Pdata;
 
@@ -412,6 +406,8 @@ Pdata_New(void)
     if (!(self = PyObject_New(Pdata, &Pdata_Type)))
         return NULL;
     Py_SIZE(self) = 0;
+    self->mark_set = 0;
+    self->fence = 0;
     self->allocated = 8;
     self->data = PyMem_MALLOC(self->allocated * sizeof(PyObject *));
     if (self->data)
@@ -429,8 +425,7 @@ Pdata_clear(Pdata *self, Py_ssize_t clearto)
 {
     Py_ssize_t i = Py_SIZE(self);
 
-    if (clearto < 0)
-        return stack_underflow();
+    assert(clearto >= self->fence);
     if (clearto >= i)
         return 0;
 
@@ -466,6 +461,17 @@ Pdata_grow(Pdata *self)
     return -1;
 }
 
+static int
+Pdata_stack_underflow(Pdata *self)
+{
+    PickleState *st = _Pickle_GetGlobalState();
+    PyErr_SetString(st->UnpicklingError,
+                    self->mark_set ?
+                    "unexpected MARK found" :
+                    "unpickling stack underflow");
+    return -1;
+}
+
 /* D is a Pdata*.  Pop the topmost element and store it into V, which
  * must be an lvalue holding PyObject*.  On stack underflow, UnpicklingError
  * is raised and V is set to NULL.
@@ -473,9 +479,8 @@ Pdata_grow(Pdata *self)
 static PyObject *
 Pdata_pop(Pdata *self)
 {
-    if (Py_SIZE(self) == 0) {
-        PickleState *st = _Pickle_GetGlobalState();
-        PyErr_SetString(st->UnpicklingError, "bad pickle data");
+    if (Py_SIZE(self) <= self->fence) {
+        Pdata_stack_underflow(self);
         return NULL;
     }
     return self->data[--Py_SIZE(self)];
@@ -507,6 +512,10 @@ Pdata_poptuple(Pdata *self, Py_ssize_t start)
     PyObject *tuple;
     Py_ssize_t len, i, j;
 
+    if (start < self->fence) {
+        Pdata_stack_underflow(self);
+        return NULL;
+    }
     len = Py_SIZE(self) - start;
     tuple = PyTuple_New(len);
     if (tuple == NULL)
@@ -4585,13 +4594,19 @@ find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
 static Py_ssize_t
 marker(UnpicklerObject *self)
 {
-    PickleState *st = _Pickle_GetGlobalState();
+    Py_ssize_t mark;
+
     if (self->num_marks < 1) {
+        PickleState *st = _Pickle_GetGlobalState();
         PyErr_SetString(st->UnpicklingError, "could not find MARK");
         return -1;
     }
 
-    return self->marks[--self->num_marks];
+    mark = self->marks[--self->num_marks];
+    self->stack->mark_set = self->num_marks != 0;
+    self->stack->fence = self->num_marks ?
+            self->marks[self->num_marks - 1] : 0;
+    return mark;
 }
 
 static int
@@ -5052,7 +5067,7 @@ load_counted_tuple(UnpicklerObject *self, int len)
     PyObject *tuple;
 
     if (Py_SIZE(self->stack) < len)
-        return stack_underflow();
+        return Pdata_stack_underflow(self->stack);
 
     tuple = Pdata_poptuple(self->stack, Py_SIZE(self->stack) - len);
     if (tuple == NULL)
@@ -5134,6 +5149,12 @@ load_dict(UnpicklerObject *self)
     if ((dict = PyDict_New()) == NULL)
         return -1;
 
+    if ((j - i) % 2 != 0) {
+        PickleState *st = _Pickle_GetGlobalState();
+        PyErr_SetString(st->UnpicklingError, "odd number of items for DICT");
+        return -1;
+    }
+
     for (k = i + 1; k < j; k += 2) {
         key = self->stack->data[k - 1];
         value = self->stack->data[k];
@@ -5201,7 +5222,7 @@ load_obj(UnpicklerObject *self)
         return -1;
 
     if (Py_SIZE(self->stack) - i < 1)
-        return stack_underflow();
+        return Pdata_stack_underflow(self->stack);
 
     args = Pdata_poptuple(self->stack, i + 1);
     if (args == NULL)
@@ -5518,12 +5539,15 @@ load_pop(UnpicklerObject *self)
      */
     if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
         self->num_marks--;
-    } else if (len > 0) {
+        self->stack->mark_set = self->num_marks != 0;
+        self->stack->fence = self->num_marks ?
+                self->marks[self->num_marks - 1] : 0;
+    } else if (len <= self->stack->fence)
+        return Pdata_stack_underflow(self->stack);
+    else {
         len--;
         Py_DECREF(self->stack->data[len]);
         Py_SIZE(self->stack) = len;
-    } else {
-        return stack_underflow();
     }
     return 0;
 }
@@ -5545,10 +5569,10 @@ static int
 load_dup(UnpicklerObject *self)
 {
     PyObject *last;
-    Py_ssize_t len;
+    Py_ssize_t len = Py_SIZE(self->stack);
 
-    if ((len = Py_SIZE(self->stack)) <= 0)
-        return stack_underflow();
+    if (len <= self->stack->fence)
+        return Pdata_stack_underflow(self->stack);
     last = self->stack->data[len - 1];
     PDATA_APPEND(self->stack, last, -1);
     return 0;
@@ -5731,8 +5755,8 @@ load_put(UnpicklerObject *self)
         return -1;
     if (len < 2)
         return bad_readline();
-    if (Py_SIZE(self->stack) <= 0)
-        return stack_underflow();
+    if (Py_SIZE(self->stack) <= self->stack->fence)
+        return Pdata_stack_underflow(self->stack);
     value = self->stack->data[Py_SIZE(self->stack) - 1];
 
     key = PyLong_FromString(s, NULL, 10);
@@ -5760,8 +5784,8 @@ load_binput(UnpicklerObject *self)
     if (_Unpickler_Read(self, &s, 1) < 0)
         return -1;
 
-    if (Py_SIZE(self->stack) <= 0)
-        return stack_underflow();
+    if (Py_SIZE(self->stack) <= self->stack->fence)
+        return Pdata_stack_underflow(self->stack);
     value = self->stack->data[Py_SIZE(self->stack) - 1];
 
     idx = Py_CHARMASK(s[0]);
@@ -5779,8 +5803,8 @@ load_long_binput(UnpicklerObject *self)
     if (_Unpickler_Read(self, &s, 4) < 0)
         return -1;
 
-    if (Py_SIZE(self->stack) <= 0)
-        return stack_underflow();
+    if (Py_SIZE(self->stack) <= self->stack->fence)
+        return Pdata_stack_underflow(self->stack);
     value = self->stack->data[Py_SIZE(self->stack) - 1];
 
     idx = calc_binsize(s, 4);
@@ -5798,8 +5822,8 @@ load_memoize(UnpicklerObject *self)
 {
     PyObject *value;
 
-    if (Py_SIZE(self->stack) <= 0)
-        return stack_underflow();
+    if (Py_SIZE(self->stack) <= self->stack->fence)
+        return Pdata_stack_underflow(self->stack);
     value = self->stack->data[Py_SIZE(self->stack) - 1];
 
     return _Unpickler_MemoPut(self, self->memo_len, value);
@@ -5813,8 +5837,8 @@ do_append(UnpicklerObject *self, Py_ssize_t x)
     Py_ssize_t len, i;
 
     len = Py_SIZE(self->stack);
-    if (x > len || x <= 0)
-        return stack_underflow();
+    if (x > len || x <= self->stack->fence)
+        return Pdata_stack_underflow(self->stack);
     if (len == x)  /* nothing to do */
         return 0;
 
@@ -5863,8 +5887,8 @@ do_append(UnpicklerObject *self, Py_ssize_t x)
 static int
 load_append(UnpicklerObject *self)
 {
-    if (Py_SIZE(self->stack) - 1 <= 0)
-        return stack_underflow();
+    if (Py_SIZE(self->stack) - 1 <= self->stack->fence)
+        return Pdata_stack_underflow(self->stack);
     return do_append(self, Py_SIZE(self->stack) - 1);
 }
 
@@ -5886,8 +5910,8 @@ do_setitems(UnpicklerObject *self, Py_ssize_t x)
     int status = 0;
 
     len = Py_SIZE(self->stack);
-    if (x > len || x <= 0)
-        return stack_underflow();
+    if (x > len || x <= self->stack->fence)
+        return Pdata_stack_underflow(self->stack);
     if (len == x)  /* nothing to do */
         return 0;
     if ((len - x) % 2 != 0) {
@@ -5940,8 +5964,8 @@ load_additems(UnpicklerObject *self)
     if (mark < 0)
         return -1;
     len = Py_SIZE(self->stack);
-    if (mark > len || mark <= 0)
-        return stack_underflow();
+    if (mark > len || mark <= self->stack->fence)
+        return Pdata_stack_underflow(self->stack);
     if (len == mark)  /* nothing to do */
         return 0;
 
@@ -5996,8 +6020,8 @@ load_build(UnpicklerObject *self)
     /* Stack is ... instance, state.  We want to leave instance at
      * the stack top, possibly mutated via instance.__setstate__(state).
      */
-    if (Py_SIZE(self->stack) < 2)
-        return stack_underflow();
+    if (Py_SIZE(self->stack) - 2 < self->stack->fence)
+        return Pdata_stack_underflow(self->stack);
 
     PDATA_POP(self->stack, state);
     if (state == NULL)
@@ -6133,7 +6157,8 @@ load_mark(UnpicklerObject *self)
         self->marks_size = (Py_ssize_t)alloc;
     }
 
-    self->marks[self->num_marks++] = Py_SIZE(self->stack);
+    self->stack->mark_set = 1;
+    self->marks[self->num_marks++] = self->stack->fence = Py_SIZE(self->stack);
 
     return 0;
 }
@@ -6216,6 +6241,8 @@ load(UnpicklerObject *self)
     char *s = NULL;
 
     self->num_marks = 0;
+    self->stack->mark_set = 0;
+    self->stack->fence = 0;
     self->proto = 0;
     if (Py_SIZE(self->stack))
         Pdata_clear(self->stack, 0);