]> granicus.if.org Git - python/commitdiff
Make C and Python implementations of pickle load STRING opcodes the same way.
authorAlexandre Vassalotti <alexandre@peadrop.com>
Tue, 16 Apr 2013 06:14:55 +0000 (23:14 -0700)
committerAlexandre Vassalotti <alexandre@peadrop.com>
Tue, 16 Apr 2013 06:14:55 +0000 (23:14 -0700)
The C version tried to remove trailing whitespace between the last quote and
the newline character. I am not sure why it had this because pickle never
generated such pickles---for this to happen repr(some_string) would need to
return trailing whitespace. It was maybe there to make it easier for people
to write pickles in text editors. Anyhow, the Python version doesn't do this
so there is no point keeping this around anymore.

Also, I've changed the exception raised when a bad pickle is encountered.
Again this unlikely to make much difference to anyone though it does make
testing slightly nicer for us.

Lib/pickle.py
Lib/test/pickletester.py
Modules/_pickle.c

index a4acbe941e0309b512f80563b9f50114c66f041a..d121ec99b5de6945aa368087283279fdc0dace50 100644 (file)
@@ -900,14 +900,13 @@ class _Unpickler:
     dispatch[BINFLOAT[0]] = load_binfloat
 
     def load_string(self):
-        orig = self.readline()
-        rep = orig[:-1]
+        data = self.readline()[:-1]
         # Strip outermost quotes
-        if len(rep) >= 2 and rep[0] == rep[-1] and rep[0] in b'"\'':
-            rep = rep[1:-1]
+        if len(data) >= 2 and data[0] == data[-1] and data[0] in b'"\'':
+            data = data[1:-1]
         else:
-            raise ValueError("insecure string pickle")
-        self.append(codecs.escape_decode(rep)[0]
+            raise UnpicklingError("the STRING opcode argument must be quoted")
+        self.append(codecs.escape_decode(data)[0]
                     .decode(self.encoding, self.errors))
     dispatch[STRING[0]] = load_string
 
index a72ab377c010dc957bc552a238fec0e35c9ddd5c..c58b8761c0befec0cb2c0bc302ef9d837590c9a7 100644 (file)
@@ -601,30 +601,6 @@ class AbstractPickleTests(unittest.TestCase):
         self.assertRaises(KeyError, self.loads, b'g0\np0')
         self.assertEqual(self.loads(b'((Kdtp0\nh\x00l.))'), [(100,), (100,)])
 
-    def test_insecure_strings(self):
-        # XXX Some of these tests are temporarily disabled
-        insecure = [b"abc", b"2 + 2", # not quoted
-                    ## b"'abc' + 'def'", # not a single quoted string
-                    b"'abc", # quote is not closed
-                    b"'abc\"", # open quote and close quote don't match
-                    b"'abc'   ?", # junk after close quote
-                    b"'\\'", # trailing backslash
-                    # Variations on issue #17710
-                    b"'",
-                    b'"',
-                    b"' ",
-                    b"'  ",
-                    b"'   ",
-                    b"'    ",
-                    b'"    ',
-                    # some tests of the quoting rules
-                    ## b"'abc\"\''",
-                    ## b"'\\\\a\'\'\'\\\'\\\\\''",
-                    ]
-        for b in insecure:
-            buf = b"S" + b + b"\012p0\012."
-            self.assertRaises(ValueError, self.loads, buf)
-
     def test_unicode(self):
         endcases = ['', '<\\u>', '<\\\u1234>', '<\n>',
                     '<\\>', '<\\\U00012345>',
@@ -1214,6 +1190,35 @@ class AbstractPickleTests(unittest.TestCase):
         dumped = b'\x80\x03X\x01\x00\x00\x00ar\xff\xff\xff\xff.'
         self.assertRaises(ValueError, self.loads, dumped)
 
+    def test_badly_escaped_string(self):
+        self.assertRaises(ValueError, self.loads, b"S'\\'\n.")
+
+    def test_badly_quoted_string(self):
+        # Issue #17710
+        badpickles = [b"S'\n.",
+                      b'S"\n.',
+                      b'S\' \n.',
+                      b'S" \n.',
+                      b'S\'"\n.',
+                      b'S"\'\n.',
+                      b"S' ' \n.",
+                      b'S" " \n.',
+                      b"S ''\n.",
+                      b'S ""\n.',
+                      b'S \n.',
+                      b'S\n.',
+                      b'S.']
+        for p in badpickles:
+            self.assertRaises(pickle.UnpicklingError, self.loads, p)
+
+    def test_correctly_quoted_string(self):
+        goodpickles = [(b"S''\n.", ''),
+                       (b'S""\n.', ''),
+                       (b'S"\\n"\n.', '\n'),
+                       (b"S'\\n'\n.", '\n')]
+        for p, expected in goodpickles:
+            self.assertEqual(self.loads(p), expected)
+
 
 class BigmemPickleTests(unittest.TestCase):
 
index 2c83185dde9505cfc02859cd09f32c209c117e2c..fbbb74522e1e2df117695e7e43ce946381ccbd30 100644 (file)
@@ -4205,36 +4205,23 @@ load_string(UnpicklerObject *self)
 
     if ((len = _Unpickler_Readline(self, &s)) < 0)
         return -1;
-    if (len < 2)
-        return bad_readline();
-    if ((s = strdup(s)) == NULL) {
-        PyErr_NoMemory();
-        return -1;
-    }
-
+    /* Strip the newline */
+    len--;
     /* Strip outermost quotes */
-    while (len > 0 && s[len - 1] <= ' ')
-        len--;
-    if (len > 1 && s[0] == '"' && s[len - 1] == '"') {
-        s[len - 1] = '\0';
-        p = s + 1;
-        len -= 2;
-    }
-    else if (len > 1 && s[0] == '\'' && s[len - 1] == '\'') {
-        s[len - 1] = '\0';
+    if (len >= 2 && s[0] == s[len - 1] && (s[0] == '\'' || s[0] == '"')) {
         p = s + 1;
         len -= 2;
     }
     else {
-        free(s);
-        PyErr_SetString(PyExc_ValueError, "insecure string pickle");
+        PyErr_SetString(UnpicklingError,
+                        "the STRING opcode argument must be quoted");
         return -1;
     }
+    assert(len >= 0);
 
     /* Use the PyBytes API to decode the string, since that is what is used
        to encode, and then coerce the result to Unicode. */
     bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
-    free(s);
     if (bytes == NULL)
         return -1;
     str = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);