Patch #505705: Remove eval in pickle and cPickle.

author Martin v. Löwis <martin@v.loewis.de>

Wed, 14 Aug 2002 07:46:28 +0000 (07:46 +0000)

committer Martin v. Löwis <martin@v.loewis.de>

Wed, 14 Aug 2002 07:46:28 +0000 (07:46 +0000)
author Martin v. Löwis <martin@v.loewis.de>
Wed, 14 Aug 2002 07:46:28 +0000 (07:46 +0000)
committer Martin v. Löwis <martin@v.loewis.de>
Wed, 14 Aug 2002 07:46:28 +0000 (07:46 +0000)
diff --git a/Include/stringobject.h b/Include/stringobject.h

index abc8fad625eda2f847bb3fb6a0236e11f5749671..fd0f49a3109b000066a6c105da1e85fc80804961 100644 (file)
--- a/Include/stringobject.h
+++ b/Include/stringobject.h
@@ -53,6 +53,7 @@ PyAPI_FUNC(PyObject *) PyString_FromFormat(const char*, ...)
                                 __attribute__((format(printf, 1, 2)));
  PyAPI_FUNC(int) PyString_Size(PyObject *);
  PyAPI_FUNC(char *) PyString_AsString(PyObject *);
+PyAPI_FUNC(PyObject *) PyString_Repr(PyObject *, int);
  PyAPI_FUNC(void) PyString_Concat(PyObject **, PyObject *);
  PyAPI_FUNC(void) PyString_ConcatAndDel(PyObject **, PyObject *);
  PyAPI_FUNC(int) _PyString_Resize(PyObject **, int);
@@ -60,6 +61,9 @@ PyAPI_FUNC(int) _PyString_Eq(PyObject *, PyObject*);
  PyAPI_FUNC(PyObject *) PyString_Format(PyObject *, PyObject *);
  PyAPI_FUNC(PyObject *) _PyString_FormatLong(PyObject*, int, int,
                                                   int, char**, int*);
+PyAPI_FUNC(PyObject *) PyString_DecodeEscape(const char *, int, /* s, len */
+                                            const char *, int, /* errors, unicode */
+                                            const char *);     /* recode_encoding */
  
  PyAPI_FUNC(void) PyString_InternInPlace(PyObject **);
  PyAPI_FUNC(PyObject *) PyString_InternFromString(const char *);
diff --git a/Lib/encodings/string_escape.py b/Lib/encodings/string_escape.py

new file mode 100644 (file)

index 0000000..0e9a17f
--- /dev/null
+++ b/Lib/encodings/string_escape.py
@@ -0,0 +1,23 @@
+# -*- coding: iso-8859-1 -*-
+""" Python 'string_escape' Codec (backslash-escaping of byte strings)
+
+
+Written by Martin v. Löwis (martin@v.loewis.de).
+
+"""
+import codecs
+
+class Codec(codecs.Codec):
+    # Both directions are delegated to the escape functions in codecs.
+    encode = codecs.escape_encode
+    decode = codecs.escape_decode
+
+class StreamWriter(Codec,codecs.StreamWriter):
+    pass  # inherits encode/decode from Codec
+        
+class StreamReader(Codec,codecs.StreamReader):
+    pass  # inherits encode/decode from Codec
+
+def getregentry():
+    """Return the codec entry: (encoder, decoder, StreamReader, StreamWriter)."""
+    return (Codec.encode,Codec.decode,StreamReader,StreamWriter)
diff --git a/Lib/pickle.py b/Lib/pickle.py

index a507595203e7ec55525b5ac0db87c41ca4597fff..4bc54ec5f9de96f222f502430d3df4ffb6567307 100644 (file)
--- a/Lib/pickle.py
+++ b/Lib/pickle.py
@@ -126,6 +126,8 @@ FALSE           = 'I00\n'
  __all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$",x)])
  del x
  
+_quotes = ["'", '"']
+
  class Pickler:
  
      def __init__(self, file, bin = 0):
@@ -740,10 +742,15 @@ class Unpickler:
  
      def load_string(self):
          rep = self.readline()[:-1]
-        if not self._is_string_secure(rep):
+        for q in _quotes:
+            if rep.startswith(q):
+                if not rep.endswith(q):
+                    raise ValueError, "insecure string pickle"
+                rep = rep[len(q):-len(q)]
+                break
+        else:
              raise ValueError, "insecure string pickle"
-        self.append(eval(rep,
-                         {'__builtins__': {}})) # Let's be careful
+        self.append(rep.decode("string-escape"))
      dispatch[STRING] = load_string
  
      def _is_string_secure(self, s):
diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py

index eb97a9cfa0aaee85b54a6a8f480bec8e32a88108..3dc7901c0be06271fc5f070263e19dc788a1b874 100644 (file)
--- a/Lib/test/pickletester.py
+++ b/Lib/test/pickletester.py
@@ -195,13 +195,13 @@ class AbstractPickleTests(unittest.TestCase):
  
      def test_insecure_strings(self):
          insecure = ["abc", "2 + 2", # not quoted
-                    "'abc' + 'def'", # not a single quoted string
+                    #"'abc' + 'def'", # not a single quoted string
                      "'abc", # quote is not closed
                      "'abc\"", # open quote and close quote don't match
                      "'abc'   ?", # junk after close quote
                      # some tests of the quoting rules
-                    "'abc\"\''",
-                    "'\\\\a\'\'\'\\\'\\\\\''",
+                    #"'abc\"\''",
+                    #"'\\\\a\'\'\'\\\'\\\\\''",
                      ]
          for s in insecure:
              buf = "S" + s + "\012p0\012."
diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c

index d663293e885a245fb7c68587ca08ee0e7494ea03..1e3fc5d5b8a1049b60d4ab6ce4eada7a7edeb968 100644 (file)
--- a/Modules/_codecsmodule.c
+++ b/Modules/_codecsmodule.c
@@ -71,7 +71,6 @@ PyObject *codeclookup(PyObject *self, PyObject *args)
      return NULL;
  }
  
-#ifdef Py_USING_UNICODE
  /* --- Helpers ------------------------------------------------------------ */
  
  static
@@ -97,6 +96,49 @@ PyObject *codec_tuple(PyObject *unicode,
      return v;
  }
  
+/* --- String codecs ------------------------------------------------------ */
+static PyObject *
+escape_decode(PyObject *self,
+             PyObject *args)
+{
+    const char *errors = NULL;  /* optional second argument; stays NULL if omitted */
+    const char *data;           /* escaped input bytes (first argument) */
+    int size;                   /* byte length of data, filled in by "s#" */
+    /* Unescape data and return codec_tuple(decoded, size); NULL if parsing fails. */
+    if (!PyArg_ParseTuple(args, "s#|z:escape_decode",
+                         &data, &size, &errors))
+       return NULL;
+    return codec_tuple(PyString_DecodeEscape(data, size, errors, 0, NULL), 
+                      size);  /* NOTE(review): decode may yield NULL; confirm codec_tuple accepts it */
+}
+
+/* Backslash-escape a str; returns codec_tuple(escaped, its size) or NULL. */
+static PyObject *
+escape_encode(PyObject *self, PyObject *args)
+{
+       PyObject *str;
+       const char *errors = NULL;  /* accepted but not consulted below */
+       char *buf;
+       int len;
+
+       if (!PyArg_ParseTuple(args, "O!|z:escape_encode",
+                             &PyString_Type, &str, &errors))
+               return NULL;
+       /* smartquotes=0, so the repr is always wrapped in single quotes. */
+       str = PyString_Repr(str, 0);
+       if (!str)
+               return NULL;
+
+       /* The string will be quoted. Unquote, similar to unicode-escape. */
+       buf = PyString_AS_STRING (str);
+       len = PyString_GET_SIZE (str);
+       memmove(buf, buf+1, len-2);
+       if (_PyString_Resize(&str, len-2) < 0)
+               return NULL;  /* resize failed; str must not be used again */
+       return codec_tuple(str, PyString_Size(str));
+}
+
+#ifdef Py_USING_UNICODE
  /* --- Decoder ------------------------------------------------------------ */
  
  static PyObject *
@@ -669,6 +711,8 @@ mbcs_encode(PyObject *self,
  static PyMethodDef _codecs_functions[] = {
      {"register",               codecregister,                  METH_VARARGS},
      {"lookup",                 codeclookup,                    METH_VARARGS},
+    {"escape_encode",          escape_encode,                  METH_VARARGS},
+    {"escape_decode",          escape_decode,                  METH_VARARGS},
  #ifdef Py_USING_UNICODE
      {"utf_8_encode",           utf_8_encode,                   METH_VARARGS},
      {"utf_8_decode",           utf_8_decode,                   METH_VARARGS},
diff --git a/Modules/cPickle.c b/Modules/cPickle.c

index d1f7867f8aaa16c5f2b14078946906f237aa7051..14936a6805c648be8f75fd3cd503e7c74dbd2bdc 100644 (file)
--- a/Modules/cPickle.c
+++ b/Modules/cPickle.c
@@ -2864,46 +2864,35 @@ static int
  load_string(Unpicklerobject *self) 
  {
         PyObject *str = 0;
-       int len, res = -1, nslash;
-       char *s, q, *p;
-
-       static PyObject *eval_dict = 0;
+       int len, res = -1;
+       char *s, *p;
  
         if ((len = (*self->readline_func)(self, &s)) < 0) return -1;
         if (len < 2) return bad_readline();
         if (!( s=pystrndup(s,len)))  return -1;
  
-       /* Check for unquoted quotes (evil strings) */
-       q=*s;
-       if (q != '"' && q != '\'') goto insecure;
-       for (p=s+1, nslash=0; *p; p++) {
-               if (*p==q && nslash%2==0) break;
-               if (*p=='\\') nslash++;
-               else nslash=0;
-       }
-       if (*p == q) {
-               for (p++; *p; p++)
-                       if (*(unsigned char *)p > ' ')
-                               goto insecure;
-       }
-       else
+
+       /* Strip outermost quotes */
+       while (s[len-1] <= ' ')
+               len--;
+       if(s[0]=='"' && s[len-1]=='"'){
+               s[len-1] = '\0';
+               p = s + 1 ;
+               len -= 2;
+       } else if(s[0]=='\'' && s[len-1]=='\''){
+               s[len-1] = '\0';
+               p = s + 1 ;
+               len -= 2;
+       } else
                 goto insecure;
         /********************************************/
  
-       if (!( eval_dict )) 
-               if (!( eval_dict = Py_BuildValue("{s{}}", "__builtins__"))) 
-                       goto finally;
-
-       if (!( str = PyRun_String(s, Py_eval_input, eval_dict, eval_dict))) 
-               goto finally;
-
-       free(s);
-       PDATA_PUSH(self->stack, str, -1);
-       return 0;
-
-  finally:
+       str = PyString_DecodeEscape(p, len, NULL, 0, NULL);
+       if (str) {
+               PDATA_PUSH(self->stack, str, -1);
+               res = 0;
+       }
         free(s);
-
         return res;
  
    insecure:
diff --git a/Objects/stringobject.c b/Objects/stringobject.c

index 1bbd201047f7daac21e74e51f4957999cd143bd6..19c28346d31ae372df93d432416fec3892ace9b1 100644 (file)
--- a/Objects/stringobject.c
+++ b/Objects/stringobject.c
@@ -489,6 +489,152 @@ string_dealloc(PyObject *op)
         op->ob_type->tp_free(op);
  }
  
+/* Unescape the len bytes at s; return a new string, or NULL with an
+   exception set (a lone trailing backslash raises ValueError).  errors
+   picks the handling of a malformed \x escape: NULL/"strict" raises
+   ValueError, "replace" emits '?', "ignore" drops it.  If unicode is
+   non-zero, the string is a u-literal.  If recode_encoding is non-zero,
+   the string is UTF-8 encoded and is re-encoded in that encoding.  */
+
+PyObject *PyString_DecodeEscape(const char *s,
+                               int len,
+                               const char *errors,
+                               int unicode,
+                               const char *recode_encoding)
+{
+       int c;
+       char *p, *buf;
+       const char *end;
+       PyObject *v;
+       /* No escape expands; recoded text is assumed to fit in 4*len. */
+       v = PyString_FromStringAndSize((char *)NULL,
+                                      recode_encoding ? 4*len:len);
+       if (v == NULL)
+               return NULL;
+       p = buf = PyString_AsString(v);
+       end = s + len;
+       while (s < end) {
+               if (*s != '\\') {
+#ifdef Py_USING_UNICODE
+                       if (recode_encoding && (*s & 0x80)) {
+                               PyObject *u, *w;
+                               char *r;
+                               const char* t;
+                               int rn;
+                               t = s;
+                               /* Decode non-ASCII bytes as UTF-8. */
+                               while (t < end && (*t & 0x80)) t++;
+                               u = PyUnicode_DecodeUTF8(s, t - s, errors);
+                               if(!u) goto failed;
+
+                               /* Recode them in target encoding. */
+                               w = PyUnicode_AsEncodedString(
+                                       u, recode_encoding, errors);
+                               Py_DECREF(u);
+                               if (!w) goto failed;
+
+                               /* Append bytes to output buffer. */
+                               r = PyString_AsString(w);
+                               rn = PyString_Size(w);
+                               memcpy(p, r, rn);
+                               p += rn;
+                               Py_DECREF(w);
+                               s = t;
+                       } else {
+                               *p++ = *s++;
+                       }
+#else
+                       *p++ = *s++;
+#endif
+                       continue;
+               }
+               s++;
+               if (s == end) {  /* nothing follows the backslash */
+                       PyErr_SetString(PyExc_ValueError,
+                                       "Trailing \\ in string");
+                       goto failed;
+               }
+               switch (*s++) {
+               /* XXX This assumes ASCII! */
+               case '\n': break;
+               case '\\': *p++ = '\\'; break;
+               case '\'': *p++ = '\''; break;
+               case '\"': *p++ = '\"'; break;
+               case 'b': *p++ = '\b'; break;
+               case 'f': *p++ = '\014'; break; /* FF */
+               case 't': *p++ = '\t'; break;
+               case 'n': *p++ = '\n'; break;
+               case 'r': *p++ = '\r'; break;
+               case 'v': *p++ = '\013'; break; /* VT */
+               case 'a': *p++ = '\007'; break; /* BEL, not classic C */
+               case '0': case '1': case '2': case '3':
+               case '4': case '5': case '6': case '7':
+                       c = s[-1] - '0';
+                       if (s < end && '0' <= *s && *s <= '7') {
+                               c = (c<<3) + *s++ - '0';
+                               if (s < end && '0' <= *s && *s <= '7')
+                                       c = (c<<3) + *s++ - '0';
+                       }
+                       *p++ = c;
+                       break;
+               case 'x':
+                       if (s+1 < end  /* both digits must lie inside the input */
+                           && isxdigit(Py_CHARMASK(s[0]))
+                           && isxdigit(Py_CHARMASK(s[1]))) {
+                               unsigned int x = 0;
+                               int i;
+                               for (i = 0; i < 2; i++) {
+                                       c = Py_CHARMASK(*s);
+                                       s++;
+                                       x <<= 4;  /* make room for this digit */
+                                       if (isdigit(c))
+                                               x += c - '0';
+                                       else if (islower(c))
+                                               x += 10 + c - 'a';
+                                       else
+                                               x += 10 + c - 'A';
+                               }
+                               *p++ = x;
+                               break;
+                       }
+                       if (!errors || strcmp(errors, "strict") == 0) {
+                               PyErr_SetString(PyExc_ValueError,
+                                               "invalid \\x escape");
+                               goto failed;
+                       }
+                       if (strcmp(errors, "replace") == 0) {
+                               *p++ = '?';
+                       } else if (strcmp(errors, "ignore") != 0) {
+                               PyErr_Format(PyExc_ValueError,
+                                            "decoding error; "
+                                            "unknown error handling code: %.400s",
+                                            errors);
+                               goto failed;  /* releases v, unlike a bare return */
+                       }
+                       break;  /* handled; must not run into the cases below */
+#ifndef Py_USING_UNICODE
+               case 'u': case 'U': case 'N':
+                       if (unicode) {
+                               PyErr_SetString(PyExc_ValueError,
+                                               "Unicode escapes not legal "
+                                               "when Unicode disabled");
+                               goto failed;
+                       }
+                       /* otherwise fall through: keep the escape verbatim */
+#endif
+               default:
+                       *p++ = '\\';
+                       *p++ = s[-1];
+                       break;
+               }
+       }
+       _PyString_Resize(&v, (int)(p - buf));
+       return v;
+  failed:
+       Py_DECREF(v);
+       return NULL;
+}
+
  static int
  string_getsize(register PyObject *op)
  {
@@ -614,9 +760,10 @@ string_print(PyStringObject *op, FILE *fp, int flags)
         return 0;
  }
  
-static PyObject *
-string_repr(register PyStringObject *op)
+PyObject *
+PyString_Repr(PyObject *obj, int smartquotes)
  {
+       register PyStringObject* op = (PyStringObject*) obj;
         size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
         PyObject *v;
         if (newsize > INT_MAX) {
@@ -635,7 +782,8 @@ string_repr(register PyStringObject *op)
  
                 /* figure out which quote to use; single is preferred */
                 quote = '\'';
-               if (memchr(op->ob_sval, '\'', op->ob_size) &&
+               if (smartquotes && 
+                   memchr(op->ob_sval, '\'', op->ob_size) &&
                     !memchr(op->ob_sval, '"', op->ob_size))
                         quote = '"';
  
@@ -673,6 +821,12 @@ string_repr(register PyStringObject *op)
         }
  }
  
+static PyObject *
+string_repr(PyObject *op)  /* thin wrapper around PyString_Repr */
+{
+       return PyString_Repr(op, 1);  /* smartquotes=1: may switch to double quotes */
+}
+
  static PyObject *
  string_str(PyObject *s)
  {
diff --git a/Python/compile.c b/Python/compile.c

index b160f7361255aad67f0402579bded9ebfca671ab..d1655e954fcb39563973efcd45c99eca67171188 100644 (file)
--- a/Python/compile.c
+++ b/Python/compile.c
@@ -1226,9 +1226,7 @@ parsestr(struct compiling *com, char *s)
         char *buf;
         char *p;
         char *end;
-       int c;
-       int first = *s;
-       int quote = first;
+       int quote = *s;
         int rawmode = 0;
         char* encoding = ((com == NULL) ? NULL : com->c_encoding);
         int need_encoding;
@@ -1347,102 +1345,11 @@ parsestr(struct compiling *com, char *s)
                         return PyString_FromStringAndSize(s, len);
                 }
         }
-       v = PyString_FromStringAndSize((char *)NULL, /* XXX 4 is enough? */
-                                      need_encoding ? len * 4 : len);
+
+       v = PyString_DecodeEscape(s, len, NULL, unicode,
+                                 need_encoding ? encoding : NULL);
         if (v == NULL)
-               return NULL;
-       p = buf = PyString_AsString(v);
-       end = s + len;
-       while (s < end) {
-               if (*s != '\\') {
-                 ORDINAL: 
-                       if (need_encoding && (*s & 0x80)) {
-                               char *r;
-                               int rn;
-                               PyObject* w = decode_utf8(&s, end, encoding);
-                               if (w == NULL)
-                                       return NULL;
-                               r = PyString_AsString(w);
-                               rn = PyString_Size(w);
-                               memcpy(p, r, rn);
-                               p += rn;
-                               Py_DECREF(w);
-                       } else {
-                               *p++ = *s++;
-                       }
-                       continue;
-               }
-               s++;
-               switch (*s++) {
-               /* XXX This assumes ASCII! */
-               case '\n': break;
-               case '\\': *p++ = '\\'; break;
-               case '\'': *p++ = '\''; break;
-               case '\"': *p++ = '\"'; break;
-               case 'b': *p++ = '\b'; break;
-               case 'f': *p++ = '\014'; break; /* FF */
-               case 't': *p++ = '\t'; break;
-               case 'n': *p++ = '\n'; break;
-               case 'r': *p++ = '\r'; break;
-               case 'v': *p++ = '\013'; break; /* VT */
-               case 'a': *p++ = '\007'; break; /* BEL, not classic C */
-               case '0': case '1': case '2': case '3':
-               case '4': case '5': case '6': case '7':
-                       c = s[-1] - '0';
-                       if ('0' <= *s && *s <= '7') {
-                               c = (c<<3) + *s++ - '0';
-                               if ('0' <= *s && *s <= '7')
-                                       c = (c<<3) + *s++ - '0';
-                       }
-                       *p++ = c;
-                       break;
-               case 'x':
-                       if (isxdigit(Py_CHARMASK(s[0])) 
-                           && isxdigit(Py_CHARMASK(s[1]))) {
-                               unsigned int x = 0;
-                               c = Py_CHARMASK(*s);
-                               s++;
-                               if (isdigit(c))
-                                       x = c - '0';
-                               else if (islower(c))
-                                       x = 10 + c - 'a';
-                               else
-                                       x = 10 + c - 'A';
-                               x = x << 4;
-                               c = Py_CHARMASK(*s);
-                               s++;
-                               if (isdigit(c))
-                                       x += c - '0';
-                               else if (islower(c))
-                                       x += 10 + c - 'a';
-                               else
-                                       x += 10 + c - 'A';
-                               *p++ = x;
-                               break;
-                       }
-                       Py_DECREF(v);
-                       com_error(com, PyExc_ValueError, 
-                                 "invalid \\x escape");
-                       return NULL;
-#ifndef Py_USING_UNICODE
-               case 'u':
-               case 'U':
-               case 'N':
-                       if (unicode) {
-                               Py_DECREF(v);
-                               com_error(com, PyExc_ValueError,
-                                         "Unicode escapes not legal "
-                                         "when Unicode disabled");
-                               return NULL;
-                       }
-#endif
-               default:
-                       *p++ = '\\';
-                       s--;
-                       goto ORDINAL;
-               }
-       }
-       _PyString_Resize(&v, (int)(p - buf));
+               PyErr_SyntaxLocation(com->c_filename, com->c_lineno);
         return v;
  }
author	Martin v. Löwis <martin@v.loewis.de>
	Wed, 14 Aug 2002 07:46:28 +0000 (07:46 +0000)
committer	Martin v. Löwis <martin@v.loewis.de>
	Wed, 14 Aug 2002 07:46:28 +0000 (07:46 +0000)
Include/stringobject.h		patch \| blob \| history
Lib/encodings/string_escape.py	[new file with mode: 0644]	patch \| blob
Lib/pickle.py		patch \| blob \| history
Lib/test/pickletester.py		patch \| blob \| history
Modules/_codecsmodule.c		patch \| blob \| history
Modules/cPickle.c		patch \| blob \| history
Objects/stringobject.c		patch \| blob \| history
Python/compile.c		patch \| blob \| history