]> granicus.if.org Git - python/commitdiff
bpo-30070: Fixed leaks and crashes in errors handling in the parser module. (#1131)
authorSerhiy Storchaka <storchaka@gmail.com>
Wed, 19 Apr 2017 18:09:21 +0000 (21:09 +0300)
committerGitHub <noreply@github.com>
Wed, 19 Apr 2017 18:09:21 +0000 (21:09 +0300)
Lib/test/test_parser.py
Misc/NEWS
Modules/parsermodule.c

index d6e6f7157782ec9872b154c958fa9bafc2ec54e2..70cabb285982180e13de060466519e8ecbe227d7 100644 (file)
@@ -1,4 +1,6 @@
+import copy
 import parser
+import pickle
 import unittest
 import operator
 import struct
@@ -424,6 +426,52 @@ class IllegalSyntaxTestCase(unittest.TestCase):
         # not even remotely valid:
         self.check_bad_tree((1, 2, 3), "<junk>")
 
+    def test_illegal_terminal(self):
+        tree = \
+            (257,
+             (269,
+              (270,
+               (271,
+                (277,
+                 (1,))),
+               (4, ''))),
+             (4, ''),
+             (0, ''))
+        self.check_bad_tree(tree, "too small items in terminal node")
+        tree = \
+            (257,
+             (269,
+              (270,
+               (271,
+                (277,
+                 (1, b'pass'))),
+               (4, ''))),
+             (4, ''),
+             (0, ''))
+        self.check_bad_tree(tree, "non-string second item in terminal node")
+        tree = \
+            (257,
+             (269,
+              (270,
+               (271,
+                (277,
+                 (1, 'pass', '0', 0))),
+               (4, ''))),
+             (4, ''),
+             (0, ''))
+        self.check_bad_tree(tree, "non-integer third item in terminal node")
+        tree = \
+            (257,
+             (269,
+              (270,
+               (271,
+                (277,
+                 (1, 'pass', 0, 0))),
+               (4, ''))),
+             (4, ''),
+             (0, ''))
+        self.check_bad_tree(tree, "too many items in terminal node")
+
     def test_illegal_yield_1(self):
         # Illegal yield statement: def f(): return 1; yield 1
         tree = \
@@ -628,6 +676,24 @@ class IllegalSyntaxTestCase(unittest.TestCase):
              (4, ''), (0, ''))
         self.check_bad_tree(tree, "from import fred")
 
+    def test_illegal_encoding(self):
+        # Illegal encoding declaration
+        tree = \
+            (339,
+             (257, (0, '')))
+        self.check_bad_tree(tree, "missed encoding")
+        tree = \
+            (339,
+             (257, (0, '')),
+              b'iso-8859-1')
+        self.check_bad_tree(tree, "non-string encoding")
+        tree = \
+            (339,
+             (257, (0, '')),
+              '\udcff')
+        with self.assertRaises(UnicodeEncodeError):
+            parser.sequence2st(tree)
+
 
 class CompileTestCase(unittest.TestCase):
 
@@ -772,6 +838,21 @@ class STObjectTestCase(unittest.TestCase):
         self.assertRaises(TypeError, operator.lt, st1, 1815)
         self.assertRaises(TypeError, operator.gt, b'waterloo', st2)
 
+    def test_copy_pickle(self):
+        sts = [
+            parser.expr('2 + 3'),
+            parser.suite('x = 2; y = x + 3'),
+            parser.expr('list(x**3 for x in range(20))')
+        ]
+        for st in sts:
+            st_copy = copy.copy(st)
+            self.assertEqual(st_copy.totuple(), st.totuple())
+            st_copy = copy.deepcopy(st)
+            self.assertEqual(st_copy.totuple(), st.totuple())
+            for proto in range(pickle.HIGHEST_PROTOCOL+1):
+                st_copy = pickle.loads(pickle.dumps(st, proto))
+                self.assertEqual(st_copy.totuple(), st.totuple())
+
     check_sizeof = support.check_sizeof
 
     @support.cpython_only
index e57faacf615daa14f88873b7b04327a8f64d8bab..3ec02e0e3e8ef6129ae2949432d88b5d04330825 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -313,6 +313,8 @@ Extension Modules
 Library
 -------
 
+- bpo-30070: Fixed leaks and crashes in errors handling in the parser module.
+
 - bpo-22352: Column widths in the output of dis.dis() are now adjusted for
   large line numbers and instruction offsets.
 
index e5c4db3048c524792afd71c057cf9a0aa5d0527e..929f2deb16c428655cc7f088e80bdf5fc9db7dfd 100644 (file)
@@ -697,7 +697,7 @@ validate_node(node *tree)
             short a_label = dfa_state->s_arc[arc].a_lbl;
             assert(a_label < _PyParser_Grammar.g_ll.ll_nlabels);
             if (_PyParser_Grammar.g_ll.ll_label[a_label].lb_type == ch_type) {
-               /* The child is acceptable; if non-terminal, validate it recursively. */
+                /* The child is acceptable; if non-terminal, validate it recursively. */
                 if (ISNONTERMINAL(ch_type) && !validate_node(ch))
                     return 0;
 
@@ -775,32 +775,35 @@ parser_tuple2st(PyST_Object *self, PyObject *args, PyObject *kw)
      */
     tree = build_node_tree(tuple);
     if (tree != 0) {
-        node *validation_root = tree;
+        node *validation_root = NULL;
         int tree_type = 0;
         switch (TYPE(tree)) {
         case eval_input:
             /*  Might be an eval form.  */
             tree_type = PyST_EXPR;
+            validation_root = tree;
             break;
         case encoding_decl:
             /* This looks like an encoding_decl so far. */
-            if (NCH(tree) != 1)
+            if (NCH(tree) == 1) {
+                tree_type = PyST_SUITE;
+                validation_root = CHILD(tree, 0);
+            }
+            else {
                 err_string("Error Parsing encoding_decl");
-            validation_root = CHILD(tree, 0);
-            /* Fall through */
+            }
+            break;
         case file_input:
             /*  This looks like an exec form so far.  */
-
             tree_type = PyST_SUITE;
+            validation_root = tree;
             break;
         default:
             /*  This is a fragment, at best. */
-            PyNode_Free(tree);
             err_string("parse tree does not use a valid start symbol");
-            return (0);
         }
 
-        if (validate_node(validation_root))
+        if (validation_root != NULL && validate_node(validation_root))
             st = parser_newstobject(tree, tree_type);
         else
             PyNode_Free(tree);
@@ -830,6 +833,9 @@ build_node_children(PyObject *tuple, node *root, int *line_num)
     Py_ssize_t i;
     int  err;
 
+    if (len < 0) {
+        return NULL;
+    }
     for (i = 1; i < len; ++i) {
         /* elem must always be a sequence, however simple */
         PyObject* elem = PySequence_GetItem(tuple, i);
@@ -850,7 +856,7 @@ build_node_children(PyObject *tuple, node *root, int *line_num)
                     if (type == -1 && PyErr_Occurred()) {
                         Py_DECREF(temp);
                         Py_DECREF(elem);
-                        return 0;
+                        return NULL;
                     }
                 }
                 Py_DECREF(temp);
@@ -862,7 +868,7 @@ build_node_children(PyObject *tuple, node *root, int *line_num)
             PyErr_SetObject(parser_error, err);
             Py_XDECREF(err);
             Py_XDECREF(elem);
-            return (0);
+            return NULL;
         }
         if (ISTERMINAL(type)) {
             Py_ssize_t len = PyObject_Size(elem);
@@ -871,11 +877,14 @@ build_node_children(PyObject *tuple, node *root, int *line_num)
 
             if ((len != 2) && (len != 3)) {
                 err_string("terminal nodes must have 2 or 3 entries");
-                return 0;
+                Py_DECREF(elem);
+                return NULL;
             }
             temp = PySequence_GetItem(elem, 1);
-            if (temp == NULL)
-                return 0;
+            if (temp == NULL) {
+                Py_DECREF(elem);
+                return NULL;
+            }
             if (!PyUnicode_Check(temp)) {
                 PyErr_Format(parser_error,
                              "second item in terminal node must be a string,"
@@ -883,46 +892,49 @@ build_node_children(PyObject *tuple, node *root, int *line_num)
                              Py_TYPE(temp)->tp_name);
                 Py_DECREF(temp);
                 Py_DECREF(elem);
-                return 0;
+                return NULL;
             }
             if (len == 3) {
                 PyObject *o = PySequence_GetItem(elem, 2);
-                if (o != NULL) {
-                    if (PyLong_Check(o)) {
-                        int num = _PyLong_AsInt(o);
-                        if (num == -1 && PyErr_Occurred()) {
-                            Py_DECREF(o);
-                            Py_DECREF(temp);
-                            Py_DECREF(elem);
-                            return 0;
-                        }
-                        *line_num = num;
-                    }
-                    else {
-                        PyErr_Format(parser_error,
-                                     "third item in terminal node must be an"
-                                     " integer, found %s",
-                                     Py_TYPE(temp)->tp_name);
+                if (o == NULL) {
+                    Py_DECREF(temp);
+                    Py_DECREF(elem);
+                    return NULL;
+                }
+                if (PyLong_Check(o)) {
+                    int num = _PyLong_AsInt(o);
+                    if (num == -1 && PyErr_Occurred()) {
                         Py_DECREF(o);
                         Py_DECREF(temp);
                         Py_DECREF(elem);
-                        return 0;
+                        return NULL;
                     }
+                    *line_num = num;
+                }
+                else {
+                    PyErr_Format(parser_error,
+                                 "third item in terminal node must be an"
+                                 " integer, found %s",
+                                 Py_TYPE(temp)->tp_name);
                     Py_DECREF(o);
+                    Py_DECREF(temp);
+                    Py_DECREF(elem);
+                    return NULL;
                 }
+                Py_DECREF(o);
             }
             temp_str = PyUnicode_AsUTF8AndSize(temp, &len);
             if (temp_str == NULL) {
                 Py_DECREF(temp);
-                Py_XDECREF(elem);
-                return 0;
+                Py_DECREF(elem);
+                return NULL;
             }
             strn = (char *)PyObject_MALLOC(len + 1);
             if (strn == NULL) {
                 Py_DECREF(temp);
-                Py_XDECREF(elem);
+                Py_DECREF(elem);
                 PyErr_NoMemory();
-                return 0;
+                return NULL;
             }
             (void) memcpy(strn, temp_str, len + 1);
             Py_DECREF(temp);
@@ -932,20 +944,21 @@ build_node_children(PyObject *tuple, node *root, int *line_num)
              *  It has to be one or the other; this is an error.
              *  Raise an exception.
              */
-            PyObject *err = Py_BuildValue("os", elem, "unknown node type.");
+            PyObject *err = Py_BuildValue("Os", elem, "unknown node type.");
             PyErr_SetObject(parser_error, err);
             Py_XDECREF(err);
-            Py_XDECREF(elem);
-            return (0);
+            Py_DECREF(elem);
+            return NULL;
         }
         err = PyNode_AddChild(root, type, strn, *line_num, 0);
         if (err == E_NOMEM) {
-            Py_XDECREF(elem);
+            Py_DECREF(elem);
             PyObject_FREE(strn);
-            return (node *) PyErr_NoMemory();
+            PyErr_NoMemory();
+            return NULL;
         }
         if (err == E_OVERFLOW) {
-            Py_XDECREF(elem);
+            Py_DECREF(elem);
             PyObject_FREE(strn);
             PyErr_SetString(PyExc_ValueError,
                             "unsupported number of child nodes");
@@ -956,14 +969,14 @@ build_node_children(PyObject *tuple, node *root, int *line_num)
             node* new_child = CHILD(root, i - 1);
 
             if (new_child != build_node_children(elem, new_child, line_num)) {
-                Py_XDECREF(elem);
-                return (0);
+                Py_DECREF(elem);
+                return NULL;
             }
         }
         else if (type == NEWLINE) {     /* It's true:  we increment the     */
             ++(*line_num);              /* line number *after* the newline! */
         }
-        Py_XDECREF(elem);
+        Py_DECREF(elem);
     }
     return root;
 }
@@ -998,10 +1011,23 @@ build_node_tree(PyObject *tuple)
 
         if (num == encoding_decl) {
             encoding = PySequence_GetItem(tuple, 2);
+            if (encoding == NULL) {
+                PyErr_SetString(parser_error, "missed encoding");
+                return NULL;
+            }
+            if (!PyUnicode_Check(encoding)) {
+                PyErr_Format(parser_error,
+                             "encoding must be a string, found %.200s",
+                             Py_TYPE(encoding)->tp_name);
+                Py_DECREF(encoding);
+                return NULL;
+            }
             /* tuple isn't borrowed anymore here, need to DECREF */
             tuple = PySequence_GetSlice(tuple, 0, 2);
-            if (tuple == NULL)
+            if (tuple == NULL) {
+                Py_DECREF(encoding);
                 return NULL;
+            }
         }
         res = PyNode_New(num);
         if (res != NULL) {
@@ -1014,31 +1040,33 @@ build_node_tree(PyObject *tuple)
                 const char *temp;
                 temp = PyUnicode_AsUTF8AndSize(encoding, &len);
                 if (temp == NULL) {
-                    Py_DECREF(res);
+                    PyNode_Free(res);
                     Py_DECREF(encoding);
                     Py_DECREF(tuple);
                     return NULL;
                 }
                 res->n_str = (char *)PyObject_MALLOC(len + 1);
                 if (res->n_str == NULL) {
-                    Py_DECREF(res);
+                    PyNode_Free(res);
                     Py_DECREF(encoding);
                     Py_DECREF(tuple);
                     PyErr_NoMemory();
                     return NULL;
                 }
                 (void) memcpy(res->n_str, temp, len + 1);
-                Py_DECREF(encoding);
-                Py_DECREF(tuple);
             }
         }
+        if (encoding != NULL) {
+            Py_DECREF(encoding);
+            Py_DECREF(tuple);
+        }
     }
     else {
         /*  The tuple is illegal -- if the number is neither TERMINAL nor
          *  NONTERMINAL, we can't use it.  Not sure the implementation
          *  allows this condition, but the API doesn't preclude it.
          */
-        PyObject *err = Py_BuildValue("os", tuple,
+        PyObject *err = Py_BuildValue("Os", tuple,
                                       "Illegal component tuple.");
         PyErr_SetObject(parser_error, err);
         Py_XDECREF(err);
@@ -1073,7 +1101,6 @@ parser__pickler(PyObject *self, PyObject *args)
             result = Py_BuildValue("O(O)", pickle_constructor, tuple);
             Py_DECREF(tuple);
         }
-        Py_DECREF(empty_dict);
         Py_DECREF(newargs);
     }
   finally: