]> granicus.if.org Git - python/commitdiff
Issue #24802: Copy bytes-like objects to null-terminated buffers if necessary
authorMartin Panter <vadmium+py@gmail.com>
Sat, 7 Nov 2015 02:32:21 +0000 (02:32 +0000)
committerMartin Panter <vadmium+py@gmail.com>
Sat, 7 Nov 2015 02:32:21 +0000 (02:32 +0000)
This avoids possible buffer overreads when int(), float(), compile(), exec()
and eval() are passed bytes-like objects. Similar code is removed from the
complex() constructor, where it was not reachable.

Patch by John Leitch, Serhiy Storchaka and Martin Panter.

Lib/test/test_compile.py
Lib/test/test_float.py
Lib/test/test_int.py
Misc/NEWS
Objects/abstract.c
Objects/complexobject.c
Objects/floatobject.c
Python/bltinmodule.c

index cff3c9ea0b5ed6f141c4ea6b2fdd583dcb9db3ba..2affcc92c3329f645fda916a07cf891cc3419bdf 100644 (file)
@@ -530,6 +530,27 @@ if 1:
         check_limit("a", "[0]")
         check_limit("a", "*a")
 
+    def test_null_terminated(self):
+        # The source code is null-terminated internally, but bytes-like
+        # objects are accepted, which could be not terminated.
+        # Exception changed from TypeError to ValueError in 3.5
+        with self.assertRaisesRegex(Exception, "cannot contain null"):
+            compile("123\x00", "<dummy>", "eval")
+        with self.assertRaisesRegex(Exception, "cannot contain null"):
+            compile(memoryview(b"123\x00"), "<dummy>", "eval")
+        code = compile(memoryview(b"123\x00")[1:-1], "<dummy>", "eval")
+        self.assertEqual(eval(code), 23)
+        code = compile(memoryview(b"1234")[1:-1], "<dummy>", "eval")
+        self.assertEqual(eval(code), 23)
+        code = compile(memoryview(b"$23$")[1:-1], "<dummy>", "eval")
+        self.assertEqual(eval(code), 23)
+
+        # Also test when eval() and exec() do the compilation step
+        self.assertEqual(eval(memoryview(b"1234")[1:-1]), 23)
+        namespace = dict()
+        exec(memoryview(b"ax = 123")[1:-1], namespace)
+        self.assertEqual(namespace['x'], 12)
+
 
 class TestStackSize(unittest.TestCase):
     # These tests check that the computed stack size for a code object
index e87aab0dd396cd35d0a2dc09e4244349d1e32e66..504f39c4eedd786ba87e75da40cd43a30d5f7da4 100644 (file)
@@ -31,7 +31,6 @@ class GeneralFloatCases(unittest.TestCase):
         self.assertEqual(float(3.14), 3.14)
         self.assertEqual(float(314), 314.0)
         self.assertEqual(float("  3.14  "), 3.14)
-        self.assertEqual(float(b" 3.14  "), 3.14)
         self.assertRaises(ValueError, float, "  0x3.1  ")
         self.assertRaises(ValueError, float, "  -0x3.p-1  ")
         self.assertRaises(ValueError, float, "  +0x3.p-1  ")
@@ -43,7 +42,6 @@ class GeneralFloatCases(unittest.TestCase):
         self.assertRaises(ValueError, float, "+.inf")
         self.assertRaises(ValueError, float, ".")
         self.assertRaises(ValueError, float, "-.")
-        self.assertRaises(ValueError, float, b"-")
         self.assertRaises(TypeError, float, {})
         self.assertRaisesRegex(TypeError, "not 'dict'", float, {})
         # Lone surrogate
@@ -57,6 +55,42 @@ class GeneralFloatCases(unittest.TestCase):
         float(b'.' + b'1'*1000)
         float('.' + '1'*1000)
 
+    def test_non_numeric_input_types(self):
+        # Test possible non-numeric types for the argument x, including
+        # subclasses of the explicitly documented accepted types.
+        class CustomStr(str): pass
+        class CustomBytes(bytes): pass
+        class CustomByteArray(bytearray): pass
+
+        factories = [
+            bytes,
+            bytearray,
+            lambda b: CustomStr(b.decode()),
+            CustomBytes,
+            CustomByteArray,
+            memoryview,
+        ]
+        try:
+            from array import array
+        except ImportError:
+            pass
+        else:
+            factories.append(lambda b: array('B', b))
+
+        for f in factories:
+            x = f(b" 3.14  ")
+            with self.subTest(type(x)):
+                self.assertEqual(float(x), 3.14)
+                with self.assertRaisesRegex(ValueError, "could not convert"):
+                    float(f(b'A' * 0x10))
+
+    def test_float_memoryview(self):
+        self.assertEqual(float(memoryview(b'12.3')[1:4]), 2.3)
+        self.assertEqual(float(memoryview(b'12.3\x00')[1:4]), 2.3)
+        self.assertEqual(float(memoryview(b'12.3 ')[1:4]), 2.3)
+        self.assertEqual(float(memoryview(b'12.3A')[1:4]), 2.3)
+        self.assertEqual(float(memoryview(b'12.34')[1:4]), 2.3)
+
     def test_error_message(self):
         testlist = ('\xbd', '123\xbd', '  123 456  ')
         for s in testlist:
index e94602e88dd0785af11171074ce315e6d6d6f143..ab3917fa9cfd8f8d32cd175cde31cc62e4f38e46 100644 (file)
@@ -276,16 +276,40 @@ class IntTestCases(unittest.TestCase):
         class CustomBytes(bytes): pass
         class CustomByteArray(bytearray): pass
 
-        values = [b'100',
-                  bytearray(b'100'),
-                  CustomStr('100'),
-                  CustomBytes(b'100'),
-                  CustomByteArray(b'100')]
-
-        for x in values:
-            msg = 'x has type %s' % type(x).__name__
-            self.assertEqual(int(x), 100, msg=msg)
-            self.assertEqual(int(x, 2), 4, msg=msg)
+        factories = [
+            bytes,
+            bytearray,
+            lambda b: CustomStr(b.decode()),
+            CustomBytes,
+            CustomByteArray,
+            memoryview,
+        ]
+        try:
+            from array import array
+        except ImportError:
+            pass
+        else:
+            factories.append(lambda b: array('B', b))
+
+        for f in factories:
+            x = f(b'100')
+            with self.subTest(type(x)):
+                self.assertEqual(int(x), 100)
+                if isinstance(x, (str, bytes, bytearray)):
+                    self.assertEqual(int(x, 2), 4)
+                else:
+                    msg = "can't convert non-string"
+                    with self.assertRaisesRegex(TypeError, msg):
+                        int(x, 2)
+                with self.assertRaisesRegex(ValueError, 'invalid literal'):
+                    int(f(b'A' * 0x10))
+
+    def test_int_memoryview(self):
+        self.assertEqual(int(memoryview(b'123')[1:3]), 23)
+        self.assertEqual(int(memoryview(b'123\x00')[1:3]), 23)
+        self.assertEqual(int(memoryview(b'123 ')[1:3]), 23)
+        self.assertEqual(int(memoryview(b'123A')[1:3]), 23)
+        self.assertEqual(int(memoryview(b'1234')[1:3]), 23)
 
     def test_string_float(self):
         self.assertRaises(ValueError, int, '1.2')
index d47737051f1552df672b64791b196a925bb440bd..a0c1a6d2d0c7b22d4392adbf0a18003df09f7f1f 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,10 @@ Release date: tba
 Core and Builtins
 -----------------
 
+- Issue #24802: Avoid buffer overreads when int(), float(), compile(), exec()
+  and eval() are passed bytes-like objects.  These objects are not
+  necessarily terminated by a null byte, but the functions assumed they were.
+
 - Issue #24402: Fix input() to prompt to the redirected stdout when
   sys.stdout.fileno() fails.
 
index a20a84cbb71f475e3555de4aed3ee4e1706e4350..5e9613816a834f6b73f508c092dc81f6d42a3c6e 100644 (file)
@@ -1264,12 +1264,30 @@ PyNumber_Long(PyObject *o)
         /* The below check is done in PyLong_FromUnicode(). */
         return PyLong_FromUnicodeObject(o, 10);
 
-    if (PyObject_GetBuffer(o, &view, PyBUF_SIMPLE) == 0) {
+    if (PyBytes_Check(o))
         /* need to do extra error checking that PyLong_FromString()
          * doesn't do.  In particular int('9\x005') must raise an
          * exception, not truncate at the null.
          */
-        PyObject *result = _PyLong_FromBytes(view.buf, view.len, 10);
+        return _PyLong_FromBytes(PyBytes_AS_STRING(o),
+                                 PyBytes_GET_SIZE(o), 10);
+
+    if (PyByteArray_Check(o))
+        return _PyLong_FromBytes(PyByteArray_AS_STRING(o),
+                                 PyByteArray_GET_SIZE(o), 10);
+
+    if (PyObject_GetBuffer(o, &view, PyBUF_SIMPLE) == 0) {
+        PyObject *result, *bytes;
+
+        /* Copy to NUL-terminated buffer. */
+        bytes = PyBytes_FromStringAndSize((const char *)view.buf, view.len);
+        if (bytes == NULL) {
+            PyBuffer_Release(&view);
+            return NULL;
+        }
+        result = _PyLong_FromBytes(PyBytes_AS_STRING(bytes),
+                                   PyBytes_GET_SIZE(bytes), 10);
+        Py_DECREF(bytes);
         PyBuffer_Release(&view);
         return result;
     }
index 7f4cdd9b3507eac66eb3d71641c7db27929ca726..7aaaeab7d24719115b6e4f47781ee046249bc856 100644 (file)
@@ -767,7 +767,6 @@ complex_subtype_from_string(PyTypeObject *type, PyObject *v)
     int got_bracket=0;
     PyObject *s_buffer = NULL;
     Py_ssize_t len;
-    Py_buffer view = {NULL, NULL};
 
     if (PyUnicode_Check(v)) {
         s_buffer = _PyUnicode_TransformDecimalAndSpaceToASCII(v);
@@ -777,10 +776,6 @@ complex_subtype_from_string(PyTypeObject *type, PyObject *v)
         if (s == NULL)
             goto error;
     }
-    else if (PyObject_GetBuffer(v, &view, PyBUF_SIMPLE) == 0) {
-        s = (const char *)view.buf;
-        len = view.len;
-    }
     else {
         PyErr_Format(PyExc_TypeError,
             "complex() argument must be a string or a number, not '%.200s'",
@@ -895,7 +890,6 @@ complex_subtype_from_string(PyTypeObject *type, PyObject *v)
     if (s-start != len)
         goto parse_error;
 
-    PyBuffer_Release(&view);
     Py_XDECREF(s_buffer);
     return complex_subtype_from_doubles(type, x, y);
 
@@ -903,7 +897,6 @@ complex_subtype_from_string(PyTypeObject *type, PyObject *v)
     PyErr_SetString(PyExc_ValueError,
                     "complex() arg is a malformed string");
   error:
-    PyBuffer_Release(&view);
     Py_XDECREF(s_buffer);
     return NULL;
 }
index 1dca94790641fbc3ef0dbbd148567f3b621fb7e6..9c1b714af328f9f544bf6f352cc5b9a87f9e6a49 100644 (file)
@@ -144,9 +144,24 @@ PyFloat_FromString(PyObject *v)
             return NULL;
         }
     }
+    else if (PyBytes_Check(v)) {
+        s = PyBytes_AS_STRING(v);
+        len = PyBytes_GET_SIZE(v);
+    }
+    else if (PyByteArray_Check(v)) {
+        s = PyByteArray_AS_STRING(v);
+        len = PyByteArray_GET_SIZE(v);
+    }
     else if (PyObject_GetBuffer(v, &view, PyBUF_SIMPLE) == 0) {
         s = (const char *)view.buf;
         len = view.len;
+        /* Copy to NUL-terminated buffer. */
+        s_buffer = PyBytes_FromStringAndSize(s, len);
+        if (s_buffer == NULL) {
+            PyBuffer_Release(&view);
+            return NULL;
+        }
+        s = PyBytes_AS_STRING(s_buffer);
     }
     else {
         PyErr_Format(PyExc_TypeError,
index aed93e5352318a28aad510aa8b7d221f88c8ead9..53009e3fbdc77dc6b9cc67902b548a4fe8ca0363 100644 (file)
@@ -560,20 +560,37 @@ Return a Unicode string of one character with ordinal i; 0 <= i <= 0x10ffff.");
 
 
 static const char *
-source_as_string(PyObject *cmd, const char *funcname, const char *what, PyCompilerFlags *cf, Py_buffer *view)
+source_as_string(PyObject *cmd, const char *funcname, const char *what, PyCompilerFlags *cf, PyObject **cmd_copy)
 {
     const char *str;
     Py_ssize_t size;
+    Py_buffer view;
 
+    *cmd_copy = NULL;
     if (PyUnicode_Check(cmd)) {
         cf->cf_flags |= PyCF_IGNORE_COOKIE;
         str = PyUnicode_AsUTF8AndSize(cmd, &size);
         if (str == NULL)
             return NULL;
     }
-    else if (PyObject_GetBuffer(cmd, view, PyBUF_SIMPLE) == 0) {
-        str = (const char *)view->buf;
-        size = view->len;
+    else if (PyBytes_Check(cmd)) {
+        str = PyBytes_AS_STRING(cmd);
+        size = PyBytes_GET_SIZE(cmd);
+    }
+    else if (PyByteArray_Check(cmd)) {
+        str = PyByteArray_AS_STRING(cmd);
+        size = PyByteArray_GET_SIZE(cmd);
+    }
+    else if (PyObject_GetBuffer(cmd, &view, PyBUF_SIMPLE) == 0) {
+        /* Copy to NUL-terminated buffer. */
+        *cmd_copy = PyBytes_FromStringAndSize(
+            (const char *)view.buf, view.len);
+        PyBuffer_Release(&view);
+        if (*cmd_copy == NULL) {
+            return NULL;
+        }
+        str = PyBytes_AS_STRING(*cmd_copy);
+        size = PyBytes_GET_SIZE(*cmd_copy);
     }
     else {
         PyErr_Format(PyExc_TypeError,
@@ -585,7 +602,7 @@ source_as_string(PyObject *cmd, const char *funcname, const char *what, PyCompil
     if (strlen(str) != size) {
         PyErr_SetString(PyExc_TypeError,
                         "source code string cannot contain null bytes");
-        PyBuffer_Release(view);
+        Py_CLEAR(*cmd_copy);
         return NULL;
     }
     return str;
@@ -594,7 +611,7 @@ source_as_string(PyObject *cmd, const char *funcname, const char *what, PyCompil
 static PyObject *
 builtin_compile(PyObject *self, PyObject *args, PyObject *kwds)
 {
-    Py_buffer view = {NULL, NULL};
+    PyObject *cmd_copy;
     const char *str;
     PyObject *filename;
     char *startstr;
@@ -681,12 +698,12 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds)
         goto finally;
     }
 
-    str = source_as_string(cmd, "compile", "string, bytes or AST", &cf, &view);
+    str = source_as_string(cmd, "compile", "string, bytes or AST", &cf, &cmd_copy);
     if (str == NULL)
         goto error;
 
     result = Py_CompileStringObject(str, filename, start[mode], &cf, optimize);
-    PyBuffer_Release(&view);
+    Py_XDECREF(cmd_copy);
     goto finally;
 
 error:
@@ -754,9 +771,8 @@ Return the tuple ((x-x%y)/y, x%y).  Invariant: div*y + mod == x.");
 static PyObject *
 builtin_eval(PyObject *self, PyObject *args)
 {
-    PyObject *cmd, *result, *tmp = NULL;
+    PyObject *cmd, *result, *cmd_copy;
     PyObject *globals = Py_None, *locals = Py_None;
-    Py_buffer view = {NULL, NULL};
     const char *str;
     PyCompilerFlags cf;
 
@@ -806,7 +822,7 @@ builtin_eval(PyObject *self, PyObject *args)
     }
 
     cf.cf_flags = PyCF_SOURCE_IS_UTF8;
-    str = source_as_string(cmd, "eval", "string, bytes or code", &cf, &view);
+    str = source_as_string(cmd, "eval", "string, bytes or code", &cf, &cmd_copy);
     if (str == NULL)
         return NULL;
 
@@ -815,8 +831,7 @@ builtin_eval(PyObject *self, PyObject *args)
 
     (void)PyEval_MergeCompilerFlags(&cf);
     result = PyRun_StringFlags(str, Py_eval_input, globals, locals, &cf);
-    PyBuffer_Release(&view);
-    Py_XDECREF(tmp);
+    Py_XDECREF(cmd_copy);
     return result;
 }
 
@@ -882,12 +897,13 @@ builtin_exec(PyObject *self, PyObject *args)
         v = PyEval_EvalCode(prog, globals, locals);
     }
     else {
-        Py_buffer view = {NULL, NULL};
+        PyObject *prog_copy;
         const char *str;
         PyCompilerFlags cf;
         cf.cf_flags = PyCF_SOURCE_IS_UTF8;
         str = source_as_string(prog, "exec",
-                                     "string, bytes or code", &cf, &view);
+                                     "string, bytes or code", &cf,
+                                     &prog_copy);
         if (str == NULL)
             return NULL;
         if (PyEval_MergeCompilerFlags(&cf))
@@ -895,7 +911,7 @@ builtin_exec(PyObject *self, PyObject *args)
                                   locals, &cf);
         else
             v = PyRun_String(str, Py_file_input, globals, locals);
-        PyBuffer_Release(&view);
+        Py_XDECREF(prog_copy);
     }
     if (v == NULL)
         return NULL;