From 1df2cbed761401e83ccb684352e9ab9264fccfd9 Mon Sep 17 00:00:00 2001 From: Benjamin Peterson Date: Wed, 5 Oct 2016 21:45:48 -0700 Subject: [PATCH] mmap: do all internal arithmetic with Py_ssize_t while being very careful about overflow --- Lib/test/test_mmap.py | 10 +++ Misc/NEWS | 3 + Modules/mmapmodule.c | 184 ++++++++++++++++++------------------------ 3 files changed, 91 insertions(+), 106 deletions(-) diff --git a/Lib/test/test_mmap.py b/Lib/test/test_mmap.py index 648da3ad32..037c4940cf 100644 --- a/Lib/test/test_mmap.py +++ b/Lib/test/test_mmap.py @@ -652,6 +652,16 @@ class MmapTests(unittest.TestCase): finally: s.close() + def test_resize_past_pos(self): + m = mmap.mmap(-1, 8192) + self.addCleanup(m.close) + m.read(5000) + m.resize(4096) + self.assertEqual(m.read(14), '') + self.assertRaises(ValueError, m.read_byte,) + self.assertRaises(ValueError, m.write_byte, 'b') + self.assertRaises(ValueError, m.write, 'abc') + class LargeMmapTests(unittest.TestCase): diff --git a/Misc/NEWS b/Misc/NEWS index fc1e185597..4899aad63e 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -46,6 +46,9 @@ Core and Builtins Library ------- +- Fix possible integer overflows and crashes in the mmap module with unusual + usage patterns. + - Issue #27897: Fixed possible crash in sqlite3.Connection.create_collation() if pass invalid string-like object as a name. Original patch by Xiang Zhang. diff --git a/Modules/mmapmodule.c b/Modules/mmapmodule.c index 1ebccdfa82..297bb074ce 100644 --- a/Modules/mmapmodule.c +++ b/Modules/mmapmodule.c @@ -91,8 +91,8 @@ typedef enum typedef struct { PyObject_HEAD char * data; - size_t size; - size_t pos; /* relative to offset */ + Py_ssize_t size; + Py_ssize_t pos; /* relative to offset */ #ifdef MS_WINDOWS PY_LONG_LONG offset; #else @@ -204,33 +204,32 @@ mmap_read_byte_method(mmap_object *self, PyObject *unused) { CHECK_VALID(NULL); - if (self->pos < self->size) { - char value = self->data[self->pos]; - self->pos += 1; - return Py_BuildValue("c", value); - } else { + if (self->pos >= self->size) { PyErr_SetString(PyExc_ValueError, "read byte out of range"); return NULL; } + return PyString_FromStringAndSize(&self->data[self->pos++], 1); } static PyObject * mmap_read_line_method(mmap_object *self, PyObject *unused) { - char *start = self->data+self->pos; - char *eof = self->data+self->size; - char *eol; + Py_ssize_t remaining; + char *start, *eol; PyObject *result; CHECK_VALID(NULL); - eol = memchr(start, '\n', self->size - self->pos); + remaining = (self->pos < self->size) ? self->size - self->pos : 0; + if (!remaining) + return PyString_FromString(""); + start = self->data + self->pos; + eol = memchr(start, '\n', remaining); if (!eol) - eol = eof; + eol = self->data + self->size; else - ++eol; /* we're interested in the position after the - newline. */ + ++eol; /* advance past newline */ result = PyString_FromStringAndSize(start, (eol - start)); self->pos += (eol - start); return result; @@ -240,28 +239,18 @@ static PyObject * mmap_read_method(mmap_object *self, PyObject *args) { - Py_ssize_t num_bytes, n; + Py_ssize_t num_bytes, remaining; PyObject *result; CHECK_VALID(NULL); if (!PyArg_ParseTuple(args, "n:read", &num_bytes)) - return(NULL); + return NULL; /* silently 'adjust' out-of-range requests */ - assert(self->size >= self->pos); - n = self->size - self->pos; - /* The difference can overflow, only if self->size is greater than - * PY_SSIZE_T_MAX. But then the operation cannot possibly succeed, - * because the mapped area and the returned string each need more - * than half of the addressable memory. So we clip the size, and let - * the code below raise MemoryError. - */ - if (n < 0) - n = PY_SSIZE_T_MAX; - if (num_bytes < 0 || num_bytes > n) { - num_bytes = n; - } - result = Py_BuildValue("s#", self->data+self->pos, num_bytes); + remaining = (self->pos < self->size) ? self->size - self->pos : 0; + if (num_bytes < 0 || num_bytes > remaining) + num_bytes = remaining; + result = PyString_FromStringAndSize(&self->data[self->pos], num_bytes); self->pos += num_bytes; return result; } @@ -288,14 +277,14 @@ mmap_gfind(mmap_object *self, start += self->size; if (start < 0) start = 0; - else if ((size_t)start > self->size) + else if (start > self->size) start = self->size; if (end < 0) end += self->size; if (end < 0) end = 0; - else if ((size_t)end > self->size) + else if (end > self->size) end = self->size; start_p = self->data + start; @@ -362,12 +351,12 @@ mmap_write_method(mmap_object *self, if (!is_writeable(self)) return NULL; - if ((self->pos + length) > self->size) { + if (self->pos > self->size || self->size - self->pos < length) { PyErr_SetString(PyExc_ValueError, "data out of range"); return NULL; } - memcpy(self->data+self->pos, data, length); - self->pos = self->pos+length; + memcpy(&self->data[self->pos], data, length); + self->pos += length; Py_INCREF(Py_None); return Py_None; } @@ -386,8 +375,7 @@ mmap_write_byte_method(mmap_object *self, return NULL; if (self->pos < self->size) { - *(self->data+self->pos) = value; - self->pos += 1; + self->data[self->pos++] = value; Py_INCREF(Py_None); return Py_None; } @@ -458,8 +446,14 @@ mmap_resize_method(mmap_object *self, if (!PyArg_ParseTuple(args, "n:resize", &new_size) || !is_resizeable(self)) { return NULL; + } + if (new_size < 0 || PY_SSIZE_T_MAX - new_size < self->offset) { + PyErr_SetString(PyExc_ValueError, "new size out of range"); + return NULL; + } + + { #ifdef MS_WINDOWS - } else { DWORD dwErrCode = 0; DWORD off_hi, off_lo, newSizeLow, newSizeHigh; /* First, unmap the file view */ @@ -509,15 +503,13 @@ mmap_resize_method(mmap_object *self, #ifdef UNIX #ifndef HAVE_MREMAP - } else { PyErr_SetString(PyExc_SystemError, "mmap: resizing not available--no mremap()"); return NULL; #else - } else { void *newmap; - if (ftruncate(self->fd, self->offset + new_size) == -1) { + if (self->fd != -1 && ftruncate(self->fd, self->offset + new_size) == -1) { PyErr_SetFromErrno(mmap_module_error); return NULL; } @@ -525,11 +517,11 @@ mmap_resize_method(mmap_object *self, #ifdef MREMAP_MAYMOVE newmap = mremap(self->data, self->size, new_size, MREMAP_MAYMOVE); #else - #if defined(__NetBSD__) - newmap = mremap(self->data, self->size, self->data, new_size, 0); - #else - newmap = mremap(self->data, self->size, new_size, 0); - #endif /* __NetBSD__ */ +#if defined(__NetBSD__) + newmap = mremap(self->data, self->size, self->data, new_size, 0); +#else + newmap = mremap(self->data, self->size, new_size, 0); +#endif /* __NetBSD__ */ #endif if (newmap == (void *)-1) { @@ -560,7 +552,7 @@ mmap_flush_method(mmap_object *self, PyObject *args) CHECK_VALID(NULL); if (!PyArg_ParseTuple(args, "|nn:flush", &offset, &size)) return NULL; - if ((size_t)(offset + size) > self->size) { + if (size < 0 || offset < 0 || self->size - offset < size) { PyErr_SetString(PyExc_ValueError, "flush values out of range"); return NULL; } @@ -601,12 +593,12 @@ mmap_seek_method(mmap_object *self, PyObject *args) where = dist; break; case 1: /* relative to current position */ - if ((Py_ssize_t)self->pos + dist < 0) + if (PY_SSIZE_T_MAX - self->pos < dist) goto onoutofrange; where = self->pos + dist; break; case 2: /* relative to end */ - if ((Py_ssize_t)self->size + dist < 0) + if (PY_SSIZE_T_MAX - self->size < dist) goto onoutofrange; where = self->size + dist; break; @@ -629,23 +621,27 @@ mmap_seek_method(mmap_object *self, PyObject *args) static PyObject * mmap_move_method(mmap_object *self, PyObject *args) { - unsigned long dest, src, cnt; + Py_ssize_t dest, src, cnt; CHECK_VALID(NULL); - if (!PyArg_ParseTuple(args, "kkk:move", &dest, &src, &cnt) || + if (!PyArg_ParseTuple(args, "nnn:move", &dest, &src, &cnt) || !is_writeable(self)) { return NULL; } else { /* bounds check the values */ - if (cnt < 0 || (cnt + dest) < cnt || (cnt + src) < cnt || - src < 0 || src > self->size || (src + cnt) > self->size || - dest < 0 || dest > self->size || (dest + cnt) > self->size) { - PyErr_SetString(PyExc_ValueError, - "source, destination, or count out of range"); - return NULL; - } - memmove(self->data+dest, self->data+src, cnt); + if (dest < 0 || src < 0 || cnt < 0) + goto bounds; + if (self->size - dest < cnt || self->size - src < cnt) + goto bounds; + + memmove(&self->data[dest], &self->data[src], cnt); + Py_INCREF(Py_None); return Py_None; + + bounds: + PyErr_SetString(PyExc_ValueError, + "source, destination, or count out of range"); + return NULL; } } @@ -745,7 +741,7 @@ static PyObject * mmap_item(mmap_object *self, Py_ssize_t i) { CHECK_VALID(NULL); - if (i < 0 || (size_t)i >= self->size) { + if (i < 0 || i >= self->size) { PyErr_SetString(PyExc_IndexError, "mmap index out of range"); return NULL; } @@ -758,13 +754,13 @@ mmap_slice(mmap_object *self, Py_ssize_t ilow, Py_ssize_t ihigh) CHECK_VALID(NULL); if (ilow < 0) ilow = 0; - else if ((size_t)ilow > self->size) + else if (ilow > self->size) ilow = self->size; if (ihigh < 0) ihigh = 0; if (ihigh < ilow) ihigh = ilow; - else if ((size_t)ihigh > self->size) + else if (ihigh > self->size) ihigh = self->size; return PyString_FromStringAndSize(self->data + ilow, ihigh-ilow); @@ -780,7 +776,7 @@ mmap_subscript(mmap_object *self, PyObject *item) return NULL; if (i < 0) i += self->size; - if (i < 0 || (size_t)i >= self->size) { + if (i < 0 || i >= self->size) { PyErr_SetString(PyExc_IndexError, "mmap index out of range"); return NULL; @@ -850,13 +846,13 @@ mmap_ass_slice(mmap_object *self, Py_ssize_t ilow, Py_ssize_t ihigh, PyObject *v CHECK_VALID(-1); if (ilow < 0) ilow = 0; - else if ((size_t)ilow > self->size) + else if (ilow > self->size) ilow = self->size; if (ihigh < 0) ihigh = 0; if (ihigh < ilow) ihigh = ilow; - else if ((size_t)ihigh > self->size) + else if (ihigh > self->size) ihigh = self->size; if (v == NULL) { @@ -887,7 +883,7 @@ mmap_ass_item(mmap_object *self, Py_ssize_t i, PyObject *v) const char *buf; CHECK_VALID(-1); - if (i < 0 || (size_t)i >= self->size) { + if (i < 0 || i >= self->size) { PyErr_SetString(PyExc_IndexError, "mmap index out of range"); return -1; } @@ -921,7 +917,7 @@ mmap_ass_subscript(mmap_object *self, PyObject *item, PyObject *value) return -1; if (i < 0) i += self->size; - if (i < 0 || (size_t)i >= self->size) { + if (i < 0 || i >= self->size) { PyErr_SetString(PyExc_IndexError, "mmap index out of range"); return -1; @@ -1092,32 +1088,6 @@ static PyTypeObject mmap_object_type = { }; -/* extract the map size from the given PyObject - - Returns -1 on error, with an appropriate Python exception raised. On - success, the map size is returned. */ -static Py_ssize_t -_GetMapSize(PyObject *o, const char* param) -{ - if (o == NULL) - return 0; - if (PyIndex_Check(o)) { - Py_ssize_t i = PyNumber_AsSsize_t(o, PyExc_OverflowError); - if (i==-1 && PyErr_Occurred()) - return -1; - if (i < 0) { - PyErr_Format(PyExc_OverflowError, - "memory mapped %s must be positive", - param); - return -1; - } - return i; - } - - PyErr_SetString(PyExc_TypeError, "map size must be an integral value"); - return -1; -} - #ifdef UNIX #ifdef HAVE_LARGEFILE_SUPPORT #define _Py_PARSE_OFF_T "L" @@ -1132,7 +1102,6 @@ new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict) struct stat st; #endif mmap_object *m_obj; - PyObject *map_size_obj = NULL; Py_ssize_t map_size; off_t offset = 0; int fd, flags = MAP_SHARED, prot = PROT_WRITE | PROT_READ; @@ -1142,13 +1111,15 @@ new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict) "flags", "prot", "access", "offset", NULL}; - if (!PyArg_ParseTupleAndKeywords(args, kwdict, "iO|iii" _Py_PARSE_OFF_T, keywords, - &fd, &map_size_obj, &flags, &prot, + if (!PyArg_ParseTupleAndKeywords(args, kwdict, "in|iii" _Py_PARSE_OFF_T, keywords, + &fd, &map_size, &flags, &prot, &access, &offset)) return NULL; - map_size = _GetMapSize(map_size_obj, "size"); - if (map_size < 0) + if (map_size < 0) { + PyErr_SetString(PyExc_OverflowError, + "memory mapped length must be postiive"); return NULL; + } if (offset < 0) { PyErr_SetString(PyExc_OverflowError, "memory mapped offset must be positive"); @@ -1220,7 +1191,7 @@ new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict) return NULL; } map_size = (Py_ssize_t) (st.st_size - offset); - } else if (offset + (size_t)map_size > st.st_size) { + } else if (offset > st.st_size || st.st_size - offset < map_size) { PyErr_SetString(PyExc_ValueError, "mmap length is greater than file size"); return NULL; @@ -1230,8 +1201,8 @@ new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict) m_obj = (mmap_object *)type->tp_alloc(type, 0); if (m_obj == NULL) {return NULL;} m_obj->data = NULL; - m_obj->size = (size_t) map_size; - m_obj->pos = (size_t) 0; + m_obj->size = map_size; + m_obj->pos = 0; m_obj->offset = offset; if (fd == -1) { m_obj->fd = -1; @@ -1290,7 +1261,6 @@ static PyObject * new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict) { mmap_object *m_obj; - PyObject *map_size_obj = NULL; Py_ssize_t map_size; PY_LONG_LONG offset = 0, size; DWORD off_hi; /* upper 32 bits of offset */ @@ -1307,8 +1277,8 @@ new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict) "tagname", "access", "offset", NULL }; - if (!PyArg_ParseTupleAndKeywords(args, kwdict, "iO|ziL", keywords, - &fileno, &map_size_obj, + if (!PyArg_ParseTupleAndKeywords(args, kwdict, "in|ziL", keywords, + &fileno, &map_size, &tagname, &access, &offset)) { return NULL; } @@ -1331,9 +1301,11 @@ new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict) "mmap invalid access parameter."); } - map_size = _GetMapSize(map_size_obj, "size"); - if (map_size < 0) + if (map_size < 0) { + PyErr_SetString(PyExc_OverflowError, + "memory mapped length must be postiive"); return NULL; + } if (offset < 0) { PyErr_SetString(PyExc_OverflowError, "memory mapped offset must be positive"); -- 2.50.1