From 7b90e168f3d27f10cc087da2b9be1289722e7172 Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Thu, 18 May 2006 07:01:27 +0000 Subject: [PATCH] Bug #1462152: file() now checks more thoroughly for invalid mode strings and removes a possible "U" before passing the mode to the C library function. --- Doc/lib/libfuncs.tex | 8 ++++- Lib/test/test_file.py | 2 +- Misc/NEWS | 4 +++ Objects/fileobject.c | 83 +++++++++++++++++++++++++------------------ 4 files changed, 60 insertions(+), 37 deletions(-) diff --git a/Doc/lib/libfuncs.tex b/Doc/lib/libfuncs.tex index ff81faab18..7cfdfbb0f8 100644 --- a/Doc/lib/libfuncs.tex +++ b/Doc/lib/libfuncs.tex @@ -418,7 +418,7 @@ class C: that differentiate between binary and text files (else it is ignored). If the file cannot be opened, \exception{IOError} is raised. - + In addition to the standard \cfunction{fopen()} values \var{mode} may be \code{'U'} or \code{'rU'}. If Python is built with universal newline support (the default) the file is opened as a text file, but @@ -434,6 +434,9 @@ class C: have yet been seen), \code{'\e n'}, \code{'\e r'}, \code{'\e r\e n'}, or a tuple containing all the newline types seen. + Python enforces that the mode, after stripping \code{'U'}, begins with + \code{'r'}, \code{'w'} or \code{'a'}. + If \var{mode} is omitted, it defaults to \code{'r'}. When opening a binary file, you should append \code{'b'} to the \var{mode} value for improved portability. (It's useful even on systems which don't @@ -456,6 +459,9 @@ class C: determine whether this is the case.} \versionadded{2.2} + + \versionchanged[Restriction on first letter of mode string + introduced]{2.5} \end{funcdesc} \begin{funcdesc}{filter}{function, list} diff --git a/Lib/test/test_file.py b/Lib/test/test_file.py index a9f5e4677b..53f9953071 100644 --- a/Lib/test/test_file.py +++ b/Lib/test/test_file.py @@ -136,7 +136,7 @@ f.close() bad_mode = "qwerty" try: open(TESTFN, bad_mode) -except IOError, msg: +except ValueError, msg: if msg[0] != 0: s = str(msg) if s.find(TESTFN) != -1 or s.find(bad_mode) == -1: diff --git a/Misc/NEWS b/Misc/NEWS index 99dcec8010..679bd3060e 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -12,6 +12,10 @@ What's New in Python 2.5 alpha 3? Core and builtins ----------------- +- Bug #1462152: file() now checks more thoroughly for invalid mode + strings and removes a possible "U" before passing the mode to the + C library function. + - Patch #1488312, Fix memory alignment problem on SPARC in unicode - Bug #1487966: Fix SystemError with conditional expression in assignment diff --git a/Objects/fileobject.c b/Objects/fileobject.c index 0f166cdb6c..29a5d4a197 100644 --- a/Objects/fileobject.c +++ b/Objects/fileobject.c @@ -136,46 +136,45 @@ fill_file_fields(PyFileObject *f, FILE *fp, PyObject *name, char *mode, /* check for known incorrect mode strings - problem is, platforms are free to accept any mode characters they like and are supposed to ignore stuff they don't understand... write or append mode with - universal newline support is expressly forbidden by PEP 278. */ + universal newline support is expressly forbidden by PEP 278. + Additionally, remove the 'U' from the mode string as platforms + won't know what it is. */ /* zero return is kewl - one is un-kewl */ static int -check_the_mode(char *mode) +sanitize_the_mode(char *mode) { + char *upos; size_t len = strlen(mode); - switch (len) { - case 0: + if (!len) { PyErr_SetString(PyExc_ValueError, "empty mode string"); return 1; + } - /* reject wU, aU */ - case 2: - switch (mode[0]) { - case 'w': - case 'a': - if (mode[1] == 'U') { - PyErr_SetString(PyExc_ValueError, - "invalid mode string"); - return 1; - } - break; + upos = strchr(mode, 'U'); + if (upos) { + memmove(upos, upos+1, len-(upos-mode)); /* incl null char */ + + if (mode[0] == 'w' || mode[0] == 'a') { + PyErr_Format(PyExc_ValueError, "universal newline " + "mode can only be used with modes " + "starting with 'r'"); + return 1; } - break; - /* reject w+U, a+U, wU+, aU+ */ - case 3: - switch (mode[0]) { - case 'w': - case 'a': - if ((mode[1] == '+' && mode[2] == 'U') || - (mode[1] == 'U' && mode[2] == '+')) { - PyErr_SetString(PyExc_ValueError, - "invalid mode string"); - return 1; - } - break; + if (mode[0] != 'r') { + memmove(mode+1, mode, strlen(mode)+1); + mode[0] = 'r'; } - break; + + if (!strchr(mode, 'b')) { + memmove(mode+2, mode+1, strlen(mode)); + mode[1] = 'b'; + } + } else if (mode[0] != 'r' && mode[0] != 'w' && mode[0] != 'a') { + PyErr_Format(PyExc_ValueError, "mode string must begin with " + "one of 'r', 'w', 'a' or 'U', not '%.200s'", mode); + return 1; } return 0; @@ -184,6 +183,7 @@ check_the_mode(char *mode) static PyObject * open_the_file(PyFileObject *f, char *name, char *mode) { + char *newmode; assert(f != NULL); assert(PyFile_Check(f)); #ifdef MS_WINDOWS @@ -195,8 +195,18 @@ open_the_file(PyFileObject *f, char *name, char *mode) assert(mode != NULL); assert(f->f_fp == NULL); - if (check_the_mode(mode)) + /* probably need to replace 'U' by 'rb' */ + newmode = PyMem_MALLOC(strlen(mode) + 3); + if (!newmode) { + PyErr_NoMemory(); return NULL; + } + strcpy(newmode, mode); + + if (sanitize_the_mode(newmode)) { + f = NULL; + goto cleanup; + } /* rexec.py can't stop a user from getting the file() constructor -- all they have to do is get *any* file object f, and then do @@ -204,16 +214,15 @@ open_the_file(PyFileObject *f, char *name, char *mode) if (PyEval_GetRestricted()) { PyErr_SetString(PyExc_IOError, "file() constructor not accessible in restricted mode"); - return NULL; + f = NULL; + goto cleanup; } errno = 0; - if (strcmp(mode, "U") == 0 || strcmp(mode, "rU") == 0) - mode = "rb"; #ifdef MS_WINDOWS if (PyUnicode_Check(f->f_name)) { PyObject *wmode; - wmode = PyUnicode_DecodeASCII(mode, strlen(mode), NULL); + wmode = PyUnicode_DecodeASCII(newmode, strlen(newmode), NULL); if (f->f_name && wmode) { Py_BEGIN_ALLOW_THREADS /* PyUnicode_AS_UNICODE OK without thread @@ -227,7 +236,7 @@ open_the_file(PyFileObject *f, char *name, char *mode) #endif if (NULL == f->f_fp && NULL != name) { Py_BEGIN_ALLOW_THREADS - f->f_fp = fopen(name, mode); + f->f_fp = fopen(name, newmode); Py_END_ALLOW_THREADS } @@ -254,6 +263,10 @@ open_the_file(PyFileObject *f, char *name, char *mode) } if (f != NULL) f = dircheck(f); + +cleanup: + PyMem_FREE(newmode); + return (PyObject *)f; } -- 2.40.0