From: Walter Dörwald Date: Tue, 12 Jul 2005 21:53:43 +0000 (+0000) Subject: Apply SF patch #1101726: Fix buffer overrun in tokenizer.c when a source file X-Git-Tag: v2.5a0~1595 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=c1f5fff2b7c51fd5420a4dfb8a2b1c297c993c10;p=python Apply SF patch #1101726: Fix buffer overrun in tokenizer.c when a source file with a PEP 263 encoding declaration results in long decoded line. --- diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 1884d011e8..6957cc9593 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -334,7 +334,19 @@ check_bom(int get_char(struct tok_state *), } /* Read a line of text from TOK into S, using the stream in TOK. - Return NULL on failure, else S. */ + Return NULL on failure, else S. + + On entry, tok->decoding_buffer will be one of: + 1) NULL: need to call tok->decoding_readline to get a new line + 2) PyUnicodeObject *: decoding_feof has called tok->decoding_readline and + stored the result in tok->decoding_buffer + 3) PyStringObject *: previous call to fp_readl did not have enough room + (in the s buffer) to copy entire contents of the line read + by tok->decoding_readline. tok->decoding_buffer has the overflow. + In this case, fp_readl is called in a loop (with an expanded buffer) + until the buffer ends with a '\n' (or until the end of the file is + reached): see tok_nextc and its calls to decoding_fgets. +*/ static char * fp_readl(char *s, int size, struct tok_state *tok) @@ -344,32 +356,45 @@ fp_readl(char *s, int size, struct tok_state *tok) Py_FatalError("fp_readl should not be called in this build."); return NULL; /* Keep compiler happy (not reachable) */ #else - PyObject* utf8; + PyObject* utf8 = NULL; PyObject* buf = tok->decoding_buffer; + char *str; + int utf8len; + + /* Ask for one less byte so we can terminate it */ + assert(size > 0); + size--; + if (buf == NULL) { - /* Ask for one less byte so we can terminate it */ - PyObject *args = Py_BuildValue("(i)", size-1); - if (args == NULL) - return error_ret(tok); - buf = PyObject_Call(tok->decoding_readline, args, NULL); - Py_DECREF(args); + buf = PyObject_CallObject(tok->decoding_readline, NULL); if (buf == NULL) return error_ret(tok); } else { tok->decoding_buffer = NULL; + if (PyString_CheckExact(buf)) + utf8 = buf; } - utf8 = PyUnicode_AsUTF8String(buf); - Py_DECREF(buf); - if (utf8 == NULL) - return error_ret(tok); - else { - const char* str = PyString_AsString(utf8); - assert(strlen(str) < (size_t)size); /* XXX */ - strcpy(s, str); - Py_DECREF(utf8); - if (s[0] == '\0') return NULL; /* EOF */ - return s; + if (utf8 == NULL) { + utf8 = PyUnicode_AsUTF8String(buf); + Py_DECREF(buf); + if (utf8 == NULL) + return error_ret(tok); + } + str = PyString_AsString(utf8); + utf8len = PyString_GET_SIZE(utf8); + if (utf8len > size) { + tok->decoding_buffer = PyString_FromStringAndSize(str+size, utf8len-size); + if (tok->decoding_buffer == NULL) { + Py_DECREF(utf8); + return error_ret(tok); + } + utf8len = size; } + memcpy(s, str, utf8len); + s[utf8len] = '\0'; + Py_DECREF(utf8); + if (utf8len == 0) return NULL; /* EOF */ + return s; #endif } @@ -491,14 +516,7 @@ decoding_feof(struct tok_state *tok) } else { PyObject* buf = tok->decoding_buffer; if (buf == NULL) { - PyObject *args = PyTuple_New(0); - if (args == NULL) { - error_ret(tok); - return 1; - } - buf = PyObject_Call(tok->decoding_readline, - args, NULL); - Py_DECREF(args); + buf = PyObject_CallObject(tok->decoding_readline, NULL); if (buf == NULL) { error_ret(tok); return 1;