granicus.if.org Git - python/commitdiff
Issues #2384 and #3975: Tracebacks were not correctly printed when the source file
author Amaury Forgeot d'Arc <amauryfa@gmail.com>
Thu, 9 Oct 2008 23:37:48 +0000 (23:37 +0000)
committer Amaury Forgeot d'Arc <amauryfa@gmail.com>
Thu, 9 Oct 2008 23:37:48 +0000 (23:37 +0000)
contains a ``coding:`` header: the wrong line was displayed, and the encoding was not respected.

Patch by Victor Stinner.

Lib/test/test_traceback.py
Misc/NEWS
Parser/tokenizer.c
Python/traceback.c

index 3f69e5e89352007e8c8283d8d3ce950e4fe407d9..c44e2b1d110ae890e67145a66711fba21a50fe4f 100644 (file)
@@ -6,6 +6,7 @@ import sys
 import unittest
 import re
 from test.support import run_unittest, is_jython, Error, captured_output
+from test.support import TESTFN, unlink
 
 import traceback
 
@@ -90,6 +91,70 @@ class SyntaxTracebackCases(unittest.TestCase):
         err = traceback.format_exception_only(None, None)
         self.assertEqual(err, ['None\n'])
 
+    def test_encoded_file(self):
+        # Test that tracebacks are correctly printed for encoded source files:
+        # - correct line number (Issue2384)
+        # - respect file encoding (Issue3975)
+        import tempfile, sys, subprocess, os
+
+        # The spawned subprocess has its stdout redirected to a PIPE, and its
+        # encoding may be different from the current interpreter, on Windows
+        # at least.
+        process = subprocess.Popen([sys.executable, "-c",
+                                    "import sys; print(sys.stdout.encoding)"],
+                                   stdout=subprocess.PIPE,
+                                   stderr=subprocess.STDOUT)
+        stdout, stderr = process.communicate()
+        output_encoding = str(stdout, 'ascii').splitlines()[0]
+
+        def do_test(firstlines, message, charset, lineno):
+            # Raise the message in a subprocess, and catch the output
+            try:
+                output = open(TESTFN, "w", encoding=charset)
+                output.write("""{0}if 1:
+                    import traceback;
+                    raise RuntimeError('{1}')
+                    """.format(firstlines, message))
+                output.close()
+                process = subprocess.Popen([sys.executable, TESTFN],
+                    stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+                stdout, stderr = process.communicate()
+                stdout = stdout.decode(output_encoding).splitlines()
+            finally:
+                unlink(TESTFN)
+
+            # The source lines are encoded with the 'backslashreplace' handler
+            encoded_message = message.encode(output_encoding,
+                                             'backslashreplace')
+            # and we just decoded them with the output_encoding.
+            message_ascii = encoded_message.decode(output_encoding)
+
+            err_line = "raise RuntimeError('{0}')".format(message_ascii)
+            err_msg = "RuntimeError: {0}".format(message_ascii)
+
+            self.assert_(("line %s" % lineno) in stdout[1],
+                "Invalid line number: {0!r} instead of {1}".format(
+                    stdout[1], lineno))
+            self.assert_(stdout[2].endswith(err_line),
+                "Invalid traceback line: {0!r} instead of {1!r}".format(
+                    stdout[2], err_line))
+            self.assert_(stdout[3] == err_msg,
+                "Invalid error message: {0!r} instead of {1!r}".format(
+                    stdout[3], err_msg))
+
+        do_test("", "foo", "ascii", 3)
+        for charset in ("ascii", "iso-8859-1", "utf-8", "GBK"):
+            if charset == "ascii":
+                text = "foo"
+            elif charset == "GBK":
+                text = "\u4E02\u5100"
+            else:
+                text = "h\xe9 ho"
+            do_test("# coding: {0}\n".format(charset),
+                    text, charset, 4)
+            do_test("#!shebang\n# coding: {0}\n".format(charset),
+                    text, charset, 5)
+
 
 class TracebackFormatTests(unittest.TestCase):
 
index 2505bfa56a7893b8740a89634ea16e3581fe1e1d..7bad53ce84a2f4a4d49c56f081652dce3972dfe7 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -15,6 +15,10 @@ What's New in Python 3.0 beta 5
 Core and Builtins
 -----------------
 
+- Issues #2384 and #3975: Tracebacks were not correctly printed when the
+  source file contains a ``coding:`` header: the wrong line was displayed, and
+  the encoding was not respected.
+
 - Issue #3740: Null-initialize module state.
 
 - Issue #3946: PyObject_CheckReadBuffer crashed on a memoryview object.
index 18815aef4529d5f3f9bc39ab2f44d136d17da998..4edf6d07e44ece96717e436cc5e0486d38de22c9 100644 (file)
@@ -461,6 +461,14 @@ fp_setreadl(struct tok_state *tok, const char* enc)
        readline = PyObject_GetAttrString(stream, "readline");
        tok->decoding_readline = readline;
 
+       /* The file has been reopened; parsing will restart from
+        * the beginning of the file, we have to reset the line number.
+        * But this function has been called from inside tok_nextc() which
+        * will increment lineno before it returns. So we set it to -1 so that
+        * the next call to tok_nextc() will start with tok->lineno == 0.
+        */
+       tok->lineno = -1;
+
   cleanup:
        Py_XDECREF(stream);
        Py_XDECREF(io);
index dffce35b44530a57098482c2d0561674d6abdbba..63ecc3cb16e4329c755374db2de203633cbc7d11 100644 (file)
@@ -8,9 +8,15 @@
 #include "structmember.h"
 #include "osdefs.h"
 #include "traceback.h"
+#ifdef HAVE_FCNTL_H
+#include <fcntl.h>
+#endif
 
 #define OFF(x) offsetof(PyTracebackObject, x)
 
+/* Method from Parser/tokenizer.c */
+extern char * PyTokenizer_FindEncoding(int);
+
 static PyObject *
 tb_dir(PyTracebackObject *self)
 {
@@ -128,102 +134,156 @@ PyTraceBack_Here(PyFrameObject *frame)
        return 0;
 }
 
+static int
+_Py_FindSourceFile(const char* filename, char* namebuf, size_t namelen, int open_flags)
+{
+       int i;
+       int fd = -1;
+       PyObject *v;
+       Py_ssize_t _npath;
+       int npath;
+       size_t taillen;
+       PyObject *syspath;
+       const char* path;
+       const char* tail;
+       Py_ssize_t len;
+
+       /* Search tail of filename in sys.path before giving up */
+       tail = strrchr(filename, SEP);
+       if (tail == NULL)
+               tail = filename;
+       else
+               tail++;
+       taillen = strlen(tail);
+
+       syspath = PySys_GetObject("path");
+       if (syspath == NULL || !PyList_Check(syspath))
+               return -1;
+       _npath = PyList_Size(syspath);
+       npath = Py_SAFE_DOWNCAST(_npath, Py_ssize_t, int);
+
+       for (i = 0; i < npath; i++) {
+               v = PyList_GetItem(syspath, i);
+               if (v == NULL) {
+                       PyErr_Clear();
+                       break;
+               }
+               if (!PyUnicode_Check(v))
+                       continue;
+               path = _PyUnicode_AsStringAndSize(v, &len);
+               if (len + 1 + taillen >= (Py_ssize_t)namelen - 1)
+                       continue; /* Too long */
+               strcpy(namebuf, path);
+               if (strlen(namebuf) != len)
+                       continue; /* v contains '\0' */
+               if (len > 0 && namebuf[len-1] != SEP)
+                       namebuf[len++] = SEP;
+               strcpy(namebuf+len, tail);
+               Py_BEGIN_ALLOW_THREADS
+               fd = open(namebuf, open_flags);
+               Py_END_ALLOW_THREADS
+               if (0 <= fd) {
+                       return fd;
+               }
+       }
+       return -1;
+}
+
 int
 _Py_DisplaySourceLine(PyObject *f, const char *filename, int lineno, int indent)
 {
        int err = 0;
-       FILE *xfp = NULL;
-       char linebuf[2000];
+       int fd;
        int i;
-       char namebuf[MAXPATHLEN+1];
+       char *found_encoding;
+       char *encoding;
+       PyObject *fob = NULL;
+       PyObject *lineobj = NULL;
+#ifdef O_BINARY
+       const int open_flags = O_RDONLY | O_BINARY;   /* necessary for Windows */
+#else
+       const int open_flags = O_RDONLY;
+#endif
+       char buf[MAXPATHLEN+1];
+       Py_UNICODE *u, *p;
+       Py_ssize_t len;
 
+       /* open the file */
        if (filename == NULL)
-               return -1;
-       xfp = fopen(filename, "r" PY_STDIOTEXTMODE);
-       if (xfp == NULL) {
-               /* Search tail of filename in sys.path before giving up */
-               PyObject *path;
-               const char *tail = strrchr(filename, SEP);
-               if (tail == NULL)
-                       tail = filename;
-               else
-                       tail++;
-               path = PySys_GetObject("path");
-               if (path != NULL && PyList_Check(path)) {
-                       Py_ssize_t _npath = PyList_Size(path);
-                       int npath = Py_SAFE_DOWNCAST(_npath, Py_ssize_t, int);
-                       size_t taillen = strlen(tail);
-                       for (i = 0; i < npath; i++) {
-                               PyObject *v = PyList_GetItem(path, i);
-                               if (v == NULL) {
-                                       PyErr_Clear();
-                                       break;
-                               }
-                               if (PyBytes_Check(v)) {
-                                       size_t len;
-                                       len = PyBytes_GET_SIZE(v);
-                                       if (len + 1 + taillen >= MAXPATHLEN)
-                                               continue; /* Too long */
-                                       strcpy(namebuf, PyBytes_AsString(v));
-                                       if (strlen(namebuf) != len)
-                                               continue; /* v contains '\0' */
-                                       if (len > 0 && namebuf[len-1] != SEP)
-                                               namebuf[len++] = SEP;
-                                       strcpy(namebuf+len, tail);
-                                       xfp = fopen(namebuf, "r" PY_STDIOTEXTMODE);
-                                       if (xfp != NULL) {
-                                               filename = namebuf;
-                                               break;
-                                       }
-                               }
-                       }
-               }
+               return 0;
+       Py_BEGIN_ALLOW_THREADS
+       fd = open(filename, open_flags);
+       Py_END_ALLOW_THREADS
+       if (fd < 0) {
+               fd = _Py_FindSourceFile(filename, buf, sizeof(buf), open_flags);
+               if (fd < 0)
+                       return 0;
+               filename = buf;
        }
 
-        if (xfp == NULL)
-            return err;
-        if (err != 0) {
-            fclose(xfp);
-            return err;
-        }
+       /* use the right encoding to decode the file as unicode */
+       found_encoding = PyTokenizer_FindEncoding(fd);
+       encoding = (found_encoding != NULL) ? found_encoding :
+               (char*)PyUnicode_GetDefaultEncoding();
+       lseek(fd, 0, 0); /* Reset position */
+       fob = PyFile_FromFd(fd, (char*)filename, "r", -1, (char*)encoding,
+               NULL, NULL, 1);
+       PyMem_FREE(found_encoding);
+       if (fob == NULL) {
+               PyErr_Clear();
+               close(fd);
+               return 0;
+       }
 
+       /* get the line number lineno */
        for (i = 0; i < lineno; i++) {
-               char* pLastChar = &linebuf[sizeof(linebuf)-2];
-               do {
-                       *pLastChar = '\0';
-                       if (Py_UniversalNewlineFgets(linebuf, sizeof linebuf, xfp, NULL) == NULL)
-                               break;
-                       /* fgets read *something*; if it didn't get as
-                          far as pLastChar, it must have found a newline
-                          or hit the end of the file;  if pLastChar is \n,
-                          it obviously found a newline; else we haven't
-                          yet seen a newline, so must continue */
-               } while (*pLastChar != '\0' && *pLastChar != '\n');
+               Py_XDECREF(lineobj);
+               lineobj = PyFile_GetLine(fob, -1);
+               if (!lineobj) {
+                       err = -1;
+                       break;
+               }
        }
-       if (i == lineno) {
-               char buf[11];
-               char *p = linebuf;
-               while (*p == ' ' || *p == '\t' || *p == '\014')
-                       p++;
-
-               /* Write some spaces before the line */
-               strcpy(buf, "          ");
-               assert (strlen(buf) == 10);
-               while (indent > 0) {
-                       if(indent < 10)
-                               buf[indent] = '\0';
-                       err = PyFile_WriteString(buf, f);
-                       if (err != 0)
-                               break;
-                       indent -= 10;
+       Py_DECREF(fob);
+       if (!lineobj || !PyUnicode_Check(lineobj)) {
+               Py_XDECREF(lineobj);
+               return err;
+       }
+
+       /* remove the indentation of the line */
+       u = PyUnicode_AS_UNICODE(lineobj);
+       len = PyUnicode_GET_SIZE(lineobj);
+       for (p=u; *p == ' ' || *p == '\t' || *p == '\014'; p++)
+               len--;
+       if (u != p) {
+               PyObject *truncated;
+               truncated = PyUnicode_FromUnicode(p, len);
+               if (truncated) {
+                       Py_DECREF(lineobj);
+                       lineobj = truncated;
+               } else {
+                       PyErr_Clear();
                }
+       }
 
-               if (err == 0)
-                       err = PyFile_WriteString(p, f);
-               if (err == 0 && strchr(p, '\n') == NULL)
-                       err = PyFile_WriteString("\n", f);
+       /* Write some spaces before the line */
+       strcpy(buf, "          ");
+       assert (strlen(buf) == 10);
+       while (indent > 0) {
+               if(indent < 10)
+                       buf[indent] = '\0';
+               err = PyFile_WriteString(buf, f);
+               if (err != 0)
+                       break;
+               indent -= 10;
        }
-       fclose(xfp);
+
+       /* finally display the line */
+       if (err == 0)
+               err = PyFile_WriteObject(lineobj, f, Py_PRINT_RAW);
+       Py_DECREF(lineobj);
+       if  (err == 0)
+               err = PyFile_WriteString("\n", f);
        return err;
 }