Issue #2382: SyntaxError cursor "^" is now written at the correct position in most cases when multibyte characters are in the line (before "^").

author Serhiy Storchaka <storchaka@gmail.com>

Tue, 21 Jan 2014 20:26:52 +0000 (22:26 +0200)

committer Serhiy Storchaka <storchaka@gmail.com>

Tue, 21 Jan 2014 20:26:52 +0000 (22:26 +0200)
author Serhiy Storchaka <storchaka@gmail.com>
Tue, 21 Jan 2014 20:26:52 +0000 (22:26 +0200)
committer Serhiy Storchaka <storchaka@gmail.com>
Tue, 21 Jan 2014 20:26:52 +0000 (22:26 +0200)
diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py

index 1ad7f97b740ed300e1e9c30835a3aff515f6e94f..fe660bf9b4515d76c812e1624bba4b77e554300d 100644 (file)
--- a/Lib/test/test_exceptions.py
+++ b/Lib/test/test_exceptions.py
@@ -148,6 +148,19 @@ class ExceptionTests(unittest.TestCase):
          ckmsg(s, "'continue' not properly in loop")
          ckmsg("continue\n", "'continue' not properly in loop")
  
+    def testSyntaxErrorOffset(self):
+        def check(src, lineno, offset):
+            with self.assertRaises(SyntaxError) as cm:
+                compile(src, '<fragment>', 'exec')
+            self.assertEqual(cm.exception.lineno, lineno)
+            self.assertEqual(cm.exception.offset, offset)
+
+        check('def fact(x):\n\treturn x!\n', 2, 10)
+        check('1 +\n', 1, 4)
+        check('def spam():\n  print(1)\n print(2)', 3, 10)
+        check('Python = "Python" +', 1, 20)
+        check('Python = "\u1e54\xfd\u0163\u0125\xf2\xf1" +', 1, 20)
+
      @cpython_only
      def testSettingException(self):
          # test that setting an exception at the C level works even if the
diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py

index bca825de43bf2fb75a8512024a6a1380c2a5aa7d..373d9af6418f681aab5c724b30b51501b454b018 100644 (file)
--- a/Lib/test/test_traceback.py
+++ b/Lib/test/test_traceback.py
@@ -32,6 +32,9 @@ class SyntaxTracebackCases(unittest.TestCase):
      def syntax_error_bad_indentation(self):
          compile("def spam():\n  print(1)\n print(2)", "?", "exec")
  
+    def syntax_error_with_caret_non_ascii(self):
+        compile('Python = "\u1e54\xfd\u0163\u0125\xf2\xf1" +', "?", "exec")
+
      def test_caret(self):
          err = self.get_exception_format(self.syntax_error_with_caret,
                                          SyntaxError)
@@ -46,6 +49,12 @@ class SyntaxTracebackCases(unittest.TestCase):
          self.assertTrue(err[2].count('\n') == 1) # and no additional newline
          self.assertTrue(err[1].find("+") == err[2].find("^")) # in the right place
  
+        err = self.get_exception_format(self.syntax_error_with_caret_non_ascii,
+                                        SyntaxError)
+        self.assertIn("^", err[2]) # third line has caret
+        self.assertTrue(err[2].count('\n') == 1) # and no additional newline
+        self.assertTrue(err[1].find("+") == err[2].find("^")) # in the right place
+
      def test_nocaret(self):
          exc = SyntaxError("error", ("x.py", 23, None, "bad syntax"))
          err = traceback.format_exception_only(SyntaxError, exc)
diff --git a/Misc/NEWS b/Misc/NEWS

index e470fa90286325ffd94093d4181492493fe20730..4e3aa45a82e66cb3b65f456759ff0b2e3ab19fa1 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,10 @@ What's New in Python 3.3.4 release candidate 1?
  Core and Builtins
  -----------------
  
+- Issue #2382: SyntaxError cursor "^" is now written at correct position in most
+  cases when multibyte characters are in line (before "^").  This still does
+  not work correctly with wide East Asian characters.
+
  - Issue #18960: The first line of Python script could be executed twice when
    the source encoding was specified on the second line.  Now the source encoding
    declaration on the second line isn't effective if the first line contains
diff --git a/Python/pythonrun.c b/Python/pythonrun.c

index e02dbe2be181e8f3b870da774f8d909e8e3c782b..91d56b78eea21cf1a848954226abf3a142fffd08 100644 (file)
--- a/Python/pythonrun.c
+++ b/Python/pythonrun.c
@@ -2226,6 +2226,7 @@ err_input(perrdetail *err)
      PyObject *v, *w, *errtype, *errtext;
      PyObject *msg_obj = NULL;
      char *msg = NULL;
+    int offset = err->offset;
  
      errtype = PyExc_SyntaxError;
      switch (err->error) {
@@ -2310,11 +2311,20 @@ err_input(perrdetail *err)
          errtext = Py_None;
          Py_INCREF(Py_None);
      } else {
-        errtext = PyUnicode_DecodeUTF8(err->text, strlen(err->text),
+        errtext = PyUnicode_DecodeUTF8(err->text, err->offset,
                                         "replace");
+        if (errtext != NULL) {
+            Py_ssize_t len = strlen(err->text);
+            offset = (int)PyUnicode_GET_LENGTH(errtext);
+            if (len != err->offset) {
+                Py_DECREF(errtext);
+                errtext = PyUnicode_DecodeUTF8(err->text, len,
+                                               "replace");
+            }
+        }
      }
      v = Py_BuildValue("(OiiN)", err->filename,
-                      err->lineno, err->offset, errtext);
+                      err->lineno, offset, errtext);
      if (v != NULL) {
          if (msg_obj)
              w = Py_BuildValue("(OO)", msg_obj, v);
author	Serhiy Storchaka <storchaka@gmail.com>
	Tue, 21 Jan 2014 20:26:52 +0000 (22:26 +0200)
committer	Serhiy Storchaka <storchaka@gmail.com>
	Tue, 21 Jan 2014 20:26:52 +0000 (22:26 +0200)
Lib/test/test_exceptions.py		patch | blob | history
Lib/test/test_traceback.py		patch | blob | history
Misc/NEWS		patch | blob | history
Python/pythonrun.c		patch | blob | history