Do not call into the Python API if an exception is set (#24022)

author Benjamin Peterson <benjamin@python.org>

Tue, 21 Apr 2015 16:05:19 +0000 (12:05 -0400)

committer Benjamin Peterson <benjamin@python.org>

Tue, 21 Apr 2015 16:05:19 +0000 (12:05 -0400)
author Benjamin Peterson <benjamin@python.org>
Tue, 21 Apr 2015 16:05:19 +0000 (12:05 -0400)
committer Benjamin Peterson <benjamin@python.org>
Tue, 21 Apr 2015 16:05:19 +0000 (12:05 -0400)
diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py

index 611667690fe3e2efe65e4141a2be2790dd6926c3..cff3c9ea0b5ed6f141c4ea6b2fdd583dcb9db3ba 100644 (file)
--- a/Lib/test/test_compile.py
+++ b/Lib/test/test_compile.py
@@ -1,9 +1,11 @@
  import math
+import os
  import unittest
  import sys
  import _ast
+import tempfile
  import types
-from test import support
+from test import support, script_helper
  
  class TestSpecifics(unittest.TestCase):
  
@@ -492,6 +494,16 @@ if 1:
          self.assertInvalidSingle('f()\nxy # blah\nblah()')
          self.assertInvalidSingle('x = 5 # comment\nx = 6\n')
  
+    def test_particularly_evil_undecodable(self):
+        # Issue 24022
+        src = b'0000\x00\n00000000000\n\x00\n\x9e\n'
+        with tempfile.TemporaryDirectory() as tmpd:
+            fn = os.path.join(tmpd, "bad.py")
+            with open(fn, "wb") as fp:
+                fp.write(src)
+            res = script_helper.run_python_until_end(fn)[0]
+        self.assertIn(b"Non-UTF-8", res.err)
+
      @support.cpython_only
      def test_compiler_recursion_limit(self):
          # Expected limit is sys.getrecursionlimit() * the scaling factor
diff --git a/Misc/NEWS b/Misc/NEWS

index a6a3d822dbdd8bb6d2f60241d4abe2279c53c384..183f7d19ca69ccd2695a5c914b72c88c12fbe1d7 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,8 @@ Release date: tba
  Core and Builtins
  -----------------
  
+- Issue #24022: Fix tokenizer crash when processing undecodable source code.
+
  - Issue #23309: Avoid a deadlock at shutdown if a daemon thread is aborted
    while it is holding a lock to a buffered I/O object, and the main thread
    tries to use the same I/O object (typically stdout or stderr).  A fatal
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c

index 22accd1061aeaffaeca1bfdf013404fa169b0853..5e041ea5b309dd322dde006164d9900ef16abfde 100644 (file)
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -1301,6 +1301,8 @@ verify_identifier(struct tok_state *tok)
  {
      PyObject *s;
      int result;
+    if (tok->decoding_erred)
+        return 0;
      s = PyUnicode_DecodeUTF8(tok->start, tok->cur - tok->start, NULL);
      if (s == NULL || PyUnicode_READY(s) == -1) {
          if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
@@ -1469,11 +1471,8 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
              c = tok_nextc(tok);
          }
          tok_backup(tok, c);
-        if (nonascii &&
-            !verify_identifier(tok)) {
-            tok->done = E_IDENTIFIER;
+        if (nonascii && !verify_identifier(tok))
              return ERRORTOKEN;
-        }
          *p_start = tok->start;
          *p_end = tok->cur;
          return NAME;
author	Benjamin Peterson <benjamin@python.org>
	Tue, 21 Apr 2015 16:05:19 +0000 (12:05 -0400)
committer	Benjamin Peterson <benjamin@python.org>
	Tue, 21 Apr 2015 16:05:19 +0000 (12:05 -0400)
Lib/test/test_compile.py		patch \| blob \| history
Misc/NEWS		patch \| blob \| history
Parser/tokenizer.c		patch \| blob \| history