bpo-38115: Deal with invalid bytecode offsets in lnotab (GH-16079)

author T. Wouters <thomas@python.org>

Sat, 28 Sep 2019 14:49:15 +0000 (16:49 +0200)

committer Gregory P. Smith <greg@krypto.org>

Sat, 28 Sep 2019 14:49:15 +0000 (07:49 -0700)
author T. Wouters <thomas@python.org>
Sat, 28 Sep 2019 14:49:15 +0000 (16:49 +0200)
committer Gregory P. Smith <greg@krypto.org>
Sat, 28 Sep 2019 14:49:15 +0000 (07:49 -0700)
diff --git a/Lib/dis.py b/Lib/dis.py

index a25fb2b417643dec053935d6cd96c0d3748af514..10e5f7fb08ab21c521e08709b7d46751fa3aa5cc 100644 (file)
--- a/Lib/dis.py
+++ b/Lib/dis.py
@@ -454,6 +454,7 @@ def findlinestarts(code):
      """
      byte_increments = code.co_lnotab[0::2]
      line_increments = code.co_lnotab[1::2]
+    bytecode_len = len(code.co_code)
  
      lastlineno = None
      lineno = code.co_firstlineno
@@ -464,6 +465,10 @@ def findlinestarts(code):
                  yield (addr, lineno)
                  lastlineno = lineno
              addr += byte_incr
+            if addr >= bytecode_len:
+                # The rest of the lnotab byte offsets are past the end of
+                # the bytecode, so the lines were optimized away.
+                return
          if line_incr >= 0x80:
              # line_increments is an array of 8-bit signed integers
              line_incr -= 0x100
diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py

index 47dee33076c5348fd5b71ff0981d8cb10dc5cbd4..23cc36c605375a3dc8dcf33a07c250ecd2ec187a 100644 (file)
--- a/Lib/test/test_peepholer.py
+++ b/Lib/test/test_peepholer.py
@@ -40,6 +40,20 @@ class TestTranforms(BytecodeTestCase):
                  self.fail(f'{instr.opname} at {instr.offset} '
                            f'jumps to {tgt.opname} at {tgt.offset}')
  
+    def check_lnotab(self, code):
+        "Check that the lnotab byte offsets are sensible."
+        code = dis._get_code_object(code)
+        lnotab = list(dis.findlinestarts(code))
+        # Don't bother checking if the line info is sensible, because
+        # most of the line info we can get at comes from lnotab.
+        min_bytecode = min(t[0] for t in lnotab)
+        max_bytecode = max(t[0] for t in lnotab)
+        self.assertGreaterEqual(min_bytecode, 0)
+        self.assertLess(max_bytecode, len(code.co_code))
+        # This could conceivably test more (and probably should, as there
+        # aren't very many tests of lnotab), if peepholer wasn't scheduled
+        # to be replaced anyway.
+
      def test_unot(self):
          # UNARY_NOT POP_JUMP_IF_FALSE  -->  POP_JUMP_IF_TRUE'
          def unot(x):
@@ -48,6 +62,7 @@ class TestTranforms(BytecodeTestCase):
          self.assertNotInBytecode(unot, 'UNARY_NOT')
          self.assertNotInBytecode(unot, 'POP_JUMP_IF_FALSE')
          self.assertInBytecode(unot, 'POP_JUMP_IF_TRUE')
+        self.check_lnotab(unot)
  
      def test_elim_inversion_of_is_or_in(self):
          for line, cmp_op in (
@@ -58,6 +73,7 @@ class TestTranforms(BytecodeTestCase):
              ):
              code = compile(line, '', 'single')
              self.assertInBytecode(code, 'COMPARE_OP', cmp_op)
+            self.check_lnotab(code)
  
      def test_global_as_constant(self):
          # LOAD_GLOBAL None/True/False  -->  LOAD_CONST None/True/False
@@ -75,6 +91,7 @@ class TestTranforms(BytecodeTestCase):
          for func, elem in ((f, None), (g, True), (h, False)):
              self.assertNotInBytecode(func, 'LOAD_GLOBAL')
              self.assertInBytecode(func, 'LOAD_CONST', elem)
+            self.check_lnotab(func)
  
          def f():
              'Adding a docstring made this test fail in Py2.5.0'
@@ -82,6 +99,7 @@ class TestTranforms(BytecodeTestCase):
  
          self.assertNotInBytecode(f, 'LOAD_GLOBAL')
          self.assertInBytecode(f, 'LOAD_CONST', None)
+        self.check_lnotab(f)
  
      def test_while_one(self):
          # Skip over:  LOAD_CONST trueconst  POP_JUMP_IF_FALSE xx
@@ -93,6 +111,7 @@ class TestTranforms(BytecodeTestCase):
              self.assertNotInBytecode(f, elem)
          for elem in ('JUMP_ABSOLUTE',):
              self.assertInBytecode(f, elem)
+        self.check_lnotab(f)
  
      def test_pack_unpack(self):
          for line, elem in (
@@ -104,6 +123,7 @@ class TestTranforms(BytecodeTestCase):
              self.assertInBytecode(code, elem)
              self.assertNotInBytecode(code, 'BUILD_TUPLE')
              self.assertNotInBytecode(code, 'UNPACK_TUPLE')
+            self.check_lnotab(code)
  
      def test_folding_of_tuples_of_constants(self):
          for line, elem in (
@@ -116,6 +136,7 @@ class TestTranforms(BytecodeTestCase):
              code = compile(line,'','single')
              self.assertInBytecode(code, 'LOAD_CONST', elem)
              self.assertNotInBytecode(code, 'BUILD_TUPLE')
+            self.check_lnotab(code)
  
          # Long tuples should be folded too.
          code = compile(repr(tuple(range(10000))),'','single')
@@ -124,6 +145,7 @@ class TestTranforms(BytecodeTestCase):
          load_consts = [instr for instr in dis.get_instructions(code)
                                if instr.opname == 'LOAD_CONST']
          self.assertEqual(len(load_consts), 2)
+        self.check_lnotab(code)
  
          # Bug 1053819:  Tuple of constants misidentified when presented with:
          # . . . opcode_with_arg 100   unary_opcode   BUILD_TUPLE 1  . . .
@@ -141,6 +163,7 @@ class TestTranforms(BytecodeTestCase):
                  0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
                  0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
              ],)
+        self.check_lnotab(crater)
  
      def test_folding_of_lists_of_constants(self):
          for line, elem in (
@@ -153,6 +176,7 @@ class TestTranforms(BytecodeTestCase):
              code = compile(line, '', 'single')
              self.assertInBytecode(code, 'LOAD_CONST', elem)
              self.assertNotInBytecode(code, 'BUILD_LIST')
+            self.check_lnotab(code)
  
      def test_folding_of_sets_of_constants(self):
          for line, elem in (
@@ -166,6 +190,7 @@ class TestTranforms(BytecodeTestCase):
              code = compile(line, '', 'single')
              self.assertNotInBytecode(code, 'BUILD_SET')
              self.assertInBytecode(code, 'LOAD_CONST', elem)
+            self.check_lnotab(code)
  
          # Ensure that the resulting code actually works:
          def f(a):
@@ -176,9 +201,11 @@ class TestTranforms(BytecodeTestCase):
  
          self.assertTrue(f(3))
          self.assertTrue(not f(4))
+        self.check_lnotab(f)
  
          self.assertTrue(not g(3))
          self.assertTrue(g(4))
+        self.check_lnotab(g)
  
  
      def test_folding_of_binops_on_constants(self):
@@ -203,41 +230,50 @@ class TestTranforms(BytecodeTestCase):
              self.assertInBytecode(code, 'LOAD_CONST', elem)
              for instr in dis.get_instructions(code):
                  self.assertFalse(instr.opname.startswith('BINARY_'))
+            self.check_lnotab(code)
  
          # Verify that unfoldables are skipped
          code = compile('a=2+"b"', '', 'single')
          self.assertInBytecode(code, 'LOAD_CONST', 2)
          self.assertInBytecode(code, 'LOAD_CONST', 'b')
+        self.check_lnotab(code)
  
          # Verify that large sequences do not result from folding
          code = compile('a="x"*10000', '', 'single')
          self.assertInBytecode(code, 'LOAD_CONST', 10000)
          self.assertNotIn("x"*10000, code.co_consts)
+        self.check_lnotab(code)
          code = compile('a=1<<1000', '', 'single')
          self.assertInBytecode(code, 'LOAD_CONST', 1000)
          self.assertNotIn(1<<1000, code.co_consts)
+        self.check_lnotab(code)
          code = compile('a=2**1000', '', 'single')
          self.assertInBytecode(code, 'LOAD_CONST', 1000)
          self.assertNotIn(2**1000, code.co_consts)
+        self.check_lnotab(code)
  
      def test_binary_subscr_on_unicode(self):
          # valid code get optimized
          code = compile('"foo"[0]', '', 'single')
          self.assertInBytecode(code, 'LOAD_CONST', 'f')
          self.assertNotInBytecode(code, 'BINARY_SUBSCR')
+        self.check_lnotab(code)
          code = compile('"\u0061\uffff"[1]', '', 'single')
          self.assertInBytecode(code, 'LOAD_CONST', '\uffff')
          self.assertNotInBytecode(code,'BINARY_SUBSCR')
+        self.check_lnotab(code)
  
          # With PEP 393, non-BMP char get optimized
          code = compile('"\U00012345"[0]', '', 'single')
          self.assertInBytecode(code, 'LOAD_CONST', '\U00012345')
          self.assertNotInBytecode(code, 'BINARY_SUBSCR')
+        self.check_lnotab(code)
  
          # invalid code doesn't get optimized
          # out of range
          code = compile('"fuu"[10]', '', 'single')
          self.assertInBytecode(code, 'BINARY_SUBSCR')
+        self.check_lnotab(code)
  
      def test_folding_of_unaryops_on_constants(self):
          for line, elem in (
@@ -252,13 +288,15 @@ class TestTranforms(BytecodeTestCase):
              self.assertInBytecode(code, 'LOAD_CONST', elem)
              for instr in dis.get_instructions(code):
                  self.assertFalse(instr.opname.startswith('UNARY_'))
+            self.check_lnotab(code)
  
          # Check that -0.0 works after marshaling
          def negzero():
              return -(1.0-1.0)
  
-        for instr in dis.get_instructions(code):
+        for instr in dis.get_instructions(negzero):
              self.assertFalse(instr.opname.startswith('UNARY_'))
+        self.check_lnotab(negzero)
  
          # Verify that unfoldables are skipped
          for line, elem, opname in (
@@ -268,6 +306,7 @@ class TestTranforms(BytecodeTestCase):
              code = compile(line, '', 'single')
              self.assertInBytecode(code, 'LOAD_CONST', elem)
              self.assertInBytecode(code, opname)
+            self.check_lnotab(code)
  
      def test_elim_extra_return(self):
          # RETURN LOAD_CONST None RETURN  -->  RETURN
@@ -277,6 +316,7 @@ class TestTranforms(BytecodeTestCase):
          returns = [instr for instr in dis.get_instructions(f)
                            if instr.opname == 'RETURN_VALUE']
          self.assertEqual(len(returns), 1)
+        self.check_lnotab(f)
  
      def test_elim_jump_to_return(self):
          # JUMP_FORWARD to RETURN -->  RETURN
@@ -290,6 +330,7 @@ class TestTranforms(BytecodeTestCase):
          returns = [instr for instr in dis.get_instructions(f)
                            if instr.opname == 'RETURN_VALUE']
          self.assertEqual(len(returns), 2)
+        self.check_lnotab(f)
  
      def test_elim_jump_to_uncond_jump(self):
          # POP_JUMP_IF_FALSE to JUMP_FORWARD --> POP_JUMP_IF_FALSE to non-jump
@@ -302,6 +343,7 @@ class TestTranforms(BytecodeTestCase):
              else:
                  baz()
          self.check_jump_targets(f)
+        self.check_lnotab(f)
  
      def test_elim_jump_to_uncond_jump2(self):
          # POP_JUMP_IF_FALSE to JUMP_ABSOLUTE --> POP_JUMP_IF_FALSE to non-jump
@@ -312,6 +354,7 @@ class TestTranforms(BytecodeTestCase):
                      or d):
                      a = foo()
          self.check_jump_targets(f)
+        self.check_lnotab(f)
  
      def test_elim_jump_to_uncond_jump3(self):
          # Intentionally use two-line expressions to test issue37213.
@@ -320,18 +363,21 @@ class TestTranforms(BytecodeTestCase):
              return ((a and b)
                      and c)
          self.check_jump_targets(f)
+        self.check_lnotab(f)
          self.assertEqual(count_instr_recursively(f, 'JUMP_IF_FALSE_OR_POP'), 2)
          # JUMP_IF_TRUE_OR_POP to JUMP_IF_TRUE_OR_POP --> JUMP_IF_TRUE_OR_POP to non-jump
          def f(a, b, c):
              return ((a or b)
                      or c)
          self.check_jump_targets(f)
+        self.check_lnotab(f)
          self.assertEqual(count_instr_recursively(f, 'JUMP_IF_TRUE_OR_POP'), 2)
          # JUMP_IF_FALSE_OR_POP to JUMP_IF_TRUE_OR_POP --> POP_JUMP_IF_FALSE to non-jump
          def f(a, b, c):
              return ((a and b)
                      or c)
          self.check_jump_targets(f)
+        self.check_lnotab(f)
          self.assertNotInBytecode(f, 'JUMP_IF_FALSE_OR_POP')
          self.assertInBytecode(f, 'JUMP_IF_TRUE_OR_POP')
          self.assertInBytecode(f, 'POP_JUMP_IF_FALSE')
@@ -340,6 +386,7 @@ class TestTranforms(BytecodeTestCase):
              return ((a or b)
                      and c)
          self.check_jump_targets(f)
+        self.check_lnotab(f)
          self.assertNotInBytecode(f, 'JUMP_IF_TRUE_OR_POP')
          self.assertInBytecode(f, 'JUMP_IF_FALSE_OR_POP')
          self.assertInBytecode(f, 'POP_JUMP_IF_TRUE')
@@ -360,6 +407,7 @@ class TestTranforms(BytecodeTestCase):
          returns = [instr for instr in dis.get_instructions(f)
                            if instr.opname == 'RETURN_VALUE']
          self.assertLessEqual(len(returns), 6)
+        self.check_lnotab(f)
  
      def test_elim_jump_after_return2(self):
          # Eliminate dead code: jumps immediately after returns can't be reached
@@ -374,6 +422,7 @@ class TestTranforms(BytecodeTestCase):
          returns = [instr for instr in dis.get_instructions(f)
                            if instr.opname == 'RETURN_VALUE']
          self.assertLessEqual(len(returns), 2)
+        self.check_lnotab(f)
  
      def test_make_function_doesnt_bail(self):
          def f():
@@ -381,6 +430,7 @@ class TestTranforms(BytecodeTestCase):
                  pass
              return g
          self.assertNotInBytecode(f, 'BINARY_ADD')
+        self.check_lnotab(f)
  
      def test_constant_folding(self):
          # Issue #11244: aggressive constant folding.
@@ -401,17 +451,20 @@ class TestTranforms(BytecodeTestCase):
                  self.assertFalse(instr.opname.startswith('UNARY_'))
                  self.assertFalse(instr.opname.startswith('BINARY_'))
                  self.assertFalse(instr.opname.startswith('BUILD_'))
+            self.check_lnotab(code)
  
      def test_in_literal_list(self):
          def containtest():
              return x in [a, b]
          self.assertEqual(count_instr_recursively(containtest, 'BUILD_LIST'), 0)
+        self.check_lnotab(containtest)
  
      def test_iterate_literal_list(self):
          def forloop():
              for x in [a, b]:
                  pass
          self.assertEqual(count_instr_recursively(forloop, 'BUILD_LIST'), 0)
+        self.check_lnotab(forloop)
  
      def test_condition_with_binop_with_bools(self):
          def f():
@@ -419,6 +472,7 @@ class TestTranforms(BytecodeTestCase):
                  return 1
              return 0
          self.assertEqual(f(), 1)
+        self.check_lnotab(f)
  
      def test_if_with_if_expression(self):
          # Check bpo-37289
@@ -427,6 +481,19 @@ class TestTranforms(BytecodeTestCase):
                  return True
              return False
          self.assertTrue(f(True))
+        self.check_lnotab(f)
+
+    def test_trailing_nops(self):
+        # Check the lnotab of a function that even after trivial
+        # optimization has trailing nops, which the lnotab adjustment has to
+        # handle properly (bpo-38115).
+        def f(x):
+            while 1:
+                return 3
+            while 1:
+                return 5
+            return 6
+        self.check_lnotab(f)
  
  
  class TestBuglets(unittest.TestCase):
diff --git a/Misc/NEWS.d/next/Library/2019-09-13-09-24-58.bpo-38115.BOO-Y1.rst b/Misc/NEWS.d/next/Library/2019-09-13-09-24-58.bpo-38115.BOO-Y1.rst

new file mode 100644 (file)

index 0000000..5119c05
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2019-09-13-09-24-58.bpo-38115.BOO-Y1.rst
@@ -0,0 +1 @@
+Fix a bug in dis.findlinestarts() where it would yield bytecode offsets past the end of the bytecode. Document that a code object's co_lnotab can contain such invalid bytecode offsets.
+\ No newline at end of file
diff --git a/Objects/lnotab_notes.txt b/Objects/lnotab_notes.txt

index 3dab2b98661695a6089d1668d0054c997df09efb..71a297971828c015fbbebe09490c4a3d6ced1b7e 100644 (file)
--- a/Objects/lnotab_notes.txt
+++ b/Objects/lnotab_notes.txt
@@ -3,7 +3,9 @@ All about co_lnotab, the line number table.
  Code objects store a field named co_lnotab.  This is an array of unsigned bytes
  disguised as a Python bytes object.  It is used to map bytecode offsets to
  source code line #s for tracebacks and to identify line number boundaries for
-line tracing.
+line tracing.  Because of how the peephole optimizer works internally, lnotab
+may contain bytecode offsets that are no longer valid (for example, if the
+optimizer removed the last line in a function).
  
  The array is conceptually a compressed list of
      (bytecode offset increment, line number increment)
author	T. Wouters <thomas@python.org>
	Sat, 28 Sep 2019 14:49:15 +0000 (16:49 +0200)
committer	Gregory P. Smith <greg@krypto.org>
	Sat, 28 Sep 2019 14:49:15 +0000 (07:49 -0700)
Lib/dis.py		patch \| blob \| history
Lib/test/test_peepholer.py		patch \| blob \| history
Misc/NEWS.d/next/Library/2019-09-13-09-24-58.bpo-38115.BOO-Y1.rst	[new file with mode: 0644]	patch \| blob
Objects/lnotab_notes.txt		patch \| blob \| history