Close #19378: address flaws in the new dis module APIs

author Nick Coghlan <ncoghlan@gmail.com>

Wed, 6 Nov 2013 12:08:36 +0000 (22:08 +1000)

committer Nick Coghlan <ncoghlan@gmail.com>

Wed, 6 Nov 2013 12:08:36 +0000 (22:08 +1000)
author Nick Coghlan <ncoghlan@gmail.com>
Wed, 6 Nov 2013 12:08:36 +0000 (22:08 +1000)
committer Nick Coghlan <ncoghlan@gmail.com>
Wed, 6 Nov 2013 12:08:36 +0000 (22:08 +1000)
diff --git a/Doc/library/dis.rst b/Doc/library/dis.rst

index 40aaeebeda6393a1876ce430ca3d5c927fe45a14..79ec9d7310adf69911b0b3756a96271542d1a52f 100644 (file)
--- a/Doc/library/dis.rst
+++ b/Doc/library/dis.rst
@@ -44,36 +44,39 @@ The bytecode analysis API allows pieces of Python code to be wrapped in a
  :class:`Bytecode` object that provides easy access to details of the
  compiled code.
  
-.. class:: Bytecode
+.. class:: Bytecode(x, *, first_line=None)
  
-   The bytecode operations of a piece of code
+   Analyse the bytecode corresponding to a function, method, string of
+   source code, or a code object (as returned by :func:`compile`).
  
-   This is a convenient wrapper around many of the functions listed below.
-   Instantiate it with a function, method, string of code, or a code object
-   (as returned by :func:`compile`).
+   This is a convenience wrapper around many of the functions listed below,
+   most notably :func:`get_instructions`, as iterating over a
+   :class:`ByteCode` instance yields the bytecode operations as
+   :class:`Instruction` instances.
  
-   Iterating over this yields the bytecode operations as :class:`Instruction`
-   instances.
+   If *first_line* is not None, it indicates the line number that should
+   be reported for the first source line in the disassembled code.
+   Otherwise, the source line information (if any) is taken directly from
+   the disassembled code object.
  
     .. data:: codeobj
  
        The compiled code object.
  
-   .. method:: display_code(*, file=None)
+   .. data:: first_line
  
-      Print a formatted view of the bytecode operations, like :func:`dis`.
+      The first source line of the code object (if available)
+
+   .. method:: dis()
+
+      Return a formatted view of the bytecode operations (the same as
+      printed by :func:`dis`, but returned as a multi-line string).
  
     .. method:: info()
  
        Return a formatted multi-line string with detailed information about the
        code object, like :func:`code_info`.
  
-   .. method:: show_info(*, file=None)
-
-      Print the information about the code object as returned by :meth:`info`.
-
-   .. versionadded:: 3.4
-
  Example::
  
      >>> bytecode = dis.Bytecode(myfunc)
@@ -176,7 +179,7 @@ object isn't useful:
        Added ``file`` parameter
  
  
-.. function:: get_instructions(x, *, line_offset=0)
+.. function:: get_instructions(x, *, first_line=None)
  
     Return an iterator over the instructions in the supplied function, method,
     source code string or code object.
@@ -184,8 +187,10 @@ object isn't useful:
     The iterator generates a series of :class:`Instruction` named tuples
     giving the details of each operation in the supplied code.
  
-   The given *line_offset* is added to the ``starts_line`` attribute of any
-   instructions that start a new line.
+   If *first_line* is not None, it indicates the line number that should
+   be reported for the first source line in the disassembled code.
+   Otherwise, the source line information (if any) is taken directly from
+   the disassembled code object.
  
     .. versionadded:: 3.4
  
diff --git a/Lib/dis.py b/Lib/dis.py

index 2631f460912020c29a1a407ca407b2dfad065bec..1fafcc5346f50f64dae21df9a13694c80842f490 100644 (file)
--- a/Lib/dis.py
+++ b/Lib/dis.py
@@ -3,6 +3,7 @@
  import sys
  import types
  import collections
+import io
  
  from opcode import *
  from opcode import __all__ as _opcodes_all
@@ -34,7 +35,7 @@ def dis(x=None, *, file=None):
  
      """
      if x is None:
-        distb()
+        distb(file=file)
          return
      if hasattr(x, '__func__'):  # Method
          x = x.__func__
@@ -46,7 +47,7 @@ def dis(x=None, *, file=None):
              if isinstance(x1, _have_code):
                  print("Disassembly of %s:" % name, file=file)
                  try:
-                    dis(x1)
+                    dis(x1, file=file)
                  except TypeError as msg:
                      print("Sorry:", msg, file=file)
                  print(file=file)
@@ -203,21 +204,27 @@ class Instruction(_Instruction):
              # Column: Opcode argument details
              if self.argrepr:
                  fields.append('(' + self.argrepr + ')')
-        return ' '.join(fields)
+        return ' '.join(fields).rstrip()
  
  
-def get_instructions(x, *, line_offset=0):
+def get_instructions(x, *, first_line=None):
      """Iterator for the opcodes in methods, functions or code
  
      Generates a series of Instruction named tuples giving the details of
      each operations in the supplied code.
  
-    The given line offset is added to the 'starts_line' attribute of any
-    instructions that start a new line.
+    If *first_line* is not None, it indicates the line number that should
+    be reported for the first source line in the disassembled code.
+    Otherwise, the source line information (if any) is taken directly from
+    the disassembled code object.
      """
      co = _get_code_object(x)
      cell_names = co.co_cellvars + co.co_freevars
      linestarts = dict(findlinestarts(co))
+    if first_line is not None:
+        line_offset = first_line - co.co_firstlineno
+    else:
+        line_offset = 0
      return _get_instructions_bytes(co.co_code, co.co_varnames, co.co_names,
                                     co.co_consts, cell_names, linestarts,
                                     line_offset)
@@ -320,13 +327,14 @@ def disassemble(co, lasti=-1, *, file=None):
  
  def _disassemble_bytes(code, lasti=-1, varnames=None, names=None,
                         constants=None, cells=None, linestarts=None,
-                       *, file=None):
+                       *, file=None, line_offset=0):
      # Omit the line number column entirely if we have no line number info
      show_lineno = linestarts is not None
      # TODO?: Adjust width upwards if max(linestarts.values()) >= 1000?
      lineno_width = 3 if show_lineno else 0
      for instr in _get_instructions_bytes(code, varnames, names,
-                                         constants, cells, linestarts):
+                                         constants, cells, linestarts,
+                                         line_offset=line_offset):
          new_source_line = (show_lineno and
                             instr.starts_line is not None and
                             instr.offset > 0)
@@ -398,40 +406,44 @@ class Bytecode:
  
      Iterating over this yields the bytecode operations as Instruction instances.
      """
-    def __init__(self, x):
-        self.codeobj = _get_code_object(x)
-        self.cell_names = self.codeobj.co_cellvars + self.codeobj.co_freevars
-        self.linestarts = dict(findlinestarts(self.codeobj))
-        self.line_offset = 0
-        self.original_object = x
+    def __init__(self, x, *, first_line=None):
+        self.codeobj = co = _get_code_object(x)
+        if first_line is None:
+            self.first_line = co.co_firstlineno
+            self._line_offset = 0
+        else:
+            self.first_line = first_line
+            self._line_offset = first_line - co.co_firstlineno
+        self._cell_names = co.co_cellvars + co.co_freevars
+        self._linestarts = dict(findlinestarts(co))
+        self._original_object = x
  
      def __iter__(self):
          co = self.codeobj
          return _get_instructions_bytes(co.co_code, co.co_varnames, co.co_names,
-                                   co.co_consts, self.cell_names,
-                                   self.linestarts, self.line_offset)
+                                       co.co_consts, self._cell_names,
+                                       self._linestarts,
+                                       line_offset=self._line_offset)
  
      def __repr__(self):
-        return "{}({!r})".format(self.__class__.__name__, self.original_object)
+        return "{}({!r})".format(self.__class__.__name__,
+                                 self._original_object)
  
      def info(self):
          """Return formatted information about the code object."""
          return _format_code_info(self.codeobj)
  
-    def show_info(self, *, file=None):
-        """Print the information about the code object as returned by info()."""
-        print(self.info(), file=file)
-
-    def display_code(self, *, file=None):
-        """Print a formatted view of the bytecode operations.
-        """
+    def dis(self):
+        """Return a formatted view of the bytecode operations."""
          co = self.codeobj
-        return _disassemble_bytes(co.co_code, varnames=co.co_varnames,
-                                  names=co.co_names, constants=co.co_consts,
-                                  cells=self.cell_names,
-                                  linestarts=self.linestarts,
-                                  file=file
-                                 )
+        with io.StringIO() as output:
+            _disassemble_bytes(co.co_code, varnames=co.co_varnames,
+                               names=co.co_names, constants=co.co_consts,
+                               cells=self._cell_names,
+                               linestarts=self._linestarts,
+                               line_offset=self._line_offset,
+                               file=output)
+            return output.getvalue()
  
  
  def _test():
diff --git a/Lib/test/bytecode_helper.py b/Lib/test/bytecode_helper.py

index c4943cda00de4e6dc021d2447388d4cb077835d4..58b4209f5543bcb19a2d9d90102e283282936a4d 100644 (file)
--- a/Lib/test/bytecode_helper.py
+++ b/Lib/test/bytecode_helper.py
@@ -14,37 +14,6 @@ class BytecodeTestCase(unittest.TestCase):
          dis.dis(co, file=s)
          return s.getvalue()
  
-    def assertInstructionMatches(self, instr, expected, *, line_offset=0):
-        # Deliberately test opname first, since that gives a more
-        # meaningful error message than testing opcode
-        self.assertEqual(instr.opname, expected.opname)
-        self.assertEqual(instr.opcode, expected.opcode)
-        self.assertEqual(instr.arg, expected.arg)
-        self.assertEqual(instr.argval, expected.argval)
-        self.assertEqual(instr.argrepr, expected.argrepr)
-        self.assertEqual(instr.offset, expected.offset)
-        if expected.starts_line is None:
-            self.assertIsNone(instr.starts_line)
-        else:
-            self.assertEqual(instr.starts_line,
-                                expected.starts_line + line_offset)
-        self.assertEqual(instr.is_jump_target, expected.is_jump_target)
-
-
-    def assertBytecodeExactlyMatches(self, x, expected, *, line_offset=0):
-        """Throws AssertionError if any discrepancy is found in bytecode
-
-        *x* is the object to be introspected
-        *expected* is a list of dis.Instruction objects
-
-        Set *line_offset* as appropriate to adjust for the location of the
-        object to be disassembled within the test file. If the expected list
-        assumes the first line is line 1, then an appropriate offset would be
-        ``1 - f.__code__.co_firstlineno``.
-        """
-        actual = dis.get_instructions(x, line_offset=line_offset)
-        self.assertEqual(list(actual), expected)
-
      def assertInBytecode(self, x, opname, argval=_UNSPECIFIED):
          """Returns instr if op is found, otherwise throws AssertionError"""
          for instr in dis.get_instructions(x):
diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py

index 8610c5dcd81b1d0257a63ede1e00eee979d079b0..efc82f1aee3f00279002d9f542854982bb4f8977 100644 (file)
--- a/Lib/test/test_dis.py
+++ b/Lib/test/test_dis.py
@@ -8,6 +8,7 @@ import sys
  import dis
  import io
  import types
+import contextlib
  
  class _C:
      def __init__(self, x):
@@ -176,30 +177,20 @@ dis_compound_stmt_str = """\
  class DisTests(unittest.TestCase):
  
      def get_disassembly(self, func, lasti=-1, wrapper=True):
-        s = io.StringIO()
-        save_stdout = sys.stdout
-        sys.stdout = s
-        try:
+        # We want to test the default printing behaviour, not the file arg
+        output = io.StringIO()
+        with contextlib.redirect_stdout(output):
              if wrapper:
                  dis.dis(func)
              else:
                  dis.disassemble(func, lasti)
-        finally:
-            sys.stdout = save_stdout
-        # Trim trailing blanks (if any).
-        return [line.rstrip() for line in s.getvalue().splitlines()]
+        return output.getvalue()
  
      def get_disassemble_as_string(self, func, lasti=-1):
-        return '\n'.join(self.get_disassembly(func, lasti, False))
+        return self.get_disassembly(func, lasti, False)
  
      def do_disassembly_test(self, func, expected):
-        lines = self.get_disassembly(func)
-        expected = expected.splitlines()
-        if expected != lines:
-            self.fail(
-                "events did not match expectation:\n" +
-                "\n".join(difflib.ndiff(expected,
-                                        lines)))
+        self.assertEqual(self.get_disassembly(func), expected)
  
      def test_opmap(self):
          self.assertEqual(dis.opmap["NOP"], 9)
@@ -290,6 +281,20 @@ class DisTests(unittest.TestCase):
      def test_dis_object(self):
          self.assertRaises(TypeError, dis.dis, object())
  
+class DisWithFileTests(DisTests):
+
+    # Run the tests again, using the file arg instead of print
+    def get_disassembly(self, func, lasti=-1, wrapper=True):
+        # We want to test the default printing behaviour, not the file arg
+        output = io.StringIO()
+        if wrapper:
+            dis.dis(func, file=output)
+        else:
+            dis.disassemble(func, lasti, file=output)
+        return output.getvalue()
+
+
+
  code_info_code_info = """\
  Name:              code_info
  Filename:          (.*)
@@ -482,26 +487,29 @@ def jumpy():
          print("OK, now we're done")
  
  # End fodder for opinfo generation tests
-expected_outer_offset = 1 - outer.__code__.co_firstlineno
-expected_jumpy_offset = 1 - jumpy.__code__.co_firstlineno
+expected_outer_line = 1
+_line_offset = outer.__code__.co_firstlineno - 1
  code_object_f = outer.__code__.co_consts[3]
+expected_f_line = code_object_f.co_firstlineno - _line_offset
  code_object_inner = code_object_f.co_consts[3]
+expected_inner_line = code_object_inner.co_firstlineno - _line_offset
+expected_jumpy_line = 1
  
  # The following lines are useful to regenerate the expected results after
  # either the fodder is modified or the bytecode generation changes
  # After regeneration, update the references to code_object_f and
  # code_object_inner before rerunning the tests
  
-#_instructions = dis.get_instructions(outer, line_offset=expected_outer_offset)
+#_instructions = dis.get_instructions(outer, first_line=expected_outer_line)
  #print('expected_opinfo_outer = [\n  ',
        #',\n  '.join(map(str, _instructions)), ',\n]', sep='')
-#_instructions = dis.get_instructions(outer(), line_offset=expected_outer_offset)
+#_instructions = dis.get_instructions(outer(), first_line=expected_outer_line)
  #print('expected_opinfo_f = [\n  ',
        #',\n  '.join(map(str, _instructions)), ',\n]', sep='')
-#_instructions = dis.get_instructions(outer()(), line_offset=expected_outer_offset)
+#_instructions = dis.get_instructions(outer()(), first_line=expected_outer_line)
  #print('expected_opinfo_inner = [\n  ',
        #',\n  '.join(map(str, _instructions)), ',\n]', sep='')
-#_instructions = dis.get_instructions(jumpy, line_offset=expected_jumpy_offset)
+#_instructions = dis.get_instructions(jumpy, first_line=expected_jumpy_line)
  #print('expected_opinfo_jumpy = [\n  ',
        #',\n  '.join(map(str, _instructions)), ',\n]', sep='')
  
@@ -671,42 +679,75 @@ expected_opinfo_jumpy = [
    Instruction(opname='RETURN_VALUE', opcode=83, arg=None, argval=None, argrepr='', offset=243, starts_line=None, is_jump_target=False),
  ]
  
+# One last piece of inspect fodder to check the default line number handling
+def simple(): pass
+expected_opinfo_simple = [
+  Instruction(opname='LOAD_CONST', opcode=100, arg=0, argval=None, argrepr='None', offset=0, starts_line=simple.__code__.co_firstlineno, is_jump_target=False),
+  Instruction(opname='RETURN_VALUE', opcode=83, arg=None, argval=None, argrepr='', offset=3, starts_line=None, is_jump_target=False)
+]
+
+
  class InstructionTests(BytecodeTestCase):
+
+    def test_default_first_line(self):
+        actual = dis.get_instructions(simple)
+        self.assertEqual(list(actual), expected_opinfo_simple)
+
+    def test_first_line_set_to_None(self):
+        actual = dis.get_instructions(simple, first_line=None)
+        self.assertEqual(list(actual), expected_opinfo_simple)
+
      def test_outer(self):
-        self.assertBytecodeExactlyMatches(outer, expected_opinfo_outer,
-                                          line_offset=expected_outer_offset)
+        actual = dis.get_instructions(outer, first_line=expected_outer_line)
+        self.assertEqual(list(actual), expected_opinfo_outer)
  
      def test_nested(self):
          with captured_stdout():
              f = outer()
-        self.assertBytecodeExactlyMatches(f, expected_opinfo_f,
-                                          line_offset=expected_outer_offset)
+        actual = dis.get_instructions(f, first_line=expected_f_line)
+        self.assertEqual(list(actual), expected_opinfo_f)
  
      def test_doubly_nested(self):
          with captured_stdout():
              inner = outer()()
-        self.assertBytecodeExactlyMatches(inner, expected_opinfo_inner,
-                                          line_offset=expected_outer_offset)
+        actual = dis.get_instructions(inner, first_line=expected_inner_line)
+        self.assertEqual(list(actual), expected_opinfo_inner)
  
      def test_jumpy(self):
-        self.assertBytecodeExactlyMatches(jumpy, expected_opinfo_jumpy,
-                                          line_offset=expected_jumpy_offset)
+        actual = dis.get_instructions(jumpy, first_line=expected_jumpy_line)
+        self.assertEqual(list(actual), expected_opinfo_jumpy)
  
+# get_instructions has its own tests above, so can rely on it to validate
+# the object oriented API
  class BytecodeTests(unittest.TestCase):
      def test_instantiation(self):
          # Test with function, method, code string and code object
          for obj in [_f, _C(1).__init__, "a=1", _f.__code__]:
-            b = dis.Bytecode(obj)
-            self.assertIsInstance(b.codeobj, types.CodeType)
+            with self.subTest(obj=obj):
+                b = dis.Bytecode(obj)
+                self.assertIsInstance(b.codeobj, types.CodeType)
  
          self.assertRaises(TypeError, dis.Bytecode, object())
  
      def test_iteration(self):
-        b = dis.Bytecode(_f)
-        for instr in b:
-            self.assertIsInstance(instr, dis.Instruction)
-
-        assert len(list(b)) > 0  # Iterating should yield at least 1 instruction
+        for obj in [_f, _C(1).__init__, "a=1", _f.__code__]:
+            with self.subTest(obj=obj):
+                via_object = list(dis.Bytecode(obj))
+                via_generator = list(dis.get_instructions(obj))
+                self.assertEqual(via_object, via_generator)
+
+    def test_explicit_first_line(self):
+        actual = dis.Bytecode(outer, first_line=expected_outer_line)
+        self.assertEqual(list(actual), expected_opinfo_outer)
+
+    def test_source_line_in_disassembly(self):
+        # Use the line in the source code
+        actual = dis.Bytecode(simple).dis()[:3]
+        expected = "{:>3}".format(simple.__code__.co_firstlineno)
+        self.assertEqual(actual, expected)
+        # Use an explicit first line number
+        actual = dis.Bytecode(simple, first_line=350).dis()[:3]
+        self.assertEqual(actual, "350")
  
      def test_info(self):
          self.maxDiff = 1000
@@ -714,16 +755,14 @@ class BytecodeTests(unittest.TestCase):
              b = dis.Bytecode(x)
              self.assertRegex(b.info(), expected)
  
-    def test_display_code(self):
-        b = dis.Bytecode(_f)
-        output = io.StringIO()
-        b.display_code(file=output)
-        result = [line.rstrip() for line in output.getvalue().splitlines()]
-        self.assertEqual(result, dis_f.splitlines())
+    def test_disassembled(self):
+        actual = dis.Bytecode(_f).dis()
+        self.assertEqual(actual, dis_f)
  
  
  def test_main():
-    run_unittest(DisTests, CodeInfoTests, InstructionTests, BytecodeTests)
+    run_unittest(DisTests, DisWithFileTests, CodeInfoTests,
+                 InstructionTests, BytecodeTests)
  
  if __name__ == "__main__":
      test_main()
diff --git a/Misc/NEWS b/Misc/NEWS

index 13f6b70b146820565f475edb6dd989035a7221c0..2c385391599c6e37e629c0581c72cd9a971b4aaf 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -31,6 +31,20 @@ Core and Builtins
  Library
  -------
  
+- Issue #19378: Fixed a number of cases in the dis module where the new
+  "file" parameter was not being honoured correctly
+
+- Issue #19378: Removed the "dis.Bytecode.show_info" method
+
+- Issue #19378: Renamed the "dis.Bytecode.display_code" method to
+  "dis.Bytecode.dis" and converted it to returning a string rather than
+  printing output.
+
+- Issue #19378: the "line_offset" parameter in the new "dis.get_instructions"
+  API has been renamed to "first_line" (and the default value and usage
+  changed accordingly). This should reduce confusion with the more common use
+  of "offset" in the dis docs to refer to bytecode offsets.
+
  - Issue #18678: Corrected spwd struct member names in spwd module:
    sp_nam->sp_namp, and sp_pwd->sp_pwdp.  The old names are kept as extra
    structseq members, for backward compatibility.
@@ -169,6 +183,12 @@ Library
  Tests
  -----
  
+- Issue #19378: the main dis module tests are now run with both stdout
+  redirection *and* passing an explicit file parameter
+
+- Issue #19378: removed the not-actually-helpful assertInstructionMatches
+  and assertBytecodeExactlyMatches helpers from bytecode_helper
+
  - Issue #18702: All skipped tests now reported as skipped.
  
  - Issue #19439: interpreter embedding tests are now executed on Windows
author	Nick Coghlan <ncoghlan@gmail.com>
	Wed, 6 Nov 2013 12:08:36 +0000 (22:08 +1000)
committer	Nick Coghlan <ncoghlan@gmail.com>
	Wed, 6 Nov 2013 12:08:36 +0000 (22:08 +1000)
Doc/library/dis.rst		patch \| blob \| history
Lib/dis.py		patch \| blob \| history
Lib/test/bytecode_helper.py		patch \| blob \| history
Lib/test/test_dis.py		patch \| blob \| history
Misc/NEWS		patch \| blob \| history