OP '[' (1, 34) (1, 35)
NUMBER '5' (1, 35) (1, 36)
OP ']' (1, 36) (1, 37)
+ """)
- Multiplicative
-
- >>> dump_tokens("x = 1//1*1/5*12%0x12@42")
- ENCODING 'utf-8' (0, 0) (0, 0)
+ def test_multiplicative(self):
+ # Multiplicative
- self.check_tokenize("x = 1//1*1/5*12%0x12", """\
++ self.check_tokenize("x = 1//1*1/5*12%0x12@42", """\
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '1' (1, 4) (1, 5)
NUMBER '12' (1, 13) (1, 15)
OP '%' (1, 15) (1, 16)
NUMBER '0x12' (1, 16) (1, 20)
+ OP '@' (1, 20) (1, 21)
+ NUMBER '42' (1, 21) (1, 23)
+ """)
- Unary
-
- >>> dump_tokens("~1 ^ 1 & 1 |1 ^ -1")
- ENCODING 'utf-8' (0, 0) (0, 0)
+ def test_unary(self):
+ # Unary
+ self.check_tokenize("~1 ^ 1 & 1 |1 ^ -1", """\
OP '~' (1, 0) (1, 1)
NUMBER '1' (1, 1) (1, 2)
OP '^' (1, 3) (1, 4)
NAME 'grün' (2, 0) (2, 4)
OP '=' (2, 5) (2, 6)
STRING "U'green'" (2, 7) (2, 15)
+ """)
- Async/await extension:
-
- >>> dump_tokens("async = 1")
- ENCODING 'utf-8' (0, 0) (0, 0)
++ def test_async(self):
++ # Async/await extension:
++ self.check_tokenize("async = 1", """\
+ NAME 'async' (1, 0) (1, 5)
+ OP '=' (1, 6) (1, 7)
+ NUMBER '1' (1, 8) (1, 9)
++ """)
+
- >>> dump_tokens("a = (async = 1)")
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize("a = (async = 1)", """\
+ NAME 'a' (1, 0) (1, 1)
+ OP '=' (1, 2) (1, 3)
+ OP '(' (1, 4) (1, 5)
+ NAME 'async' (1, 5) (1, 10)
+ OP '=' (1, 11) (1, 12)
+ NUMBER '1' (1, 13) (1, 14)
+ OP ')' (1, 14) (1, 15)
++ """)
+
- >>> dump_tokens("async()")
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize("async()", """\
+ NAME 'async' (1, 0) (1, 5)
+ OP '(' (1, 5) (1, 6)
+ OP ')' (1, 6) (1, 7)
++ """)
+
- >>> dump_tokens("class async(Bar):pass")
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize("class async(Bar):pass", """\
+ NAME 'class' (1, 0) (1, 5)
+ NAME 'async' (1, 6) (1, 11)
+ OP '(' (1, 11) (1, 12)
+ NAME 'Bar' (1, 12) (1, 15)
+ OP ')' (1, 15) (1, 16)
+ OP ':' (1, 16) (1, 17)
+ NAME 'pass' (1, 17) (1, 21)
++ """)
+
- >>> dump_tokens("class async:pass")
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize("class async:pass", """\
+ NAME 'class' (1, 0) (1, 5)
+ NAME 'async' (1, 6) (1, 11)
+ OP ':' (1, 11) (1, 12)
+ NAME 'pass' (1, 12) (1, 16)
++ """)
+
- >>> dump_tokens("await = 1")
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize("await = 1", """\
+ NAME 'await' (1, 0) (1, 5)
+ OP '=' (1, 6) (1, 7)
+ NUMBER '1' (1, 8) (1, 9)
++ """)
+
- >>> dump_tokens("foo.async")
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize("foo.async", """\
+ NAME 'foo' (1, 0) (1, 3)
+ OP '.' (1, 3) (1, 4)
+ NAME 'async' (1, 4) (1, 9)
++ """)
+
- >>> dump_tokens("async for a in b: pass")
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize("async for a in b: pass", """\
+ NAME 'async' (1, 0) (1, 5)
+ NAME 'for' (1, 6) (1, 9)
+ NAME 'a' (1, 10) (1, 11)
+ NAME 'in' (1, 12) (1, 14)
+ NAME 'b' (1, 15) (1, 16)
+ OP ':' (1, 16) (1, 17)
+ NAME 'pass' (1, 18) (1, 22)
++ """)
+
- >>> dump_tokens("async with a as b: pass")
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize("async with a as b: pass", """\
+ NAME 'async' (1, 0) (1, 5)
+ NAME 'with' (1, 6) (1, 10)
+ NAME 'a' (1, 11) (1, 12)
+ NAME 'as' (1, 13) (1, 15)
+ NAME 'b' (1, 16) (1, 17)
+ OP ':' (1, 17) (1, 18)
+ NAME 'pass' (1, 19) (1, 23)
++ """)
+
- >>> dump_tokens("async.foo")
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize("async.foo", """\
+ NAME 'async' (1, 0) (1, 5)
+ OP '.' (1, 5) (1, 6)
+ NAME 'foo' (1, 6) (1, 9)
++ """)
+
- >>> dump_tokens("async")
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize("async", """\
+ NAME 'async' (1, 0) (1, 5)
++ """)
+
- >>> dump_tokens("async\\n#comment\\nawait")
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize("async\n#comment\nawait", """\
+ NAME 'async' (1, 0) (1, 5)
+ NEWLINE '\\n' (1, 5) (1, 6)
+ COMMENT '#comment' (2, 0) (2, 8)
+ NL '\\n' (2, 8) (2, 9)
+ NAME 'await' (3, 0) (3, 5)
++ """)
+
- >>> dump_tokens("async\\n...\\nawait")
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize("async\n...\nawait", """\
+ NAME 'async' (1, 0) (1, 5)
+ NEWLINE '\\n' (1, 5) (1, 6)
+ OP '...' (2, 0) (2, 3)
+ NEWLINE '\\n' (2, 3) (2, 4)
+ NAME 'await' (3, 0) (3, 5)
++ """)
+
- >>> dump_tokens("async\\nawait")
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize("async\nawait", """\
+ NAME 'async' (1, 0) (1, 5)
+ NEWLINE '\\n' (1, 5) (1, 6)
+ NAME 'await' (2, 0) (2, 5)
++ """)
+
- >>> dump_tokens("foo.async + 1")
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize("foo.async + 1", """\
+ NAME 'foo' (1, 0) (1, 3)
+ OP '.' (1, 3) (1, 4)
+ NAME 'async' (1, 4) (1, 9)
+ OP '+' (1, 10) (1, 11)
+ NUMBER '1' (1, 12) (1, 13)
++ """)
+
- >>> dump_tokens("async def foo(): pass")
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize("async def foo(): pass", """\
+ ASYNC 'async' (1, 0) (1, 5)
+ NAME 'def' (1, 6) (1, 9)
+ NAME 'foo' (1, 10) (1, 13)
+ OP '(' (1, 13) (1, 14)
+ OP ')' (1, 14) (1, 15)
+ OP ':' (1, 15) (1, 16)
+ NAME 'pass' (1, 17) (1, 21)
-
- >>> dump_tokens('''async def foo():
- ... def foo(await):
- ... await = 1
- ... if 1:
- ... await
- ... async += 1
- ... ''')
- ENCODING 'utf-8' (0, 0) (0, 0)
++ """)
++
++ self.check_tokenize('''\
++async def foo():
++ def foo(await):
++ await = 1
++ if 1:
++ await
++async += 1
++''', """\
+ ASYNC 'async' (1, 0) (1, 5)
+ NAME 'def' (1, 6) (1, 9)
+ NAME 'foo' (1, 10) (1, 13)
+ OP '(' (1, 13) (1, 14)
+ OP ')' (1, 14) (1, 15)
+ OP ':' (1, 15) (1, 16)
+ NEWLINE '\\n' (1, 16) (1, 17)
+ INDENT ' ' (2, 0) (2, 2)
+ NAME 'def' (2, 2) (2, 5)
+ NAME 'foo' (2, 6) (2, 9)
+ OP '(' (2, 9) (2, 10)
+ AWAIT 'await' (2, 10) (2, 15)
+ OP ')' (2, 15) (2, 16)
+ OP ':' (2, 16) (2, 17)
+ NEWLINE '\\n' (2, 17) (2, 18)
+ INDENT ' ' (3, 0) (3, 4)
+ AWAIT 'await' (3, 4) (3, 9)
+ OP '=' (3, 10) (3, 11)
+ NUMBER '1' (3, 12) (3, 13)
+ NEWLINE '\\n' (3, 13) (3, 14)
+ DEDENT '' (4, 2) (4, 2)
+ NAME 'if' (4, 2) (4, 4)
+ NUMBER '1' (4, 5) (4, 6)
+ OP ':' (4, 6) (4, 7)
+ NEWLINE '\\n' (4, 7) (4, 8)
+ INDENT ' ' (5, 0) (5, 4)
+ AWAIT 'await' (5, 4) (5, 9)
+ NEWLINE '\\n' (5, 9) (5, 10)
+ DEDENT '' (6, 0) (6, 0)
+ DEDENT '' (6, 0) (6, 0)
+ NAME 'async' (6, 0) (6, 5)
+ OP '+=' (6, 6) (6, 8)
+ NUMBER '1' (6, 9) (6, 10)
+ NEWLINE '\\n' (6, 10) (6, 11)
++ """)
+
- >>> dump_tokens('''async def foo():
- ... async for i in 1: pass''')
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize('''\
++async def foo():
++ async for i in 1: pass''', """\
+ ASYNC 'async' (1, 0) (1, 5)
+ NAME 'def' (1, 6) (1, 9)
+ NAME 'foo' (1, 10) (1, 13)
+ OP '(' (1, 13) (1, 14)
+ OP ')' (1, 14) (1, 15)
+ OP ':' (1, 15) (1, 16)
+ NEWLINE '\\n' (1, 16) (1, 17)
+ INDENT ' ' (2, 0) (2, 2)
+ ASYNC 'async' (2, 2) (2, 7)
+ NAME 'for' (2, 8) (2, 11)
+ NAME 'i' (2, 12) (2, 13)
+ NAME 'in' (2, 14) (2, 16)
+ NUMBER '1' (2, 17) (2, 18)
+ OP ':' (2, 18) (2, 19)
+ NAME 'pass' (2, 20) (2, 24)
+ DEDENT '' (3, 0) (3, 0)
++ """)
+
- >>> dump_tokens('''async def foo(async): await''')
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize('''async def foo(async): await''', """\
+ ASYNC 'async' (1, 0) (1, 5)
+ NAME 'def' (1, 6) (1, 9)
+ NAME 'foo' (1, 10) (1, 13)
+ OP '(' (1, 13) (1, 14)
+ ASYNC 'async' (1, 14) (1, 19)
+ OP ')' (1, 19) (1, 20)
+ OP ':' (1, 20) (1, 21)
+ AWAIT 'await' (1, 22) (1, 27)
++ """)
++
++ self.check_tokenize('''\
++def f():
+
- >>> dump_tokens('''def f():
- ...
- ... def baz(): pass
- ... async def bar(): pass
- ...
- ... await = 2''')
- ENCODING 'utf-8' (0, 0) (0, 0)
++ def baz(): pass
++ async def bar(): pass
++
++ await = 2''', """\
+ NAME 'def' (1, 0) (1, 3)
+ NAME 'f' (1, 4) (1, 5)
+ OP '(' (1, 5) (1, 6)
+ OP ')' (1, 6) (1, 7)
+ OP ':' (1, 7) (1, 8)
+ NEWLINE '\\n' (1, 8) (1, 9)
+ NL '\\n' (2, 0) (2, 1)
+ INDENT ' ' (3, 0) (3, 2)
+ NAME 'def' (3, 2) (3, 5)
+ NAME 'baz' (3, 6) (3, 9)
+ OP '(' (3, 9) (3, 10)
+ OP ')' (3, 10) (3, 11)
+ OP ':' (3, 11) (3, 12)
+ NAME 'pass' (3, 13) (3, 17)
+ NEWLINE '\\n' (3, 17) (3, 18)
+ ASYNC 'async' (4, 2) (4, 7)
+ NAME 'def' (4, 8) (4, 11)
+ NAME 'bar' (4, 12) (4, 15)
+ OP '(' (4, 15) (4, 16)
+ OP ')' (4, 16) (4, 17)
+ OP ':' (4, 17) (4, 18)
+ NAME 'pass' (4, 19) (4, 23)
+ NEWLINE '\\n' (4, 23) (4, 24)
+ NL '\\n' (5, 0) (5, 1)
+ NAME 'await' (6, 2) (6, 7)
+ OP '=' (6, 8) (6, 9)
+ NUMBER '2' (6, 10) (6, 11)
+ DEDENT '' (7, 0) (7, 0)
++ """)
++
++ self.check_tokenize('''\
++async def f():
++
++ def baz(): pass
++ async def bar(): pass
+
- >>> dump_tokens('''async def f():
- ...
- ... def baz(): pass
- ... async def bar(): pass
- ...
- ... await = 2''')
- ENCODING 'utf-8' (0, 0) (0, 0)
++ await = 2''', """\
+ ASYNC 'async' (1, 0) (1, 5)
+ NAME 'def' (1, 6) (1, 9)
+ NAME 'f' (1, 10) (1, 11)
+ OP '(' (1, 11) (1, 12)
+ OP ')' (1, 12) (1, 13)
+ OP ':' (1, 13) (1, 14)
+ NEWLINE '\\n' (1, 14) (1, 15)
+ NL '\\n' (2, 0) (2, 1)
+ INDENT ' ' (3, 0) (3, 2)
+ NAME 'def' (3, 2) (3, 5)
+ NAME 'baz' (3, 6) (3, 9)
+ OP '(' (3, 9) (3, 10)
+ OP ')' (3, 10) (3, 11)
+ OP ':' (3, 11) (3, 12)
+ NAME 'pass' (3, 13) (3, 17)
+ NEWLINE '\\n' (3, 17) (3, 18)
+ ASYNC 'async' (4, 2) (4, 7)
+ NAME 'def' (4, 8) (4, 11)
+ NAME 'bar' (4, 12) (4, 15)
+ OP '(' (4, 15) (4, 16)
+ OP ')' (4, 16) (4, 17)
+ OP ':' (4, 17) (4, 18)
+ NAME 'pass' (4, 19) (4, 23)
+ NEWLINE '\\n' (4, 23) (4, 24)
+ NL '\\n' (5, 0) (5, 1)
+ AWAIT 'await' (6, 2) (6, 7)
+ OP '=' (6, 8) (6, 9)
+ NUMBER '2' (6, 10) (6, 11)
+ DEDENT '' (7, 0) (7, 0)
- """
-
- from test import support
- from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
- STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,
- open as tokenize_open, Untokenizer)
- from io import BytesIO
- from unittest import TestCase, mock
- import os
- import token
++ """)
+
- def dump_tokens(s):
- """Print out the tokens in s in a table format.
- The ENDMARKER is omitted.
- """
- f = BytesIO(s.encode('utf-8'))
- for type, token, start, end, line in tokenize(f.readline):
- if type == ENDMARKER:
- break
- type = tok_name[type]
- print("%(type)-10.10s %(token)-13.13r %(start)s %(end)s" % locals())
-
- def roundtrip(f):
- """
- Test roundtrip for `untokenize`. `f` is an open file or a string.
- The source code in f is tokenized to both 5- and 2-tuples.
- Both sequences are converted back to source code via
- tokenize.untokenize(), and the latter tokenized again to 2-tuples.
- The test fails if the 3 pair tokenizations do not match.
-
- When untokenize bugs are fixed, untokenize with 5-tuples should
- reproduce code that does not contain a backslash continuation
- following spaces. A proper test should test this.
-
- This function would be more useful for correcting bugs if it reported
- the first point of failure, like assertEqual, rather than just
- returning False -- or if it were only used in unittests and not
- doctest and actually used assertEqual.
- """
- # Get source code and original tokenizations
- if isinstance(f, str):
- code = f.encode('utf-8')
- else:
- code = f.read()
- f.close()
- readline = iter(code.splitlines(keepends=True)).__next__
- tokens5 = list(tokenize(readline))
- tokens2 = [tok[:2] for tok in tokens5]
- # Reproduce tokens2 from pairs
- bytes_from2 = untokenize(tokens2)
- readline2 = iter(bytes_from2.splitlines(keepends=True)).__next__
- tokens2_from2 = [tok[:2] for tok in tokenize(readline2)]
- # Reproduce tokens2 from 5-tuples
- bytes_from5 = untokenize(tokens5)
- readline5 = iter(bytes_from5.splitlines(keepends=True)).__next__
- tokens2_from5 = [tok[:2] for tok in tokenize(readline5)]
- # Compare 3 versions
- return tokens2 == tokens2_from2 == tokens2_from5
-
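
What roundtrip() exercises is the documented guarantee of untokenize(): given (type, string) 2-tuples, the regenerated source tokenizes back to the same 2-tuples, even though column positions and spacing are synthesized rather than recovered. A condensed version of the same check:

    from io import BytesIO
    from tokenize import tokenize, untokenize

    code = b"x  =  1 + 2\n"          # deliberately irregular spacing
    tokens5 = list(tokenize(BytesIO(code).readline))
    tokens2 = [tok[:2] for tok in tokens5]

    rebuilt = untokenize(tokens2)    # bytes, because an ENCODING token leads
    reread = [tok[:2] for tok in tokenize(BytesIO(rebuilt).readline)]

    # The 2-tuple round trip preserves the token stream, not the whitespace.
    assert reread == tokens2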
- # This is an example from the docs, set up as a doctest.
def decistmt(s):
- """Substitute Decimals for floats in a string of statements.
-
- >>> from decimal import Decimal
- >>> s = 'print(+21.3e-5*-.1234/81.7)'
- >>> decistmt(s)
- "print (+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7'))"
-
- The format of the exponent is inherited from the platform C library.
- Known cases are "e-007" (Windows) and "e-07" (not Windows). Since
- we're only showing 11 digits, and the 12th isn't close to 5, the
- rest of the output should be platform-independent.
-
- >>> exec(s) #doctest: +ELLIPSIS
- -3.2171603427...e-0...7
-
- Output from calculations with Decimal should be identical across all
- platforms.
-
- >>> exec(decistmt(s))
- -3.217160342717258261933904529E-7
- """
result = []
g = tokenize(BytesIO(s.encode('utf-8')).readline) # tokenize the string
for toknum, tokval, _, _, _ in g: