Patch #1080727: add "encoding" parameter to doctest.DocFileSuite

author George Yoshida <dynkin@gmail.com>
Sun, 28 May 2006 16:39:09 +0000 (16:39 +0000)
committer George Yoshida <dynkin@gmail.com>
Sun, 28 May 2006 16:39:09 +0000 (16:39 +0000)
diff --git a/Doc/lib/libdoctest.tex b/Doc/lib/libdoctest.tex

index 73b29ad71cdd804e472da47e74ef8fc44b8aa4e7..f9a97fa6193e20ffd120a196e34cd05cf3bb8300 100644 (file)
--- a/Doc/lib/libdoctest.tex
+++ b/Doc/lib/libdoctest.tex
@@ -868,7 +868,7 @@ sections \ref{doctest-simple-testmod} and
                            globs}\optional{, verbose}\optional{,
                            report}\optional{, optionflags}\optional{,
                            extraglobs}\optional{, raise_on_error}\optional{,
-                          parser}}
+                          parser}\optional{, encoding}}
  
    All arguments except \var{filename} are optional, and should be
    specified in keyword form.
@@ -941,7 +941,13 @@ sections \ref{doctest-simple-testmod} and
    subclass) that should be used to extract tests from the files.  It
    defaults to a normal parser (i.e., \code{\class{DocTestParser}()}).
  
+  Optional argument \var{encoding} specifies an encoding that should
+  be used to convert the file to Unicode.
+
    \versionadded{2.4}
+
+  \versionchanged[The parameter \var{encoding} was added]{2.5}
+
  \end{funcdesc}
  
  \begin{funcdesc}{testmod}{\optional{m}\optional{, name}\optional{,
@@ -1061,7 +1067,8 @@ instances from text files and modules with doctests:
  \begin{funcdesc}{DocFileSuite}{\optional{module_relative}\optional{,
                                package}\optional{, setUp}\optional{,
                                tearDown}\optional{, globs}\optional{,
-                              optionflags}\optional{, parser}}
+                              optionflags}\optional{, parser}\optional{,
+                              encoding}}
  
    Convert doctest tests from one or more text files to a
    \class{\refmodule{unittest}.TestSuite}.
@@ -1128,11 +1135,17 @@ instances from text files and modules with doctests:
    subclass) that should be used to extract tests from the files.  It
    defaults to a normal parser (i.e., \code{\class{DocTestParser}()}).
  
+  Optional argument \var{encoding} specifies an encoding that should
+  be used to convert the file to Unicode.
+
    \versionadded{2.4}
  
    \versionchanged[The global \code{__file__} was added to the
    globals provided to doctests loaded from a text file using
    \function{DocFileSuite()}]{2.5}
+
+  \versionchanged[The parameter \var{encoding} was added]{2.5}
+
  \end{funcdesc}
  
  \begin{funcdesc}{DocTestSuite}{\optional{module}\optional{,
diff --git a/Lib/doctest.py b/Lib/doctest.py

index 857bc1a6eb46046546bfd662db5d522e1a507ef8..971ec6cc4c108bdf4edf1f08079d46ce620a558c 100644 (file)
--- a/Lib/doctest.py
+++ b/Lib/doctest.py
@@ -1869,7 +1869,8 @@ def testmod(m=None, name=None, globs=None, verbose=None, isprivate=None,
  
  def testfile(filename, module_relative=True, name=None, package=None,
               globs=None, verbose=None, report=True, optionflags=0,
-             extraglobs=None, raise_on_error=False, parser=DocTestParser()):
+             extraglobs=None, raise_on_error=False, parser=DocTestParser(),
+             encoding=None):
      """
      Test examples in the given file.  Return (#failures, #tests).
  
@@ -1935,6 +1936,9 @@ def testfile(filename, module_relative=True, name=None, package=None,
      Optional keyword arg "parser" specifies a DocTestParser (or
      subclass) that should be used to extract tests from the files.
  
+    Optional keyword arg "encoding" specifies an encoding that should
+    be used to convert the file to Unicode.
+
      Advanced tomfoolery:  testmod runs methods of a local instance of
      class doctest.Tester, then merges the results into (or creates)
      global Tester instance doctest.master.  Methods of doctest.master
@@ -1969,6 +1973,9 @@ def testfile(filename, module_relative=True, name=None, package=None,
      else:
          runner = DocTestRunner(verbose=verbose, optionflags=optionflags)
  
+    if encoding is not None:
+        text = text.decode(encoding)
+
      # Read the file, convert it to a test, and run it.
      test = parser.get_doctest(text, globs, name, filename, 0)
      runner.run(test)
@@ -2339,7 +2346,8 @@ class DocFileCase(DocTestCase):
                  )
  
  def DocFileTest(path, module_relative=True, package=None,
-                globs=None, parser=DocTestParser(), **options):
+                globs=None, parser=DocTestParser(),
+                encoding=None, **options):
      if globs is None:
          globs = {}
      else:
@@ -2357,6 +2365,10 @@ def DocFileTest(path, module_relative=True, package=None,
  
      # Find the file and read it.
      name = os.path.basename(path)
+  
+    # If an encoding is specified, use it to convert the file to unicode
+    if encoding is not None:
+        doc = doc.decode(encoding)
  
      # Convert it to a test, and wrap it in a DocFileCase.
      test = parser.get_doctest(doc, globs, name, path, 0)
@@ -2414,6 +2426,9 @@ def DocFileSuite(*paths, **kw):
      parser
        A DocTestParser (or subclass) that should be used to extract
        tests from the files.
+
+    encoding
+      An encoding that will be used to convert the files to Unicode.
      """
      suite = unittest.TestSuite()
  
diff --git a/Lib/test/test_doctest.py b/Lib/test/test_doctest.py

index 443c9624987c628768379ac796346e7d49206494..92d2d74eafd40da89d3b6f930945531358c63aab 100644 (file)
--- a/Lib/test/test_doctest.py
+++ b/Lib/test/test_doctest.py
@@ -1937,9 +1937,10 @@ def test_DocFileSuite():
  
           >>> import unittest
           >>> suite = doctest.DocFileSuite('test_doctest.txt',
-         ...                              'test_doctest2.txt')
+         ...                              'test_doctest2.txt',
+         ...                              'test_doctest4.txt')
           >>> suite.run(unittest.TestResult())
-         <unittest.TestResult run=2 errors=0 failures=2>
+         <unittest.TestResult run=3 errors=0 failures=3>
  
         The test files are looked for in the directory containing the
         calling module.  A package keyword argument can be provided to
@@ -1948,9 +1949,10 @@ def test_DocFileSuite():
           >>> import unittest
           >>> suite = doctest.DocFileSuite('test_doctest.txt',
           ...                              'test_doctest2.txt',
+         ...                              'test_doctest4.txt',
           ...                              package='test')
           >>> suite.run(unittest.TestResult())
-         <unittest.TestResult run=2 errors=0 failures=2>
+         <unittest.TestResult run=3 errors=0 failures=3>
  
         '/' should be used as a path separator.  It will be converted
         to a native separator at run time:
@@ -1995,19 +1997,21 @@ def test_DocFileSuite():
  
           >>> suite = doctest.DocFileSuite('test_doctest.txt',
           ...                              'test_doctest2.txt',
+         ...                              'test_doctest4.txt',
           ...                              globs={'favorite_color': 'blue'})
           >>> suite.run(unittest.TestResult())
-         <unittest.TestResult run=2 errors=0 failures=1>
+         <unittest.TestResult run=3 errors=0 failures=2>
  
         In this case, we supplied a missing favorite color. You can
         provide doctest options:
  
           >>> suite = doctest.DocFileSuite('test_doctest.txt',
           ...                              'test_doctest2.txt',
+         ...                              'test_doctest4.txt',
           ...                         optionflags=doctest.DONT_ACCEPT_BLANKLINE,
           ...                              globs={'favorite_color': 'blue'})
           >>> suite.run(unittest.TestResult())
-         <unittest.TestResult run=2 errors=0 failures=2>
+         <unittest.TestResult run=3 errors=0 failures=3>
  
         And, you can provide setUp and tearDown functions:
  
@@ -2025,9 +2029,10 @@ def test_DocFileSuite():
  
           >>> suite = doctest.DocFileSuite('test_doctest.txt',
           ...                              'test_doctest2.txt',
+         ...                              'test_doctest4.txt',
           ...                              setUp=setUp, tearDown=tearDown)
           >>> suite.run(unittest.TestResult())
-         <unittest.TestResult run=2 errors=0 failures=1>
+         <unittest.TestResult run=3 errors=0 failures=2>
  
         But the tearDown restores sanity:
  
@@ -2060,6 +2065,17 @@ def test_DocFileSuite():
           >>> suite.run(unittest.TestResult())
           <unittest.TestResult run=1 errors=0 failures=0>
  
+       If the tests contain non-ASCII characters, we have to specify the
+       encoding of the file. We do so by using the `encoding`
+       parameter:
+
+         >>> suite = doctest.DocFileSuite('test_doctest.txt',
+         ...                              'test_doctest2.txt',
+         ...                              'test_doctest4.txt',
+         ...                              encoding='utf-8')
+         >>> suite.run(unittest.TestResult())
+         <unittest.TestResult run=3 errors=0 failures=2>
+
         """
  
  def test_trailing_space_in_test():
@@ -2266,6 +2282,32 @@ debugging):
      Traceback (most recent call last):
      UnexpectedException: ...
      >>> doctest.master = None  # Reset master.
+
+If the tests contain non-ASCII characters, the tests might fail, since
+it's unknown which encoding is used. The encoding can be specified
+using the optional keyword argument `encoding`:
+
+    >>> doctest.testfile('test_doctest4.txt') # doctest: +ELLIPSIS
+    **********************************************************************
+    File "...", line 7, in test_doctest4.txt
+    Failed example:
+        u'...'
+    Expected:
+        u'f\xf6\xf6'
+    Got:
+        u'f\xc3\xb6\xc3\xb6'
+    **********************************************************************
+    ...
+    **********************************************************************
+    1 items had failures:
+       2 of   4 in test_doctest4.txt
+    ***Test Failed*** 2 failures.
+    (2, 4)
+    >>> doctest.master = None  # Reset master.
+
+    >>> doctest.testfile('test_doctest4.txt', encoding='utf-8')
+    (0, 4)
+    >>> doctest.master = None  # Reset master.
  """
  
  # old_test1, ... used to live in doctest.py, but cluttered it.  Note
diff --git a/Lib/test/test_doctest4.txt b/Lib/test/test_doctest4.txt

new file mode 100644 (file)

index 0000000..a219d16
--- /dev/null
+++ b/Lib/test/test_doctest4.txt
@@ -0,0 +1,17 @@
+This is a sample doctest in a text file that contains non-ASCII characters.
+This file is encoded using UTF-8.
+
+In order to get this test to pass, we have to manually specify the
+encoding.
+
+  >>> u'föö'
+  u'f\xf6\xf6'
+
+  >>> u'bąr'
+  u'b\u0105r'
+
+  >>> 'föö'
+  'f\xc3\xb6\xc3\xb6'
+
+  >>> 'bąr'
+  'b\xc4\x85r'
diff --git a/Misc/NEWS b/Misc/NEWS

index 8d1063d5e30cca6d853c0c14caf19baf699ff713..31a5af9302743e875224a840f5f8a0b2c8ae5392 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -85,6 +85,8 @@ Extension Modules
  Library
  -------
  
+- Patch #1080727: add "encoding" parameter to doctest.testfile and DocFileSuite.
+
  - Patch #1281707: speed up gzip.readline.
  
  - Patch #1180296: Two new functions were added to the locale module:
author	George Yoshida <dynkin@gmail.com>
	Sun, 28 May 2006 16:39:09 +0000 (16:39 +0000)
committer	George Yoshida <dynkin@gmail.com>
	Sun, 28 May 2006 16:39:09 +0000 (16:39 +0000)
Doc/lib/libdoctest.tex		patch | blob | history
Lib/doctest.py		patch | blob | history
Lib/test/test_doctest.py		patch | blob | history
Lib/test/test_doctest4.txt	[new file with mode: 0644]	patch | blob
Misc/NEWS		patch | blob | history