Merged revisions 59041-59055 via svnmerge from

author Guido van Rossum <guido@python.org>

Mon, 19 Nov 2007 18:03:44 +0000 (18:03 +0000)

committer Guido van Rossum <guido@python.org>

Mon, 19 Nov 2007 18:03:44 +0000 (18:03 +0000)
author Guido van Rossum <guido@python.org>
Mon, 19 Nov 2007 18:03:44 +0000 (18:03 +0000)
committer Guido van Rossum <guido@python.org>
Mon, 19 Nov 2007 18:03:44 +0000 (18:03 +0000)
diff --git a/Doc/library/mimetypes.rst b/Doc/library/mimetypes.rst

index 50140c6dae8ef12162d9e8d3de373fe9da8a60df..f59cd829a911753a283a55f837084c0a8ac9951c 100644 (file)
--- a/Doc/library/mimetypes.rst
+++ b/Doc/library/mimetypes.rst
@@ -96,8 +96,8 @@ behavior of the module.
     extension is already known, the new type will replace the old one. When the type
     is already known the extension will be added to the list of known extensions.
  
-   When *strict* is the mapping will added to the official MIME types, otherwise to
-   the non-standard ones.
+   When *strict* is True (the default), the mapping will be added to the official
+   MIME types, otherwise to the non-standard ones.
  
  
  .. data:: inited
diff --git a/Lib/encodings/utf_8_sig.py b/Lib/encodings/utf_8_sig.py

index 07cd5eeb1b2fc239cc46dcb4edd0bbdc664a34b5..1bb479203f365dabd6cc59bb0b6debef55e40396 100644 (file)
--- a/Lib/encodings/utf_8_sig.py
+++ b/Lib/encodings/utf_8_sig.py
@@ -103,12 +103,18 @@ class StreamReader(codecs.StreamReader):
              pass
  
      def decode(self, input, errors='strict'):
-        if len(input) < 3 and codecs.BOM_UTF8.startswith(input):
-            # not enough data to decide if this is a BOM
-            # => try again on the next call
-            return ("", 0)
+        if len(input) < 3:
+            if codecs.BOM_UTF8.startswith(input):
+                # not enough data to decide if this is a BOM
+                # => try again on the next call
+                return ("", 0)
+        elif input[:3] == codecs.BOM_UTF8:
+            self.decode = codecs.utf_8_decode
+            (output, consumed) = codecs.utf_8_decode(input[3:],errors)
+            return (output, consumed+3)
+        # (else) no BOM present
          self.decode = codecs.utf_8_decode
-        return decode(input, errors)
+        return codecs.utf_8_decode(input, errors)
  
  ### encodings module API
  
diff --git a/Lib/test/test_cmd_line_script.py b/Lib/test/test_cmd_line_script.py

index 3c6e4a0af599ba957a5922f7fe260290cb8a0301..dcb768f3c0a91c5949c3f06a1f2802bedc0ae34c 100644 (file)
--- a/Lib/test/test_cmd_line_script.py
+++ b/Lib/test/test_cmd_line_script.py
@@ -29,6 +29,7 @@ def _run_python(*args):
  @contextlib.contextmanager
  def temp_dir():
      dirname = tempfile.mkdtemp()
+    dirname = os.path.realpath(dirname)
      try:
          yield dirname
      finally:
@@ -82,7 +83,7 @@ def _make_test_zip(zip_dir, zip_basename, script_name):
      zip_file.close()
      # if verbose:
      #    zip_file = zipfile.ZipFile(zip_name, 'r')
-    #    print "Contents of %r:" % zip_name
+    #    print("Contents of %r:" % zip_name)
      #    zip_file.printdir()
      #    zip_file.close()
      return zip_name
@@ -90,9 +91,9 @@ def _make_test_zip(zip_dir, zip_basename, script_name):
  class CmdLineTest(unittest.TestCase):
      def _check_script(self, script_name, expected_file, expected_argv0):
          exit_code, data = _run_python(script_name)
-        # if verbose:
-        #    print "Output from test script %r:" % script_name
-        #    print data
+        if verbose:
+            print("Output from test script %r:" % script_name)
+            print(data)
          self.assertEqual(exit_code, 0, data)
          printed_file = '__file__==%r' % expected_file
          printed_argv0 = 'sys.argv[0]==%r' % expected_argv0
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py

index 5833c6db183432ffd0aa3db696dfc49aeffffff8..413a5aa8d27997ebfb5d5ad83023221988413154 100644 (file)
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -59,7 +59,7 @@ class MixInCheckStateHandling:
  class ReadTest(unittest.TestCase, MixInCheckStateHandling):
      def check_partial(self, input, partialresults):
          # get a StreamReader for the encoding and feed the bytestring version
-        # of input to the reader byte by byte. Read every available from
+        # of input to the reader byte by byte. Read everything available from
          # the StreamReader and check that the results equal the appropriate
          # entries from partialresults.
          q = Queue(b"")
@@ -618,10 +618,53 @@ class UTF8SigTest(ReadTest):
          s = "spam"
          self.assertEqual(d.decode(s.encode("utf-8-sig")), s)
  
-    def test_decoder_state(self):
-        u = "\x00\x7f\x80\xff\u0100\u07ff\u0800\uffff\U0010ffff"
-        self.check_state_handling_decode(self.encoding,
-                                         u, u.encode(self.encoding))
+    def test_stream_bom(self):
+        unistring = "ABC\u00A1\u2200XYZ"
+        bytestring = codecs.BOM_UTF8 + b"ABC\xC2\xA1\xE2\x88\x80XYZ"
+
+        reader = codecs.getreader("utf-8-sig")
+        for sizehint in [None] + list(range(1, 11)) + \
+                        [64, 128, 256, 512, 1024]:
+            istream = reader(io.BytesIO(bytestring))
+            ostream = io.StringIO()
+            while 1:
+                if sizehint is not None:
+                    data = istream.read(sizehint)
+                else:
+                    data = istream.read()
+
+                if not data:
+                    break
+                ostream.write(data)
+
+            got = ostream.getvalue()
+            self.assertEqual(got, unistring)
+
+    def test_stream_bare(self):
+        unistring = "ABC\u00A1\u2200XYZ"
+        bytestring = b"ABC\xC2\xA1\xE2\x88\x80XYZ"
+
+        reader = codecs.getreader("utf-8-sig")
+        for sizehint in [None] + list(range(1, 11)) + \
+                        [64, 128, 256, 512, 1024]:
+            istream = reader(io.BytesIO(bytestring))
+            ostream = io.StringIO()
+            while 1:
+                if sizehint is not None:
+                    data = istream.read(sizehint)
+                else:
+                    data = istream.read()
+
+                if not data:
+                    break
+                ostream.write(data)
+
+            got = ostream.getvalue()
+            self.assertEqual(got, unistring)
+
+class EscapeDecodeTest(unittest.TestCase):
+    def test_empty(self):
+        self.assertEquals(codecs.escape_decode(""), ("", 0))
  
  class RecodingTest(unittest.TestCase):
      def test_recoding(self):
author	Guido van Rossum <guido@python.org>
	Mon, 19 Nov 2007 18:03:44 +0000 (18:03 +0000)
committer	Guido van Rossum <guido@python.org>
	Mon, 19 Nov 2007 18:03:44 +0000 (18:03 +0000)
Doc/library/mimetypes.rst		patch \| blob \| history
Lib/encodings/utf_8_sig.py		patch \| blob \| history
Lib/test/test_cmd_line_script.py		patch \| blob \| history
Lib/test/test_codecs.py		patch \| blob \| history