pass
def decode(self, input, errors='strict'):
- if len(input) < 3 and codecs.BOM_UTF8.startswith(input):
- # not enough data to decide if this is a BOM
- # => try again on the next call
- return ("", 0)
+ if len(input) < 3:  # shorter than the 3-byte BOM compared against below
+ if codecs.BOM_UTF8.startswith(input):
+ # input is (so far) only a prefix of the BOM: not enough data to
+ # decide if this is a BOM => consume nothing, try again on the next call
+ return ("", 0)
+ elif input[:3] == codecs.BOM_UTF8:  # input starts with the BOM
+ self.decode = codecs.utf_8_decode  # shadow this method on the instance so later calls skip the BOM check
+ (output, consumed) = codecs.utf_8_decode(input[3:],errors)  # decode only what follows the BOM
+ return (output, consumed+3)  # report the 3 skipped BOM bytes as consumed too
+ # (else) no BOM present: from here on decode as plain UTF-8
self.decode = codecs.utf_8_decode
- return decode(input, errors)
+ return codecs.utf_8_decode(input, errors)
### encodings module API
@contextlib.contextmanager
def temp_dir():
dirname = tempfile.mkdtemp()
+ dirname = os.path.realpath(dirname)
try:
yield dirname
finally:
zip_file.close()
# if verbose:
# zip_file = zipfile.ZipFile(zip_name, 'r')
- # print "Contents of %r:" % zip_name
+ # print("Contents of %r:" % zip_name)
# zip_file.printdir()
# zip_file.close()
return zip_name
class CmdLineTest(unittest.TestCase):
def _check_script(self, script_name, expected_file, expected_argv0):
exit_code, data = _run_python(script_name)
- # if verbose:
- # print "Output from test script %r:" % script_name
- # print data
+ if verbose:
+ print("Output from test script %r:" % script_name)
+ print(data)
self.assertEqual(exit_code, 0, data)
printed_file = '__file__==%r' % expected_file
printed_argv0 = 'sys.argv[0]==%r' % expected_argv0
class ReadTest(unittest.TestCase, MixInCheckStateHandling):
def check_partial(self, input, partialresults):
# get a StreamReader for the encoding and feed the bytestring version
- # of input to the reader byte by byte. Read every available from
+ # of input to the reader byte by byte. Read everything available from
# the StreamReader and check that the results equal the appropriate
# entries from partialresults.
q = Queue(b"")
s = "spam"
self.assertEqual(d.decode(s.encode("utf-8-sig")), s)
- def test_decoder_state(self):
- u = "\x00\x7f\x80\xff\u0100\u07ff\u0800\uffff\U0010ffff"
- self.check_state_handling_decode(self.encoding,
- u, u.encode(self.encoding))
+ def test_stream_bom(self):  # BOM-prefixed bytes read through a utf-8-sig reader must yield the text without the BOM
+ unistring = "ABC\u00A1\u2200XYZ"
+ bytestring = codecs.BOM_UTF8 + b"ABC\xC2\xA1\xE2\x88\x80XYZ"  # BOM followed by the UTF-8 encoding of unistring
+
+ reader = codecs.getreader("utf-8-sig")
+ for sizehint in [None] + list(range(1, 11)) + \
+ [64, 128, 256, 512, 1024]:  # None selects the argument-less read(); hints 1 and 2 are smaller than the BOM, presumably to split it across reads
+ istream = reader(io.BytesIO(bytestring))  # fresh reader over the same bytes for every hint
+ ostream = io.StringIO()
+ while 1:
+ if sizehint is not None:
+ data = istream.read(sizehint)
+ else:
+ data = istream.read()
+
+ if not data:  # an empty read is taken as end of stream
+ break
+ ostream.write(data)
+
+ got = ostream.getvalue()
+ self.assertEqual(got, unistring)  # the accumulated text must equal unistring exactly (no BOM character)
+
+ def test_stream_bare(self):  # bytes without a BOM read through a utf-8-sig reader must decode unchanged
+ unistring = "ABC\u00A1\u2200XYZ"
+ bytestring = b"ABC\xC2\xA1\xE2\x88\x80XYZ"  # UTF-8 encoding of unistring, no BOM in front
+
+ reader = codecs.getreader("utf-8-sig")
+ for sizehint in [None] + list(range(1, 11)) + \
+ [64, 128, 256, 512, 1024]:  # None selects the argument-less read(); the other values are passed to read() as its size argument
+ istream = reader(io.BytesIO(bytestring))  # fresh reader over the same bytes for every hint
+ ostream = io.StringIO()
+ while 1:
+ if sizehint is not None:
+ data = istream.read(sizehint)
+ else:
+ data = istream.read()
+
+ if not data:  # an empty read is taken as end of stream
+ break
+ ostream.write(data)
+
+ got = ostream.getvalue()
+ self.assertEqual(got, unistring)  # the accumulated text must equal unistring exactly
+
+class EscapeDecodeTest(unittest.TestCase):
+    def test_empty(self):
+        # Empty input must decode to empty output with nothing consumed.
+        # escape_decode returns bytes, so the input and the expected value
+        # are both bytes; assertEqual replaces the deprecated assertEquals.
+        self.assertEqual(codecs.escape_decode(b""), (b"", 0))
class RecodingTest(unittest.TestCase):
def test_recoding(self):