granicus.if.org Git - python/commitdiff
Patch #1443155: Add the incremental codecs support for CJK codecs.
author Hye-Shik Chang <hyeshik@gmail.com>
Sun, 26 Mar 2006 02:34:59 +0000 (02:34 +0000)
committer Hye-Shik Chang <hyeshik@gmail.com>
Sun, 26 Mar 2006 02:34:59 +0000 (02:34 +0000)
(reviewed by Walter Dörwald)

31 files changed:
Lib/encodings/big5.py
Lib/encodings/big5hkscs.py
Lib/encodings/cp932.py
Lib/encodings/cp949.py
Lib/encodings/cp950.py
Lib/encodings/euc_jis_2004.py
Lib/encodings/euc_jisx0213.py
Lib/encodings/euc_jp.py
Lib/encodings/euc_kr.py
Lib/encodings/gb18030.py
Lib/encodings/gb2312.py
Lib/encodings/gbk.py
Lib/encodings/hz.py
Lib/encodings/iso2022_jp.py
Lib/encodings/iso2022_jp_1.py
Lib/encodings/iso2022_jp_2.py
Lib/encodings/iso2022_jp_2004.py
Lib/encodings/iso2022_jp_3.py
Lib/encodings/iso2022_jp_ext.py
Lib/encodings/iso2022_kr.py
Lib/encodings/johab.py
Lib/encodings/shift_jis.py
Lib/encodings/shift_jis_2004.py
Lib/encodings/shift_jisx0213.py
Lib/test/test_multibytecodec.py
Lib/test/test_multibytecodec_support.py
Modules/cjkcodecs/_codecs_cn.c
Modules/cjkcodecs/multibytecodec.c
Modules/cjkcodecs/multibytecodec.h
Tools/unicode/Makefile
Tools/unicode/gencjkcodecs.py [new file with mode: 0644]

index d56aa1bea6c8e70dce80db5c7c3141780cc86abc..c864b683a639ba0e443d6768ee85cbd4086624ff 100644 (file)
@@ -2,10 +2,10 @@
 # big5.py: Python Unicode Codec for BIG5
 #
 # Written by Hye-Shik Chang <perky@FreeBSD.org>
-# $CJKCodecs: big5.py,v 1.8 2004/06/28 18:16:03 perky Exp $
 #
 
 import _codecs_tw, codecs
+import _multibytecodec as mbc
 
 codec = _codecs_tw.getcodec('big5')
 
@@ -13,22 +13,24 @@ class Codec(codecs.Codec):
     encode = codec.encode
     decode = codec.decode
 
-class StreamReader(Codec, codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamReader.__init__(self, stream, errors)
-        __codec = codec.StreamReader(stream, errors)
-        self.read = __codec.read
-        self.readline = __codec.readline
-        self.readlines = __codec.readlines
-        self.reset = __codec.reset
-
-class StreamWriter(Codec, codecs.StreamWriter):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamWriter.__init__(self, stream, errors)
-        __codec = codec.StreamWriter(stream, errors)
-        self.write = __codec.write
-        self.writelines = __codec.writelines
-        self.reset = __codec.reset
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
 
 def getregentry():
-    return (codec.encode, codec.decode, StreamReader, StreamWriter)
+    return codecs.CodecInfo(
+        name='big5',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )
index 443997f463b9b0ce4997949a9de0c7108bfd4a03..9b812a2b83d77ead3f6146800d9be44b4d021c0e 100644 (file)
@@ -2,10 +2,10 @@
 # big5hkscs.py: Python Unicode Codec for BIG5HKSCS
 #
 # Written by Hye-Shik Chang <perky@FreeBSD.org>
-# $CJKCodecs: big5hkscs.py,v 1.1 2004/06/29 05:14:27 perky Exp $
 #
 
 import _codecs_hk, codecs
+import _multibytecodec as mbc
 
 codec = _codecs_hk.getcodec('big5hkscs')
 
@@ -13,22 +13,24 @@ class Codec(codecs.Codec):
     encode = codec.encode
     decode = codec.decode
 
-class StreamReader(Codec, codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamReader.__init__(self, stream, errors)
-        __codec = codec.StreamReader(stream, errors)
-        self.read = __codec.read
-        self.readline = __codec.readline
-        self.readlines = __codec.readlines
-        self.reset = __codec.reset
-
-class StreamWriter(Codec, codecs.StreamWriter):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamWriter.__init__(self, stream, errors)
-        __codec = codec.StreamWriter(stream, errors)
-        self.write = __codec.write
-        self.writelines = __codec.writelines
-        self.reset = __codec.reset
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
 
 def getregentry():
-    return (codec.encode, codec.decode, StreamReader, StreamWriter)
+    return codecs.CodecInfo(
+        name='big5hkscs',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )
index 38937f581149255c267d3f5188ebed79d396305b..54d6bb891909e314936d6ada0236b8a84374907c 100644 (file)
@@ -2,10 +2,10 @@
 # cp932.py: Python Unicode Codec for CP932
 #
 # Written by Hye-Shik Chang <perky@FreeBSD.org>
-# $CJKCodecs: cp932.py,v 1.8 2004/06/28 18:16:03 perky Exp $
 #
 
 import _codecs_jp, codecs
+import _multibytecodec as mbc
 
 codec = _codecs_jp.getcodec('cp932')
 
@@ -13,22 +13,24 @@ class Codec(codecs.Codec):
     encode = codec.encode
     decode = codec.decode
 
-class StreamReader(Codec, codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamReader.__init__(self, stream, errors)
-        __codec = codec.StreamReader(stream, errors)
-        self.read = __codec.read
-        self.readline = __codec.readline
-        self.readlines = __codec.readlines
-        self.reset = __codec.reset
-
-class StreamWriter(Codec, codecs.StreamWriter):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamWriter.__init__(self, stream, errors)
-        __codec = codec.StreamWriter(stream, errors)
-        self.write = __codec.write
-        self.writelines = __codec.writelines
-        self.reset = __codec.reset
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
 
 def getregentry():
-    return (codec.encode, codec.decode, StreamReader, StreamWriter)
+    return codecs.CodecInfo(
+        name='cp932',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )
index 0f3c847028ce10f3777fedb6b6255b33e135a81f..6012925967c0b42d87b701607775596a9624e8f5 100644 (file)
@@ -2,10 +2,10 @@
 # cp949.py: Python Unicode Codec for CP949
 #
 # Written by Hye-Shik Chang <perky@FreeBSD.org>
-# $CJKCodecs: cp949.py,v 1.8 2004/06/28 18:16:03 perky Exp $
 #
 
 import _codecs_kr, codecs
+import _multibytecodec as mbc
 
 codec = _codecs_kr.getcodec('cp949')
 
@@ -13,22 +13,24 @@ class Codec(codecs.Codec):
     encode = codec.encode
     decode = codec.decode
 
-class StreamReader(Codec, codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamReader.__init__(self, stream, errors)
-        __codec = codec.StreamReader(stream, errors)
-        self.read = __codec.read
-        self.readline = __codec.readline
-        self.readlines = __codec.readlines
-        self.reset = __codec.reset
-
-class StreamWriter(Codec, codecs.StreamWriter):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamWriter.__init__(self, stream, errors)
-        __codec = codec.StreamWriter(stream, errors)
-        self.write = __codec.write
-        self.writelines = __codec.writelines
-        self.reset = __codec.reset
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
 
 def getregentry():
-    return (codec.encode, codec.decode, StreamReader, StreamWriter)
+    return codecs.CodecInfo(
+        name='cp949',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )
index dab3e289b9dc614ea435f2708f6fcd97c75b7a12..b6517d9d434bcfc0d5d72e9e2f0d0875b8f80818 100644 (file)
@@ -2,10 +2,10 @@
 # cp950.py: Python Unicode Codec for CP950
 #
 # Written by Hye-Shik Chang <perky@FreeBSD.org>
-# $CJKCodecs: cp950.py,v 1.8 2004/06/28 18:16:03 perky Exp $
 #
 
 import _codecs_tw, codecs
+import _multibytecodec as mbc
 
 codec = _codecs_tw.getcodec('cp950')
 
@@ -13,22 +13,24 @@ class Codec(codecs.Codec):
     encode = codec.encode
     decode = codec.decode
 
-class StreamReader(Codec, codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamReader.__init__(self, stream, errors)
-        __codec = codec.StreamReader(stream, errors)
-        self.read = __codec.read
-        self.readline = __codec.readline
-        self.readlines = __codec.readlines
-        self.reset = __codec.reset
-
-class StreamWriter(Codec, codecs.StreamWriter):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamWriter.__init__(self, stream, errors)
-        __codec = codec.StreamWriter(stream, errors)
-        self.write = __codec.write
-        self.writelines = __codec.writelines
-        self.reset = __codec.reset
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
 
 def getregentry():
-    return (codec.encode, codec.decode, StreamReader, StreamWriter)
+    return codecs.CodecInfo(
+        name='cp950',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )
index 02d55ca0c41e7ce69dde4b24edba10ead7b53ed5..88e605a871a24893af1bf7f9cf8b5907c2115f22 100644 (file)
@@ -2,10 +2,10 @@
 # euc_jis_2004.py: Python Unicode Codec for EUC_JIS_2004
 #
 # Written by Hye-Shik Chang <perky@FreeBSD.org>
-# $CJKCodecs: euc_jis_2004.py,v 1.1 2004/07/07 16:18:25 perky Exp $
 #
 
 import _codecs_jp, codecs
+import _multibytecodec as mbc
 
 codec = _codecs_jp.getcodec('euc_jis_2004')
 
@@ -13,22 +13,24 @@ class Codec(codecs.Codec):
     encode = codec.encode
     decode = codec.decode
 
-class StreamReader(Codec, codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamReader.__init__(self, stream, errors)
-        __codec = codec.StreamReader(stream, errors)
-        self.read = __codec.read
-        self.readline = __codec.readline
-        self.readlines = __codec.readlines
-        self.reset = __codec.reset
-
-class StreamWriter(Codec, codecs.StreamWriter):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamWriter.__init__(self, stream, errors)
-        __codec = codec.StreamWriter(stream, errors)
-        self.write = __codec.write
-        self.writelines = __codec.writelines
-        self.reset = __codec.reset
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
 
 def getregentry():
-    return (codec.encode, codec.decode, StreamReader, StreamWriter)
+    return codecs.CodecInfo(
+        name='euc_jis_2004',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )
index 30f173e0ac6dc7b40dd55b4f075ecfbc5432dfa0..10d4b31b13f6615f226340c47d376a076f61fd3f 100644 (file)
@@ -2,10 +2,10 @@
 # euc_jisx0213.py: Python Unicode Codec for EUC_JISX0213
 #
 # Written by Hye-Shik Chang <perky@FreeBSD.org>
-# $CJKCodecs: euc_jisx0213.py,v 1.8 2004/06/28 18:16:03 perky Exp $
 #
 
 import _codecs_jp, codecs
+import _multibytecodec as mbc
 
 codec = _codecs_jp.getcodec('euc_jisx0213')
 
@@ -13,22 +13,24 @@ class Codec(codecs.Codec):
     encode = codec.encode
     decode = codec.decode
 
-class StreamReader(Codec, codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamReader.__init__(self, stream, errors)
-        __codec = codec.StreamReader(stream, errors)
-        self.read = __codec.read
-        self.readline = __codec.readline
-        self.readlines = __codec.readlines
-        self.reset = __codec.reset
-
-class StreamWriter(Codec, codecs.StreamWriter):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamWriter.__init__(self, stream, errors)
-        __codec = codec.StreamWriter(stream, errors)
-        self.write = __codec.write
-        self.writelines = __codec.writelines
-        self.reset = __codec.reset
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
 
 def getregentry():
-    return (codec.encode, codec.decode, StreamReader, StreamWriter)
+    return codecs.CodecInfo(
+        name='euc_jisx0213',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )
index a3947a304496d2082af7e1fd5426153218ad5ec8..4dc0b9b4c8429a3bdea33d9c175c083f968aba19 100644 (file)
@@ -2,10 +2,10 @@
 # euc_jp.py: Python Unicode Codec for EUC_JP
 #
 # Written by Hye-Shik Chang <perky@FreeBSD.org>
-# $CJKCodecs: euc_jp.py,v 1.8 2004/06/28 18:16:03 perky Exp $
 #
 
 import _codecs_jp, codecs
+import _multibytecodec as mbc
 
 codec = _codecs_jp.getcodec('euc_jp')
 
@@ -13,22 +13,24 @@ class Codec(codecs.Codec):
     encode = codec.encode
     decode = codec.decode
 
-class StreamReader(Codec, codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamReader.__init__(self, stream, errors)
-        __codec = codec.StreamReader(stream, errors)
-        self.read = __codec.read
-        self.readline = __codec.readline
-        self.readlines = __codec.readlines
-        self.reset = __codec.reset
-
-class StreamWriter(Codec, codecs.StreamWriter):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamWriter.__init__(self, stream, errors)
-        __codec = codec.StreamWriter(stream, errors)
-        self.write = __codec.write
-        self.writelines = __codec.writelines
-        self.reset = __codec.reset
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
 
 def getregentry():
-    return (codec.encode, codec.decode, StreamReader, StreamWriter)
+    return codecs.CodecInfo(
+        name='euc_jp',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )
index bbebee8c8421fb4e8a804bd3dd8a5281c0db47e9..30716f389f9c75789f349613ede1c148febec523 100644 (file)
@@ -2,10 +2,10 @@
 # euc_kr.py: Python Unicode Codec for EUC_KR
 #
 # Written by Hye-Shik Chang <perky@FreeBSD.org>
-# $CJKCodecs: euc_kr.py,v 1.8 2004/06/28 18:16:03 perky Exp $
 #
 
 import _codecs_kr, codecs
+import _multibytecodec as mbc
 
 codec = _codecs_kr.getcodec('euc_kr')
 
@@ -13,22 +13,24 @@ class Codec(codecs.Codec):
     encode = codec.encode
     decode = codec.decode
 
-class StreamReader(Codec, codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamReader.__init__(self, stream, errors)
-        __codec = codec.StreamReader(stream, errors)
-        self.read = __codec.read
-        self.readline = __codec.readline
-        self.readlines = __codec.readlines
-        self.reset = __codec.reset
-
-class StreamWriter(Codec, codecs.StreamWriter):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamWriter.__init__(self, stream, errors)
-        __codec = codec.StreamWriter(stream, errors)
-        self.write = __codec.write
-        self.writelines = __codec.writelines
-        self.reset = __codec.reset
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
 
 def getregentry():
-    return (codec.encode, codec.decode, StreamReader, StreamWriter)
+    return codecs.CodecInfo(
+        name='euc_kr',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )
index 7eca3192f8068983dc85d84cf0f1b98b63f2ef3b..e685cf6c1e6351264239c6fcf946d3ba23ea2992 100644 (file)
@@ -2,10 +2,10 @@
 # gb18030.py: Python Unicode Codec for GB18030
 #
 # Written by Hye-Shik Chang <perky@FreeBSD.org>
-# $CJKCodecs: gb18030.py,v 1.8 2004/06/28 18:16:03 perky Exp $
 #
 
 import _codecs_cn, codecs
+import _multibytecodec as mbc
 
 codec = _codecs_cn.getcodec('gb18030')
 
@@ -13,22 +13,24 @@ class Codec(codecs.Codec):
     encode = codec.encode
     decode = codec.decode
 
-class StreamReader(Codec, codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamReader.__init__(self, stream, errors)
-        __codec = codec.StreamReader(stream, errors)
-        self.read = __codec.read
-        self.readline = __codec.readline
-        self.readlines = __codec.readlines
-        self.reset = __codec.reset
-
-class StreamWriter(Codec, codecs.StreamWriter):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamWriter.__init__(self, stream, errors)
-        __codec = codec.StreamWriter(stream, errors)
-        self.write = __codec.write
-        self.writelines = __codec.writelines
-        self.reset = __codec.reset
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
 
 def getregentry():
-    return (codec.encode, codec.decode, StreamReader, StreamWriter)
+    return codecs.CodecInfo(
+        name='gb18030',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )
index 5130efa1ab87f62a21494b31dd1bd3880d8c4de4..e99bf1d5c1869d59d5210022e7e5acf5fab08592 100644 (file)
@@ -2,10 +2,10 @@
 # gb2312.py: Python Unicode Codec for GB2312
 #
 # Written by Hye-Shik Chang <perky@FreeBSD.org>
-# $CJKCodecs: gb2312.py,v 1.8 2004/06/28 18:16:03 perky Exp $
 #
 
 import _codecs_cn, codecs
+import _multibytecodec as mbc
 
 codec = _codecs_cn.getcodec('gb2312')
 
@@ -13,22 +13,24 @@ class Codec(codecs.Codec):
     encode = codec.encode
     decode = codec.decode
 
-class StreamReader(Codec, codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamReader.__init__(self, stream, errors)
-        __codec = codec.StreamReader(stream, errors)
-        self.read = __codec.read
-        self.readline = __codec.readline
-        self.readlines = __codec.readlines
-        self.reset = __codec.reset
-
-class StreamWriter(Codec, codecs.StreamWriter):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamWriter.__init__(self, stream, errors)
-        __codec = codec.StreamWriter(stream, errors)
-        self.write = __codec.write
-        self.writelines = __codec.writelines
-        self.reset = __codec.reset
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
 
 def getregentry():
-    return (codec.encode, codec.decode, StreamReader, StreamWriter)
+    return codecs.CodecInfo(
+        name='gb2312',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )
index 67854bcdc8f67faa1d518fc95d4ae173bc40e309..09123aeb1a566995a2e07d6d326709ea24aa925e 100644 (file)
@@ -2,10 +2,10 @@
 # gbk.py: Python Unicode Codec for GBK
 #
 # Written by Hye-Shik Chang <perky@FreeBSD.org>
-# $CJKCodecs: gbk.py,v 1.8 2004/06/28 18:16:03 perky Exp $
 #
 
 import _codecs_cn, codecs
+import _multibytecodec as mbc
 
 codec = _codecs_cn.getcodec('gbk')
 
@@ -13,22 +13,24 @@ class Codec(codecs.Codec):
     encode = codec.encode
     decode = codec.decode
 
-class StreamReader(Codec, codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamReader.__init__(self, stream, errors)
-        __codec = codec.StreamReader(stream, errors)
-        self.read = __codec.read
-        self.readline = __codec.readline
-        self.readlines = __codec.readlines
-        self.reset = __codec.reset
-
-class StreamWriter(Codec, codecs.StreamWriter):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamWriter.__init__(self, stream, errors)
-        __codec = codec.StreamWriter(stream, errors)
-        self.write = __codec.write
-        self.writelines = __codec.writelines
-        self.reset = __codec.reset
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
 
 def getregentry():
-    return (codec.encode, codec.decode, StreamReader, StreamWriter)
+    return codecs.CodecInfo(
+        name='gbk',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )
index 3940894bc0121a94d6b4ff2d41fdf48b84145df1..06f7d2f4c811594ec7d9779d29c43a09bf19e685 100644 (file)
@@ -2,10 +2,10 @@
 # hz.py: Python Unicode Codec for HZ
 #
 # Written by Hye-Shik Chang <perky@FreeBSD.org>
-# $CJKCodecs: hz.py,v 1.8 2004/06/28 18:16:03 perky Exp $
 #
 
 import _codecs_cn, codecs
+import _multibytecodec as mbc
 
 codec = _codecs_cn.getcodec('hz')
 
@@ -13,22 +13,24 @@ class Codec(codecs.Codec):
     encode = codec.encode
     decode = codec.decode
 
-class StreamReader(Codec, codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamReader.__init__(self, stream, errors)
-        __codec = codec.StreamReader(stream, errors)
-        self.read = __codec.read
-        self.readline = __codec.readline
-        self.readlines = __codec.readlines
-        self.reset = __codec.reset
-
-class StreamWriter(Codec, codecs.StreamWriter):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamWriter.__init__(self, stream, errors)
-        __codec = codec.StreamWriter(stream, errors)
-        self.write = __codec.write
-        self.writelines = __codec.writelines
-        self.reset = __codec.reset
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
 
 def getregentry():
-    return (codec.encode, codec.decode, StreamReader, StreamWriter)
+    return codecs.CodecInfo(
+        name='hz',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )
index 109658b830a9349f3e1cadb4548d132bc7e19315..fb041596529c273a101f621c776a8f8fcb0184b5 100644 (file)
@@ -2,10 +2,10 @@
 # iso2022_jp.py: Python Unicode Codec for ISO2022_JP
 #
 # Written by Hye-Shik Chang <perky@FreeBSD.org>
-# $CJKCodecs: iso2022_jp.py,v 1.2 2004/06/28 18:16:03 perky Exp $
 #
 
 import _codecs_iso2022, codecs
+import _multibytecodec as mbc
 
 codec = _codecs_iso2022.getcodec('iso2022_jp')
 
@@ -13,22 +13,24 @@ class Codec(codecs.Codec):
     encode = codec.encode
     decode = codec.decode
 
-class StreamReader(Codec, codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamReader.__init__(self, stream, errors)
-        __codec = codec.StreamReader(stream, errors)
-        self.read = __codec.read
-        self.readline = __codec.readline
-        self.readlines = __codec.readlines
-        self.reset = __codec.reset
-
-class StreamWriter(Codec, codecs.StreamWriter):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamWriter.__init__(self, stream, errors)
-        __codec = codec.StreamWriter(stream, errors)
-        self.write = __codec.write
-        self.writelines = __codec.writelines
-        self.reset = __codec.reset
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
 
 def getregentry():
-    return (codec.encode, codec.decode, StreamReader, StreamWriter)
+    return codecs.CodecInfo(
+        name='iso2022_jp',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )
index 201bd289109f1c24b24b7ab7fa6f2b0047ee270c..fde51c2f021e750af10671827be6c3827e5ef3f1 100644 (file)
@@ -2,10 +2,10 @@
 # iso2022_jp_1.py: Python Unicode Codec for ISO2022_JP_1
 #
 # Written by Hye-Shik Chang <perky@FreeBSD.org>
-# $CJKCodecs: iso2022_jp_1.py,v 1.2 2004/06/28 18:16:03 perky Exp $
 #
 
 import _codecs_iso2022, codecs
+import _multibytecodec as mbc
 
 codec = _codecs_iso2022.getcodec('iso2022_jp_1')
 
@@ -13,22 +13,24 @@ class Codec(codecs.Codec):
     encode = codec.encode
     decode = codec.decode
 
-class StreamReader(Codec, codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamReader.__init__(self, stream, errors)
-        __codec = codec.StreamReader(stream, errors)
-        self.read = __codec.read
-        self.readline = __codec.readline
-        self.readlines = __codec.readlines
-        self.reset = __codec.reset
-
-class StreamWriter(Codec, codecs.StreamWriter):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamWriter.__init__(self, stream, errors)
-        __codec = codec.StreamWriter(stream, errors)
-        self.write = __codec.write
-        self.writelines = __codec.writelines
-        self.reset = __codec.reset
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
 
 def getregentry():
-    return (codec.encode, codec.decode, StreamReader, StreamWriter)
+    return codecs.CodecInfo(
+        name='iso2022_jp_1',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )
index 7a610180b15517ea9562fb21e7d76c1a53298a01..766ab46c7d515f468a089170392dd45db9fed2f9 100644 (file)
@@ -2,10 +2,10 @@
 # iso2022_jp_2.py: Python Unicode Codec for ISO2022_JP_2
 #
 # Written by Hye-Shik Chang <perky@FreeBSD.org>
-# $CJKCodecs: iso2022_jp_2.py,v 1.2 2004/06/28 18:16:03 perky Exp $
 #
 
 import _codecs_iso2022, codecs
+import _multibytecodec as mbc
 
 codec = _codecs_iso2022.getcodec('iso2022_jp_2')
 
@@ -13,22 +13,24 @@ class Codec(codecs.Codec):
     encode = codec.encode
     decode = codec.decode
 
-class StreamReader(Codec, codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamReader.__init__(self, stream, errors)
-        __codec = codec.StreamReader(stream, errors)
-        self.read = __codec.read
-        self.readline = __codec.readline
-        self.readlines = __codec.readlines
-        self.reset = __codec.reset
-
-class StreamWriter(Codec, codecs.StreamWriter):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamWriter.__init__(self, stream, errors)
-        __codec = codec.StreamWriter(stream, errors)
-        self.write = __codec.write
-        self.writelines = __codec.writelines
-        self.reset = __codec.reset
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
 
 def getregentry():
-    return (codec.encode, codec.decode, StreamReader, StreamWriter)
+    return codecs.CodecInfo(
+        name='iso2022_jp_2',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )
index 249712471b1bc1351a707acac605873cf396aba5..236ab4e5a46a14398a2d71d600425292bcf7777f 100644 (file)
@@ -2,10 +2,10 @@
 # iso2022_jp_2004.py: Python Unicode Codec for ISO2022_JP_2004
 #
 # Written by Hye-Shik Chang <perky@FreeBSD.org>
-# $CJKCodecs: iso2022_jp_2004.py,v 1.1 2004/07/07 16:18:25 perky Exp $
 #
 
 import _codecs_iso2022, codecs
+import _multibytecodec as mbc
 
 codec = _codecs_iso2022.getcodec('iso2022_jp_2004')
 
@@ -13,22 +13,24 @@ class Codec(codecs.Codec):
     encode = codec.encode
     decode = codec.decode
 
-class StreamReader(Codec, codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamReader.__init__(self, stream, errors)
-        __codec = codec.StreamReader(stream, errors)
-        self.read = __codec.read
-        self.readline = __codec.readline
-        self.readlines = __codec.readlines
-        self.reset = __codec.reset
-
-class StreamWriter(Codec, codecs.StreamWriter):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamWriter.__init__(self, stream, errors)
-        __codec = codec.StreamWriter(stream, errors)
-        self.write = __codec.write
-        self.writelines = __codec.writelines
-        self.reset = __codec.reset
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
 
 def getregentry():
-    return (codec.encode, codec.decode, StreamReader, StreamWriter)
+    return codecs.CodecInfo(
+        name='iso2022_jp_2004',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )
index 8b2ed002801f11508523787d5d8fbcabf69b3fdf..e3cf9504393646315e356a7a90333e3460857ea7 100644 (file)
@@ -2,10 +2,10 @@
 # iso2022_jp_3.py: Python Unicode Codec for ISO2022_JP_3
 #
 # Written by Hye-Shik Chang <perky@FreeBSD.org>
-# $CJKCodecs: iso2022_jp_3.py,v 1.2 2004/06/28 18:16:03 perky Exp $
 #
 
 import _codecs_iso2022, codecs
+import _multibytecodec as mbc
 
 codec = _codecs_iso2022.getcodec('iso2022_jp_3')
 
@@ -13,22 +13,24 @@ class Codec(codecs.Codec):
     encode = codec.encode
     decode = codec.decode
 
-class StreamReader(Codec, codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamReader.__init__(self, stream, errors)
-        __codec = codec.StreamReader(stream, errors)
-        self.read = __codec.read
-        self.readline = __codec.readline
-        self.readlines = __codec.readlines
-        self.reset = __codec.reset
-
-class StreamWriter(Codec, codecs.StreamWriter):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamWriter.__init__(self, stream, errors)
-        __codec = codec.StreamWriter(stream, errors)
-        self.write = __codec.write
-        self.writelines = __codec.writelines
-        self.reset = __codec.reset
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
 
 def getregentry():
-    return (codec.encode, codec.decode, StreamReader, StreamWriter)
+    return codecs.CodecInfo(
+        name='iso2022_jp_3',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )
index 97cb4e720857b19a932955f991168e4638b38f15..89d35b5d110b03f9b078b09a7b41be0947d3f1b1 100644 (file)
@@ -2,10 +2,10 @@
 # iso2022_jp_ext.py: Python Unicode Codec for ISO2022_JP_EXT
 #
 # Written by Hye-Shik Chang <perky@FreeBSD.org>
-# $CJKCodecs: iso2022_jp_ext.py,v 1.2 2004/06/28 18:16:03 perky Exp $
 #
 
 import _codecs_iso2022, codecs
+import _multibytecodec as mbc
 
 codec = _codecs_iso2022.getcodec('iso2022_jp_ext')
 
@@ -13,22 +13,24 @@ class Codec(codecs.Codec):
     encode = codec.encode
     decode = codec.decode
 
-class StreamReader(Codec, codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamReader.__init__(self, stream, errors)
-        __codec = codec.StreamReader(stream, errors)
-        self.read = __codec.read
-        self.readline = __codec.readline
-        self.readlines = __codec.readlines
-        self.reset = __codec.reset
-
-class StreamWriter(Codec, codecs.StreamWriter):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamWriter.__init__(self, stream, errors)
-        __codec = codec.StreamWriter(stream, errors)
-        self.write = __codec.write
-        self.writelines = __codec.writelines
-        self.reset = __codec.reset
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
 
 def getregentry():
-    return (codec.encode, codec.decode, StreamReader, StreamWriter)
+    return codecs.CodecInfo(
+        name='iso2022_jp_ext',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )
index f5549ca644e5a7ad9c690ebd5c58ef09343304f6..41f7ce0d9d79b7f20a0bf247a070672570055ed1 100644 (file)
@@ -2,10 +2,10 @@
 # iso2022_kr.py: Python Unicode Codec for ISO2022_KR
 #
 # Written by Hye-Shik Chang <perky@FreeBSD.org>
-# $CJKCodecs: iso2022_kr.py,v 1.2 2004/06/28 18:16:03 perky Exp $
 #
 
 import _codecs_iso2022, codecs
+import _multibytecodec as mbc
 
 codec = _codecs_iso2022.getcodec('iso2022_kr')
 
@@ -13,22 +13,24 @@ class Codec(codecs.Codec):
     encode = codec.encode
     decode = codec.decode
 
-class StreamReader(Codec, codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamReader.__init__(self, stream, errors)
-        __codec = codec.StreamReader(stream, errors)
-        self.read = __codec.read
-        self.readline = __codec.readline
-        self.readlines = __codec.readlines
-        self.reset = __codec.reset
-
-class StreamWriter(Codec, codecs.StreamWriter):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamWriter.__init__(self, stream, errors)
-        __codec = codec.StreamWriter(stream, errors)
-        self.write = __codec.write
-        self.writelines = __codec.writelines
-        self.reset = __codec.reset
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
 
 def getregentry():
-    return (codec.encode, codec.decode, StreamReader, StreamWriter)
+    return codecs.CodecInfo(
+        name='iso2022_kr',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )
index b6a87d74813ded7bf80d5731846c2347fdd5f931..6a2c9931b08515b3e6ef6ad4faf3007ed8f6ee6f 100644 (file)
@@ -2,10 +2,10 @@
 # johab.py: Python Unicode Codec for JOHAB
 #
 # Written by Hye-Shik Chang <perky@FreeBSD.org>
-# $CJKCodecs: johab.py,v 1.8 2004/06/28 18:16:03 perky Exp $
 #
 
 import _codecs_kr, codecs
+import _multibytecodec as mbc
 
 codec = _codecs_kr.getcodec('johab')
 
@@ -13,22 +13,24 @@ class Codec(codecs.Codec):
     encode = codec.encode
     decode = codec.decode
 
-class StreamReader(Codec, codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamReader.__init__(self, stream, errors)
-        __codec = codec.StreamReader(stream, errors)
-        self.read = __codec.read
-        self.readline = __codec.readline
-        self.readlines = __codec.readlines
-        self.reset = __codec.reset
-
-class StreamWriter(Codec, codecs.StreamWriter):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamWriter.__init__(self, stream, errors)
-        __codec = codec.StreamWriter(stream, errors)
-        self.write = __codec.write
-        self.writelines = __codec.writelines
-        self.reset = __codec.reset
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
 
 def getregentry():
-    return (codec.encode, codec.decode, StreamReader, StreamWriter)
+    return codecs.CodecInfo(
+        name='johab',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )
index ec5e51755ae315c08dc713b75f6ed8da29440c63..b1f77fc345df6137e904395968d27396091cf2b1 100644 (file)
@@ -2,10 +2,10 @@
 # shift_jis.py: Python Unicode Codec for SHIFT_JIS
 #
 # Written by Hye-Shik Chang <perky@FreeBSD.org>
-# $CJKCodecs: shift_jis.py,v 1.8 2004/06/28 18:16:03 perky Exp $
 #
 
 import _codecs_jp, codecs
+import _multibytecodec as mbc
 
 codec = _codecs_jp.getcodec('shift_jis')
 
@@ -13,22 +13,24 @@ class Codec(codecs.Codec):
     encode = codec.encode
     decode = codec.decode
 
-class StreamReader(Codec, codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamReader.__init__(self, stream, errors)
-        __codec = codec.StreamReader(stream, errors)
-        self.read = __codec.read
-        self.readline = __codec.readline
-        self.readlines = __codec.readlines
-        self.reset = __codec.reset
-
-class StreamWriter(Codec, codecs.StreamWriter):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamWriter.__init__(self, stream, errors)
-        __codec = codec.StreamWriter(stream, errors)
-        self.write = __codec.write
-        self.writelines = __codec.writelines
-        self.reset = __codec.reset
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
 
 def getregentry():
-    return (codec.encode, codec.decode, StreamReader, StreamWriter)
+    return codecs.CodecInfo(
+        name='shift_jis',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )
index 446cd7cce7018bfe9d5b099ee9c71e7bacde0799..6078a5266e46696d6c0a4ab6910e189fa3332b91 100644 (file)
@@ -2,10 +2,10 @@
 # shift_jis_2004.py: Python Unicode Codec for SHIFT_JIS_2004
 #
 # Written by Hye-Shik Chang <perky@FreeBSD.org>
-# $CJKCodecs: shift_jis_2004.py,v 1.1 2004/07/07 16:18:25 perky Exp $
 #
 
 import _codecs_jp, codecs
+import _multibytecodec as mbc
 
 codec = _codecs_jp.getcodec('shift_jis_2004')
 
@@ -13,22 +13,24 @@ class Codec(codecs.Codec):
     encode = codec.encode
     decode = codec.decode
 
-class StreamReader(Codec, codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamReader.__init__(self, stream, errors)
-        __codec = codec.StreamReader(stream, errors)
-        self.read = __codec.read
-        self.readline = __codec.readline
-        self.readlines = __codec.readlines
-        self.reset = __codec.reset
-
-class StreamWriter(Codec, codecs.StreamWriter):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamWriter.__init__(self, stream, errors)
-        __codec = codec.StreamWriter(stream, errors)
-        self.write = __codec.write
-        self.writelines = __codec.writelines
-        self.reset = __codec.reset
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
 
 def getregentry():
-    return (codec.encode, codec.decode, StreamReader, StreamWriter)
+    return codecs.CodecInfo(
+        name='shift_jis_2004',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )
index 495468b6f711517ab4e4254a4d71b17d7acbbfa9..5a0f24c14e119fe7f585c3d752a69289af779424 100644 (file)
@@ -2,10 +2,10 @@
 # shift_jisx0213.py: Python Unicode Codec for SHIFT_JISX0213
 #
 # Written by Hye-Shik Chang <perky@FreeBSD.org>
-# $CJKCodecs: shift_jisx0213.py,v 1.8 2004/06/28 18:16:03 perky Exp $
 #
 
 import _codecs_jp, codecs
+import _multibytecodec as mbc
 
 codec = _codecs_jp.getcodec('shift_jisx0213')
 
@@ -13,22 +13,24 @@ class Codec(codecs.Codec):
     encode = codec.encode
     decode = codec.decode
 
-class StreamReader(Codec, codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamReader.__init__(self, stream, errors)
-        __codec = codec.StreamReader(stream, errors)
-        self.read = __codec.read
-        self.readline = __codec.readline
-        self.readlines = __codec.readlines
-        self.reset = __codec.reset
-
-class StreamWriter(Codec, codecs.StreamWriter):
-    def __init__(self, stream, errors='strict'):
-        codecs.StreamWriter.__init__(self, stream, errors)
-        __codec = codec.StreamWriter(stream, errors)
-        self.write = __codec.write
-        self.writelines = __codec.writelines
-        self.reset = __codec.reset
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
 
 def getregentry():
-    return (codec.encode, codec.decode, StreamReader, StreamWriter)
+    return codecs.CodecInfo(
+        name='shift_jisx0213',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )
index aef7931e037ad88b0d54863243d396fe69f4048d..8f9f6e970b4725426ea59812aac0018ac6f41c0a 100644 (file)
@@ -9,11 +9,106 @@ from test import test_support
 from test import test_multibytecodec_support
 import unittest, StringIO, codecs
 
+class Test_MultibyteCodec(unittest.TestCase):
+
+    def test_nullcoding(self):
+        self.assertEqual(''.decode('gb18030'), u'')
+        self.assertEqual(unicode('', 'gb18030'), u'')
+        self.assertEqual(u''.encode('gb18030'), '')
+
+    def test_str_decode(self):
+        self.assertEqual('abcd'.encode('gb18030'), 'abcd')
+
+
+class Test_IncrementalEncoder(unittest.TestCase):
+
+    def test_stateless(self):
+        # cp949 encoder isn't stateful at all.
+        encoder = codecs.getincrementalencoder('cp949')()
+        self.assertEqual(encoder.encode(u'\ud30c\uc774\uc36c \ub9c8\uc744'),
+                         '\xc6\xc4\xc0\xcc\xbd\xe3 \xb8\xb6\xc0\xbb')
+        self.assertEqual(encoder.reset(), None)
+        self.assertEqual(encoder.encode(u'\u2606\u223c\u2606', True),
+                         '\xa1\xd9\xa1\xad\xa1\xd9')
+        self.assertEqual(encoder.reset(), None)
+        self.assertEqual(encoder.encode(u'', True), '')
+        self.assertEqual(encoder.encode(u'', False), '')
+        self.assertEqual(encoder.reset(), None)
+
+    def test_stateful(self):
+        # jisx0213 encoder is stateful for a few codepoints. eg)
+        #   U+00E6 => A9DC
+        #   U+00E6 U+0300 => ABC4
+        #   U+0300 => ABDC
+
+        encoder = codecs.getincrementalencoder('jisx0213')()
+        self.assertEqual(encoder.encode(u'\u00e6\u0300'), '\xab\xc4')
+        self.assertEqual(encoder.encode(u'\u00e6'), '')
+        self.assertEqual(encoder.encode(u'\u0300'), '\xab\xc4')
+        self.assertEqual(encoder.encode(u'\u00e6', True), '\xa9\xdc')
+
+        self.assertEqual(encoder.reset(), None)
+        self.assertEqual(encoder.encode(u'\u0300'), '\xab\xdc')
+
+        self.assertEqual(encoder.encode(u'\u00e6'), '')
+        self.assertEqual(encoder.encode('', True), '\xa9\xdc')
+        self.assertEqual(encoder.encode('', True), '')
+
+    def test_stateful_keep_buffer(self):
+        encoder = codecs.getincrementalencoder('jisx0213')()
+        self.assertEqual(encoder.encode(u'\u00e6'), '')
+        self.assertRaises(UnicodeEncodeError, encoder.encode, u'\u0123')
+        self.assertEqual(encoder.encode(u'\u0300\u00e6'), '\xab\xc4')
+        self.assertRaises(UnicodeEncodeError, encoder.encode, u'\u0123')
+        self.assertEqual(encoder.reset(), None)
+        self.assertEqual(encoder.encode(u'\u0300'), '\xab\xdc')
+        self.assertEqual(encoder.encode(u'\u00e6'), '')
+        self.assertRaises(UnicodeEncodeError, encoder.encode, u'\u0123')
+        self.assertEqual(encoder.encode(u'', True), '\xa9\xdc')
+
+
+class Test_IncrementalDecoder(unittest.TestCase):
+
+    def test_dbcs(self):
+        # cp949 decoder is simple with only 1 or 2 bytes sequences.
+        decoder = codecs.getincrementaldecoder('cp949')()
+        self.assertEqual(decoder.decode('\xc6\xc4\xc0\xcc\xbd'),
+                         u'\ud30c\uc774')
+        self.assertEqual(decoder.decode('\xe3 \xb8\xb6\xc0\xbb'),
+                         u'\uc36c \ub9c8\uc744')
+        self.assertEqual(decoder.decode(''), u'')
+
+    def test_dbcs_keep_buffer(self):
+        decoder = codecs.getincrementaldecoder('cp949')()
+        self.assertEqual(decoder.decode('\xc6\xc4\xc0'), u'\ud30c')
+        self.assertRaises(UnicodeDecodeError, decoder.decode, '', True)
+        self.assertEqual(decoder.decode('\xcc'), u'\uc774')
+
+        self.assertEqual(decoder.decode('\xc6\xc4\xc0'), u'\ud30c')
+        self.assertRaises(UnicodeDecodeError, decoder.decode, '\xcc\xbd', True)
+        self.assertEqual(decoder.decode('\xcc'), u'\uc774')
+
+    def test_iso2022(self):
+        decoder = codecs.getincrementaldecoder('iso2022-jp')()
+        ESC = '\x1b'
+        self.assertEqual(decoder.decode(ESC + '('), u'')
+        self.assertEqual(decoder.decode('B', True), u'')
+        self.assertEqual(decoder.decode(ESC + '$'), u'')
+        self.assertEqual(decoder.decode('B@$'), u'\u4e16')
+        self.assertEqual(decoder.decode('@$@'), u'\u4e16')
+        self.assertEqual(decoder.decode('$', True), u'\u4e16')
+        self.assertEqual(decoder.reset(), None)
+        self.assertEqual(decoder.decode('@$'), u'@$')
+        self.assertEqual(decoder.decode(ESC + '$'), u'')
+        self.assertRaises(UnicodeDecodeError, decoder.decode, '', True)
+        self.assertEqual(decoder.decode('B@$'), u'\u4e16')
+
+
 class Test_StreamWriter(unittest.TestCase):
     if len(u'\U00012345') == 2: # UCS2
         def test_gb18030(self):
             s= StringIO.StringIO()
-            c = codecs.lookup('gb18030')[3](s)
+            c = codecs.getwriter('gb18030')(s)
             c.write(u'123')
             self.assertEqual(s.getvalue(), '123')
             c.write(u'\U00012345')
@@ -30,15 +125,16 @@ class Test_StreamWriter(unittest.TestCase):
             self.assertEqual(s.getvalue(),
                     '123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851')
 
-        # standard utf-8 codecs has broken StreamReader
-        if test_multibytecodec_support.__cjkcodecs__:
-            def test_utf_8(self):
-                s= StringIO.StringIO()
-                c = codecs.lookup('utf-8')[3](s)
-                c.write(u'123')
-                self.assertEqual(s.getvalue(), '123')
-                c.write(u'\U00012345')
-                self.assertEqual(s.getvalue(), '123\xf0\x92\x8d\x85')
+        def test_utf_8(self):
+            s= StringIO.StringIO()
+            c = codecs.getwriter('utf-8')(s)
+            c.write(u'123')
+            self.assertEqual(s.getvalue(), '123')
+            c.write(u'\U00012345')
+            self.assertEqual(s.getvalue(), '123\xf0\x92\x8d\x85')
+
+            # Python utf-8 codec can't buffer surrogate pairs yet.
+            if 0:
                 c.write(u'\U00012345'[0])
                 self.assertEqual(s.getvalue(), '123\xf0\x92\x8d\x85')
                 c.write(u'\U00012345'[1] + u'\U00012345' + u'\uac00\u00ac')
@@ -61,14 +157,6 @@ class Test_StreamWriter(unittest.TestCase):
     else: # UCS4
         pass
 
-    def test_nullcoding(self):
-        self.assertEqual(''.decode('gb18030'), u'')
-        self.assertEqual(unicode('', 'gb18030'), u'')
-        self.assertEqual(u''.encode('gb18030'), '')
-
-    def test_str_decode(self):
-        self.assertEqual('abcd'.encode('gb18030'), 'abcd')
-
     def test_streamwriter_strwrite(self):
         s = StringIO.StringIO()
         wr = codecs.getwriter('gb18030')(s)
@@ -83,6 +171,9 @@ class Test_ISO2022(unittest.TestCase):
 
 def test_main():
     suite = unittest.TestSuite()
+    suite.addTest(unittest.makeSuite(Test_MultibyteCodec))
+    suite.addTest(unittest.makeSuite(Test_IncrementalEncoder))
+    suite.addTest(unittest.makeSuite(Test_IncrementalDecoder))
     suite.addTest(unittest.makeSuite(Test_StreamWriter))
     suite.addTest(unittest.makeSuite(Test_ISO2022))
     test_support.run_suite(suite)
index 45a63e7af81e02163b16df5099cf45a4014a9c4a..563a3ea7f7cfcef2bb611dd4958687122f72b892 100644 (file)
@@ -3,15 +3,12 @@
 # test_multibytecodec_support.py
 #   Common Unittest Routines for CJK codecs
 #
-# $CJKCodecs: test_multibytecodec_support.py,v 1.6 2004/06/19 06:09:55 perky Exp $
 
 import sys, codecs, os.path
 import unittest
 from test import test_support
 from StringIO import StringIO
 
-__cjkcodecs__ = 0 # define this as 0 for python
-
 class TestBase:
     encoding        = ''   # codec name
     codec           = None # codec tuple (with 4 elements)
@@ -21,11 +18,17 @@ class TestBase:
     roundtriptest   = 1    # set if roundtrip is possible with unicode
     has_iso10646    = 0    # set if this encoding contains whole iso10646 map
     xmlcharnametest = None # string to test xmlcharrefreplace
+    unmappedunicode = u'\udeee' # a unicode codepoint that is not mapped.
 
     def setUp(self):
         if self.codec is None:
             self.codec = codecs.lookup(self.encoding)
-        self.encode, self.decode, self.reader, self.writer = self.codec
+        self.encode = self.codec.encode
+        self.decode = self.codec.decode
+        self.reader = self.codec.streamreader
+        self.writer = self.codec.streamwriter
+        self.incrementalencoder = self.codec.incrementalencoder
+        self.incrementaldecoder = self.codec.incrementaldecoder
 
     def test_chunkcoding(self):
         for native, utf8 in zip(*[StringIO(f).readlines()
@@ -47,51 +50,142 @@ class TestBase:
             else:
                 self.assertRaises(UnicodeError, func, source, scheme)
 
-    if sys.hexversion >= 0x02030000:
-        def test_xmlcharrefreplace(self):
-            if self.has_iso10646:
-                return
+    def test_xmlcharrefreplace(self):
+        if self.has_iso10646:
+            return
+
+        s = u"\u0b13\u0b23\u0b60 nd eggs"
+        self.assertEqual(
+            self.encode(s, "xmlcharrefreplace")[0],
+            "&#2835;&#2851;&#2912; nd eggs"
+        )
+
+    def test_customreplace(self):
+        if self.has_iso10646:
+            return
+
+        from htmlentitydefs import codepoint2name
+
+        def xmlcharnamereplace(exc):
+            if not isinstance(exc, UnicodeEncodeError):
+                raise TypeError("don't know how to handle %r" % exc)
+            l = []
+            for c in exc.object[exc.start:exc.end]:
+                if ord(c) in codepoint2name:
+                    l.append(u"&%s;" % codepoint2name[ord(c)])
+                else:
+                    l.append(u"&#%d;" % ord(c))
+            return (u"".join(l), exc.end)
+
+        codecs.register_error("test.xmlcharnamereplace", xmlcharnamereplace)
 
-            s = u"\u0b13\u0b23\u0b60 nd eggs"
-            self.assertEqual(
-                self.encode(s, "xmlcharrefreplace")[0],
-                "&#2835;&#2851;&#2912; nd eggs"
-            )
+        if self.xmlcharnametest:
+            sin, sout = self.xmlcharnametest
+        else:
+            sin = u"\xab\u211c\xbb = \u2329\u1234\u232a"
+            sout = "&laquo;&real;&raquo; = &lang;&#4660;&rang;"
+        self.assertEqual(self.encode(sin,
+                                    "test.xmlcharnamereplace")[0], sout)
+
+    def test_callback_wrong_objects(self):
+        def myreplace(exc):
+            return (ret, exc.end)
+        codecs.register_error("test.cjktest", myreplace)
+
+        for ret in ([1, 2, 3], [], None, object(), 'string', ''):
+            self.assertRaises(TypeError, self.encode, self.unmappedunicode,
+                              'test.cjktest')
+
+    def test_callback_None_index(self):
+        def myreplace(exc):
+            return (u'x', None)
+        codecs.register_error("test.cjktest", myreplace)
+        self.assertRaises(TypeError, self.encode, self.unmappedunicode,
+                          'test.cjktest')
+
+    def test_callback_backward_index(self):
+        def myreplace(exc):
+            if myreplace.limit > 0:
+                myreplace.limit -= 1
+                return (u'REPLACED', 0)
+            else:
+                return (u'TERMINAL', exc.end)
+        myreplace.limit = 3
+        codecs.register_error("test.cjktest", myreplace)
+        self.assertEqual(self.encode(u'abcd' + self.unmappedunicode + u'efgh',
+                                     'test.cjktest'),
+                ('abcdREPLACEDabcdREPLACEDabcdREPLACEDabcdTERMINALefgh', 9))
+
+    def test_callback_forward_index(self):
+        def myreplace(exc):
+            return (u'REPLACED', exc.end + 2)
+        codecs.register_error("test.cjktest", myreplace)
+        self.assertEqual(self.encode(u'abcd' + self.unmappedunicode + u'efgh',
+                                     'test.cjktest'), ('abcdREPLACEDgh', 9))
+
+    def test_callback_index_outofbound(self):
+        def myreplace(exc):
+            return (u'TERM', 100)
+        codecs.register_error("test.cjktest", myreplace)
+        self.assertRaises(IndexError, self.encode, self.unmappedunicode,
+                          'test.cjktest')
+
+    def test_incrementalencoder(self):
+        UTF8Reader = codecs.getreader('utf-8')
+        for sizehint in [None] + range(1, 33) + \
+                        [64, 128, 256, 512, 1024]:
+            istream = UTF8Reader(StringIO(self.tstring[1]))
+            ostream = StringIO()
+            encoder = self.incrementalencoder()
+            while 1:
+                if sizehint is not None:
+                    data = istream.read(sizehint)
+                else:
+                    data = istream.read()
 
-        def test_customreplace(self):
-            if self.has_iso10646:
-                return
+                if not data:
+                    break
+                e = encoder.encode(data)
+                ostream.write(e)
 
-            import htmlentitydefs
+            self.assertEqual(ostream.getvalue(), self.tstring[0])
 
-            names = {}
-            for (key, value) in htmlentitydefs.entitydefs.items():
-                if len(value)==1:
-                    names[value.decode('latin-1')] = self.decode(key)[0]
+    def test_incrementaldecoder(self):
+        UTF8Writer = codecs.getwriter('utf-8')
+        for sizehint in [None, -1] + range(1, 33) + \
+                        [64, 128, 256, 512, 1024]:
+            istream = StringIO(self.tstring[0])
+            ostream = UTF8Writer(StringIO())
+            decoder = self.incrementaldecoder()
+            while 1:
+                data = istream.read(sizehint)
+                if not data:
+                    break
                 else:
-                    names[unichr(int(value[2:-1]))] = self.decode(key)[0]
-
-            def xmlcharnamereplace(exc):
-                if not isinstance(exc, UnicodeEncodeError):
-                    raise TypeError("don't know how to handle %r" % exc)
-                l = []
-                for c in exc.object[exc.start:exc.end]:
-                    try:
-                        l.append(u"&%s;" % names[c])
-                    except KeyError:
-                        l.append(u"&#%d;" % ord(c))
-                return (u"".join(l), exc.end)
-
-            codecs.register_error(
-                "test.xmlcharnamereplace", xmlcharnamereplace)
-
-            if self.xmlcharnametest:
-                sin, sout = self.xmlcharnametest
-            else:
-                sin = u"\xab\u211c\xbb = \u2329\u1234\u232a"
-                sout = "&laquo;&real;&raquo; = &lang;&#4660;&rang;"
-            self.assertEqual(self.encode(sin,
-                                        "test.xmlcharnamereplace")[0], sout)
+                    u = decoder.decode(data)
+                    ostream.write(u)
+
+            self.assertEqual(ostream.getvalue(), self.tstring[1])
+
+    def test_incrementalencoder_error_callback(self):
+        inv = self.unmappedunicode
+
+        e = self.incrementalencoder()
+        self.assertRaises(UnicodeEncodeError, e.encode, inv, True)
+
+        e.errors = 'ignore'
+        self.assertEqual(e.encode(inv, True), '')
+
+        e.reset()
+        def tempreplace(exc):
+            return (u'called', exc.end)
+        codecs.register_error('test.incremental_error_callback', tempreplace)
+        e.errors = 'test.incremental_error_callback'
+        self.assertEqual(e.encode(inv, True), 'called')
+
+        # again
+        e.errors = 'ignore'
+        self.assertEqual(e.encode(inv, True), '')
 
     def test_streamreader(self):
         UTF8Writer = codecs.getwriter('utf-8')
@@ -113,11 +207,7 @@ class TestBase:
                 self.assertEqual(ostream.getvalue(), self.tstring[1])
 
     def test_streamwriter(self):
-        if __cjkcodecs__:
-            readfuncs = ('read', 'readline', 'readlines')
-        else:
-            # standard utf8 codec has broken readline and readlines.
-            readfuncs = ('read',)
+        readfuncs = ('read', 'readline', 'readlines')
         UTF8Reader = codecs.getreader('utf-8')
         for name in readfuncs:
             for sizehint in [None] + range(1, 33) + \
@@ -211,10 +301,5 @@ class TestBase_Mapping(unittest.TestCase):
             self.assertEqual(unicode(csetch, self.encoding), unich)
 
 def load_teststring(encoding):
-    if __cjkcodecs__:
-        etxt = open(os.path.join('sampletexts', encoding) + '.txt').read()
-        utxt = open(os.path.join('sampletexts', encoding) + '.utf8').read()
-        return (etxt, utxt)
-    else:
-        from test import cjkencodings_test
-        return cjkencodings_test.teststring[encoding]
+    # Return the sample-text entry for `encoding` from the table in
+    # test.cjkencodings_test.  The import is local to this function.
+    from test import cjkencodings_test
+    return cjkencodings_test.teststring[encoding]
index fd048d94869aae85c332a7a93b9dd4d8f30290b2..fb51297ae06bf29828e10a764213340bce8344ac 100644 (file)
@@ -217,11 +217,8 @@ ENCODER(gb18030)
                                        break;
                                }
 
-                       if (utrrange->first == 0) {
-                               PyErr_SetString(PyExc_RuntimeError,
-                                               "unicode mapping invalid");
+                       if (utrrange->first == 0)
                                return 1;
-                       }
                        continue;
                }
 
index f51b6f29cccf8789c7a710d10b08f85d8755d422..26d5c944c9d28e10b2cb836888a0e2e1b2c85018 100644 (file)
@@ -6,6 +6,7 @@
 
 #define PY_SSIZE_T_CLEAN
 #include "Python.h"
+#include "structmember.h"
 #include "multibytecodec.h"
 
 typedef struct {
@@ -38,22 +39,14 @@ that encoding errors raise a UnicodeDecodeError. Other possible values\n\
 are 'ignore' and 'replace' as well as any other name registerd with\n\
 codecs.register_error that is able to handle UnicodeDecodeErrors.");
 
-PyDoc_STRVAR(MultibyteCodec_StreamReader__doc__,
-"I.StreamReader(stream[, errors]) -> StreamReader instance");
-
-PyDoc_STRVAR(MultibyteCodec_StreamWriter__doc__,
-"I.StreamWriter(stream[, errors]) -> StreamWriter instance");
-
 static char *codeckwarglist[] = {"input", "errors", NULL};
+static char *incnewkwarglist[] = {"errors", NULL};
+static char *incrementalkwarglist[] = {"input", "final", NULL};
 static char *streamkwarglist[] = {"stream", "errors", NULL};
 
 static PyObject *multibytecodec_encode(MultibyteCodec *,
                MultibyteCodec_State *, const Py_UNICODE **, Py_ssize_t,
                PyObject *, int);
-static PyObject *mbstreamreader_create(MultibyteCodec *,
-               PyObject *, const char *);
-static PyObject *mbstreamwriter_create(MultibyteCodec *,
-               PyObject *, const char *);
 
 #define MBENC_RESET    MBENC_MAX<<1 /* reset after an encoding session */
 
@@ -83,7 +76,7 @@ make_tuple(PyObject *object, Py_ssize_t len)
 }
 
 static PyObject *
-get_errorcallback(const char *errors)
+internal_error_callback(const char *errors)
 {
        if (errors == NULL || strcmp(errors, "strict") == 0)
                return ERROR_STRICT;
@@ -91,17 +84,88 @@ get_errorcallback(const char *errors)
                return ERROR_IGNORE;
        else if (strcmp(errors, "replace") == 0)
                return ERROR_REPLACE;
+       else
+               return PyString_FromString(errors);
+}
+
+/*
+ * Look up the error handler registered under the name held in `errors`
+ * (a str object) and call it with `exc` as its only argument.
+ *
+ * Returns the handler's result as a new reference, or NULL with an
+ * exception set when the lookup or the call fails.
+ */
+static PyObject *
+call_error_callback(PyObject *errors, PyObject *exc)
+{
+       PyObject *handler, *result;
+
+       assert(PyString_Check(errors));
+       handler = PyCodec_LookupError(PyString_AS_STRING(errors));
+       if (handler == NULL)
+               return NULL;
+
+       /* equivalent to handler(exc); the argument tuple is built for us */
+       result = PyObject_CallFunctionObjArgs(handler, exc, NULL);
+       Py_DECREF(handler);
+       return result;
+}
+
+/*
+ * Getter for the `errors` attribute of a stateful codec context.
+ *
+ * The three predefined ERROR_* values are mapped back to their names and
+ * returned as a fresh str; any other value is returned as stored, with
+ * its reference count incremented.
+ */
+static PyObject *
+codecctx_errors_get(MultibyteStatefulCodecContext *self)
+{
+       const char *errors;
+
+       if (self->errors == ERROR_STRICT)
+               errors = "strict";
+       else if (self->errors == ERROR_IGNORE)
+               errors = "ignore";
+       else if (self->errors == ERROR_REPLACE)
+               errors = "replace";
        else {
-               return PyCodec_LookupError(errors);
+               Py_INCREF(self->errors);
+               return self->errors;
+       }
+
+       return PyString_FromString(errors);
+}
+
+/*
+ * Setter for the `errors` attribute of a stateful codec context.
+ *
+ * `value` must be a str naming the error handling scheme.  It is converted
+ * with internal_error_callback() and replaces the previous setting, which
+ * is released with ERROR_DECREF.
+ *
+ * Returns 0 on success, -1 with an exception set on failure
+ * (AttributeError on deletion, TypeError for a non-string value).
+ */
+static int
+codecctx_errors_set(MultibyteStatefulCodecContext *self, PyObject *value,
+                   void *closure)
+{
+       PyObject *cb;
+
+       /* A setter is called with value == NULL for `del obj.errors`;
+        * reject that before any macro dereferences the pointer. */
+       if (value == NULL) {
+               PyErr_SetString(PyExc_AttributeError,
+                               "cannot delete attribute");
+               return -1;
+       }
+
+       if (!PyString_Check(value)) {
+               PyErr_SetString(PyExc_TypeError, "errors must be a string");
+               return -1;
        }
+
+       cb = internal_error_callback(PyString_AS_STRING(value));
+       if (cb == NULL)
+               return -1;
+
+       /* drop the old setting only after the new one is known to be valid */
+       ERROR_DECREF(self->errors);
+       self->errors = cb;
+       return 0;
 }
 
+/* This getset handlers list is used by all the stateful codec objects.
+ * It exposes a single read/write attribute, `errors`, backed by
+ * codecctx_errors_get() and codecctx_errors_set(). */
+static PyGetSetDef codecctx_getsets[] = {
+       {"errors",      (getter)codecctx_errors_get,
+                       (setter)codecctx_errors_set,
+                       PyDoc_STR("how to treat errors")},
+       {NULL,}         /* sentinel */
+};
+
 static int
 expand_encodebuffer(MultibyteEncodeBuffer *buf, Py_ssize_t esize)
 {
        Py_ssize_t orgpos, orgsize;
 
-       orgpos = (Py_ssize_t)((char*)buf->outbuf -
+       orgpos = (Py_ssize_t)((char *)buf->outbuf -
                                PyString_AS_STRING(buf->outobj));
        orgsize = PyString_GET_SIZE(buf->outobj);
        if (_PyString_Resize(&buf->outobj, orgsize + (
@@ -125,8 +189,7 @@ expand_decodebuffer(MultibyteDecodeBuffer *buf, Py_ssize_t esize)
 {
        Py_ssize_t orgpos, orgsize;
 
-       orgpos = (Py_ssize_t)(buf->outbuf -
-                               PyUnicode_AS_UNICODE(buf->outobj));
+       orgpos = (Py_ssize_t)(buf->outbuf - PyUnicode_AS_UNICODE(buf->outobj));
        orgsize = PyUnicode_GET_SIZE(buf->outobj);
        if (PyUnicode_Resize(&buf->outobj, orgsize + (
            esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize)) == -1)
@@ -144,16 +207,21 @@ expand_decodebuffer(MultibyteDecodeBuffer *buf, Py_ssize_t esize)
                        goto errorexit;                                 \
 }
 
+
+/**
+ * MultibyteCodec object
+ */
+
 static int
 multibytecodec_encerror(MultibyteCodec *codec,
                        MultibyteCodec_State *state,
                        MultibyteEncodeBuffer *buf,
                        PyObject *errors, Py_ssize_t e)
 {
-       PyObject *retobj = NULL, *retstr = NULL, *argsobj, *tobj;
+       PyObject *retobj = NULL, *retstr = NULL, *tobj;
        Py_ssize_t retstrsize, newpos;
-       const char *reason;
        Py_ssize_t esize, start, end;
+       const char *reason;
 
        if (e > 0) {
                reason = "illegal multibyte sequence";
@@ -166,7 +234,7 @@ multibytecodec_encerror(MultibyteCodec *codec,
                        return 0; /* retry it */
                case MBERR_TOOFEW:
                        reason = "incomplete multibyte sequence";
-                       esize = (size_t)(buf->inbuf_end - buf->inbuf);
+                       esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
                        break;
                case MBERR_INTERNAL:
                        PyErr_SetString(PyExc_RuntimeError,
@@ -230,21 +298,14 @@ multibytecodec_encerror(MultibyteCodec *codec,
                goto errorexit;
        }
 
-       argsobj = PyTuple_New(1);
-       if (argsobj == NULL)
-               goto errorexit;
-
-       PyTuple_SET_ITEM(argsobj, 0, buf->excobj);
-       Py_INCREF(buf->excobj);
-       retobj = PyObject_CallObject(errors, argsobj);
-       Py_DECREF(argsobj);
+       retobj = call_error_callback(errors, buf->excobj);
        if (retobj == NULL)
                goto errorexit;
 
        if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
            !PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) ||
            !PyInt_Check(PyTuple_GET_ITEM(retobj, 1))) {
-               PyErr_SetString(PyExc_ValueError,
+               PyErr_SetString(PyExc_TypeError,
                                "encoding error handler must return "
                                "(unicode, int) tuple");
                goto errorexit;
@@ -293,7 +354,7 @@ multibytecodec_decerror(MultibyteCodec *codec,
                        MultibyteDecodeBuffer *buf,
                        PyObject *errors, Py_ssize_t e)
 {
-       PyObject *argsobj, *retobj = NULL, *retuni = NULL;
+       PyObject *retobj = NULL, *retuni = NULL;
        Py_ssize_t retunisize, newpos;
        const char *reason;
        Py_ssize_t esize, start, end;
@@ -309,7 +370,7 @@ multibytecodec_decerror(MultibyteCodec *codec,
                        return 0; /* retry it */
                case MBERR_TOOFEW:
                        reason = "incomplete multibyte sequence";
-                       esize = (size_t)(buf->inbuf_end - buf->inbuf);
+                       esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
                        break;
                case MBERR_INTERNAL:
                        PyErr_SetString(PyExc_RuntimeError,
@@ -354,21 +415,14 @@ multibytecodec_decerror(MultibyteCodec *codec,
                goto errorexit;
        }
 
-       argsobj = PyTuple_New(1);
-       if (argsobj == NULL)
-               goto errorexit;
-
-       PyTuple_SET_ITEM(argsobj, 0, buf->excobj);
-       Py_INCREF(buf->excobj);
-       retobj = PyObject_CallObject(errors, argsobj);
-       Py_DECREF(argsobj);
+       retobj = call_error_callback(errors, buf->excobj);
        if (retobj == NULL)
                goto errorexit;
 
        if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
            !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) ||
            !PyInt_Check(PyTuple_GET_ITEM(retobj, 1))) {
-               PyErr_SetString(PyExc_ValueError,
+               PyErr_SetString(PyExc_TypeError,
                                "decoding error handler must return "
                                "(unicode, int) tuple");
                goto errorexit;
@@ -453,7 +507,7 @@ multibytecodec_encode(MultibyteCodec *codec,
                                goto errorexit;
                }
 
-       finalsize = (Py_ssize_t)((char*)buf.outbuf -
+       finalsize = (Py_ssize_t)((char *)buf.outbuf -
                                 PyString_AS_STRING(buf.outobj));
 
        if (finalsize != PyString_GET_SIZE(buf.outobj))
@@ -500,7 +554,7 @@ MultibyteCodec_Encode(MultibyteCodecObject *self,
        data = PyUnicode_AS_UNICODE(arg);
        datalen = PyUnicode_GET_SIZE(arg);
 
-       errorcb = get_errorcallback(errors);
+       errorcb = internal_error_callback(errors);
        if (errorcb == NULL) {
                Py_XDECREF(ucvt);
                return NULL;
@@ -515,16 +569,12 @@ MultibyteCodec_Encode(MultibyteCodecObject *self,
        if (r == NULL)
                goto errorexit;
 
-       if (errorcb > ERROR_MAX) {
-               Py_DECREF(errorcb);
-       }
+       ERROR_DECREF(errorcb);
        Py_XDECREF(ucvt);
        return make_tuple(r, datalen);
 
 errorexit:
-       if (errorcb > ERROR_MAX) {
-               Py_DECREF(errorcb);
-       }
+       ERROR_DECREF(errorcb);
        Py_XDECREF(ucvt);
        return NULL;
 }
@@ -543,18 +593,16 @@ MultibyteCodec_Decode(MultibyteCodecObject *self,
                                codeckwarglist, &data, &datalen, &errors))
                return NULL;
 
-       errorcb = get_errorcallback(errors);
+       errorcb = internal_error_callback(errors);
        if (errorcb == NULL)
                return NULL;
 
        if (datalen == 0) {
-               if (errorcb > ERROR_MAX) {
-                       Py_DECREF(errorcb);
-               }
+               ERROR_DECREF(errorcb);
                return make_tuple(PyUnicode_FromUnicode(NULL, 0), 0);
        }
 
-       buf.outobj = buf.excobj = NULL;
+       buf.excobj = NULL;
        buf.inbuf = buf.inbuf_top = (unsigned char *)data;
        buf.inbuf_end = buf.inbuf_top + datalen;
        buf.outobj = PyUnicode_FromUnicode(NULL, datalen);
@@ -590,49 +638,17 @@ MultibyteCodec_Decode(MultibyteCodecObject *self,
                        goto errorexit;
 
        Py_XDECREF(buf.excobj);
-       if (errorcb > ERROR_MAX) {
-               Py_DECREF(errorcb);
-       }
+       ERROR_DECREF(errorcb);
        return make_tuple(buf.outobj, datalen);
 
 errorexit:
-       if (errorcb > ERROR_MAX) {
-               Py_DECREF(errorcb);
-       }
+       ERROR_DECREF(errorcb);
        Py_XDECREF(buf.excobj);
        Py_XDECREF(buf.outobj);
 
        return NULL;
 }
 
-static PyObject *
-MultibyteCodec_StreamReader(MultibyteCodecObject *self,
-                           PyObject *args, PyObject *kwargs)
-{
-       PyObject *stream;
-       char *errors = NULL;
-
-       if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|s:StreamReader",
-                               streamkwarglist, &stream, &errors))
-               return NULL;
-
-       return mbstreamreader_create(self->codec, stream, errors);
-}
-
-static PyObject *
-MultibyteCodec_StreamWriter(MultibyteCodecObject *self,
-                           PyObject *args, PyObject *kwargs)
-{
-       PyObject *stream;
-       char *errors = NULL;
-
-       if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|s:StreamWriter",
-                               streamkwarglist, &stream, &errors))
-               return NULL;
-
-       return mbstreamwriter_create(self->codec, stream, errors);
-}
-
 static struct PyMethodDef multibytecodec_methods[] = {
        {"encode",      (PyCFunction)MultibyteCodec_Encode,
                        METH_VARARGS | METH_KEYWORDS,
@@ -640,12 +656,6 @@ static struct PyMethodDef multibytecodec_methods[] = {
        {"decode",      (PyCFunction)MultibyteCodec_Decode,
                        METH_VARARGS | METH_KEYWORDS,
                        MultibyteCodec_Decode__doc__},
-       {"StreamReader",(PyCFunction)MultibyteCodec_StreamReader,
-                       METH_VARARGS | METH_KEYWORDS,
-                       MultibyteCodec_StreamReader__doc__},
-       {"StreamWriter",(PyCFunction)MultibyteCodec_StreamWriter,
-                       METH_VARARGS | METH_KEYWORDS,
-                       MultibyteCodec_StreamWriter__doc__},
        {NULL,          NULL},
 };
 
@@ -655,8 +665,6 @@ multibytecodec_dealloc(MultibyteCodecObject *self)
        PyObject_Del(self);
 }
 
-
-
 static PyTypeObject MultibyteCodec_Type = {
        PyObject_HEAD_INIT(NULL)
        0,                              /* ob_size */
@@ -690,244 +698,740 @@ static PyTypeObject MultibyteCodec_Type = {
        multibytecodec_methods,         /* tp_methods */
 };
 
-static PyObject *
-mbstreamreader_iread(MultibyteStreamReaderObject *self,
-                    const char *method, Py_ssize_t sizehint)
-{
-       MultibyteDecodeBuffer buf;
-       PyObject *cres;
-       Py_ssize_t rsize, r, finalsize = 0;
 
-       if (sizehint == 0)
-               return PyUnicode_FromUnicode(NULL, 0);
+/**
+ * Utility functions for stateful codec mechanism
+ */
 
-       buf.outobj = buf.excobj = NULL;
-       cres = NULL;
+#define STATEFUL_DCTX(o)       ((MultibyteStatefulDecoderContext *)(o))
+#define STATEFUL_ECTX(o)       ((MultibyteStatefulEncoderContext *)(o))
 
-       for (;;) {
-               if (sizehint < 0)
-                       cres = PyObject_CallMethod(self->stream,
-                                       (char *)method, NULL);
-               else
-                       cres = PyObject_CallMethod(self->stream,
-                                       (char *)method, "i", sizehint);
-               if (cres == NULL)
-                       goto errorexit;
+/*
+ * Encode `unistr` using the state kept in `ctx`.
+ *
+ * A non-unicode argument is first converted with PyObject_Unicode().
+ * Characters left pending by a previous call (ctx->pending) are placed in
+ * front of the new input.  When `final` is true the encoder is invoked
+ * with MBENC_FLUSH.  Input that multibytecodec_encode() leaves unconsumed
+ * is saved in ctx->pending for the next call.
+ *
+ * Returns the object produced by multibytecodec_encode(), or NULL with an
+ * exception set.  If the encode call itself fails, the pending buffer is
+ * restored to what it was on entry.
+ */
+static PyObject *
+encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx,
+                       PyObject *unistr, int final)
+{
+       PyObject *ucvt, *r = NULL;
+       Py_UNICODE *inbuf, *inbuf_end, *inbuf_tmp = NULL;
+       Py_ssize_t datalen, origpending;
 
-               if (!PyString_Check(cres)) {
+       if (PyUnicode_Check(unistr))
+               ucvt = NULL;
+       else {
+               unistr = ucvt = PyObject_Unicode(unistr);
+               if (unistr == NULL)
+                       return NULL;
+               else if (!PyUnicode_Check(unistr)) {
                        PyErr_SetString(PyExc_TypeError,
-                                       "stream function returned a "
-                                       "non-string object");
-                       goto errorexit;
-               }
-
-               if (self->pendingsize > 0) {
-                       PyObject *ctr;
-                       char *ctrdata;
-
-                       rsize = PyString_GET_SIZE(cres) + self->pendingsize;
-                       ctr = PyString_FromStringAndSize(NULL, rsize);
-                       if (ctr == NULL)
-                               goto errorexit;
-                       ctrdata = PyString_AS_STRING(ctr);
-                       memcpy(ctrdata, self->pending, self->pendingsize);
-                       memcpy(ctrdata + self->pendingsize,
-                               PyString_AS_STRING(cres),
-                               PyString_GET_SIZE(cres));
-                       Py_DECREF(cres);
-                       cres = ctr;
-                       self->pendingsize = 0;
-               }
-
-               rsize = PyString_GET_SIZE(cres);
-               buf.inbuf = buf.inbuf_top =
-                       (unsigned char *)PyString_AS_STRING(cres);
-               buf.inbuf_end = buf.inbuf_top + rsize;
-               if (buf.outobj == NULL) {
-                       buf.outobj = PyUnicode_FromUnicode(NULL, rsize);
-                       if (buf.outobj == NULL)
-                               goto errorexit;
-                       buf.outbuf = PyUnicode_AS_UNICODE(buf.outobj);
-                       buf.outbuf_end = buf.outbuf +
-                                       PyUnicode_GET_SIZE(buf.outobj);
-               }
-
-               r = 0;
-               if (rsize > 0)
-                       while (buf.inbuf < buf.inbuf_end) {
-                               Py_ssize_t inleft, outleft;
-
-                               inleft = (Py_ssize_t)(buf.inbuf_end -
-                                                     buf.inbuf);
-                               outleft = (Py_ssize_t)(buf.outbuf_end -
-                                                      buf.outbuf);
-
-                               r = self->codec->decode(&self->state,
-                                                       self->codec->config,
-                                                       &buf.inbuf, inleft,
-                                                       &buf.outbuf, outleft);
-                               if (r == 0 || r == MBERR_TOOFEW)
-                                       break;
-                               else if (multibytecodec_decerror(self->codec,
-                                               &self->state, &buf,
-                                               self->errors, r))
-                                       goto errorexit;
-                       }
-
-               if (rsize == 0 || sizehint < 0) { /* end of file */
-                       if (buf.inbuf < buf.inbuf_end &&
-                           multibytecodec_decerror(self->codec, &self->state,
-                                       &buf, self->errors, MBERR_TOOFEW))
-                               goto errorexit;
+                               "couldn't convert the object to unicode.");
+                       Py_DECREF(ucvt);
+                       return NULL;
                }
+       }
 
-               if (buf.inbuf < buf.inbuf_end) { /* pending sequence exists */
-                       Py_ssize_t npendings;
-
-                       /* we can't assume that pendingsize is still 0 here.
-                        * because this function can be called recursively
-                        * from error callback */
-                       npendings = (Py_ssize_t)(buf.inbuf_end - buf.inbuf);
-                       if (npendings + self->pendingsize > MAXDECPENDING) {
-                               PyErr_SetString(PyExc_RuntimeError,
-                                               "pending buffer overflow");
-                               goto errorexit;
-                       }
-                       memcpy(self->pending + self->pendingsize, buf.inbuf,
-                               npendings);
-                       self->pendingsize += npendings;
-               }
+       datalen = PyUnicode_GET_SIZE(unistr);
+       origpending = ctx->pendingsize;
 
-               finalsize = (Py_ssize_t)(buf.outbuf -
-                               PyUnicode_AS_UNICODE(buf.outobj));
-               Py_DECREF(cres);
-               cres = NULL;
+       if (ctx->pendingsize > 0) {
+               /* build "pending + new input" in a temporary buffer */
+               inbuf_tmp = PyMem_New(Py_UNICODE, datalen + ctx->pendingsize);
+               if (inbuf_tmp == NULL) {
+                       /* PyMem_New does not set an exception itself */
+                       PyErr_NoMemory();
+                       goto errorexit;
+               }
+               memcpy(inbuf_tmp, ctx->pending,
+                       Py_UNICODE_SIZE * ctx->pendingsize);
+               memcpy(inbuf_tmp + ctx->pendingsize,
+                       PyUnicode_AS_UNICODE(unistr),
+                       Py_UNICODE_SIZE * datalen);
+               datalen += ctx->pendingsize;
+               ctx->pendingsize = 0;
+               inbuf = inbuf_tmp;
+       }
+       else
+               inbuf = (Py_UNICODE *)PyUnicode_AS_UNICODE(unistr);
 
-               if (sizehint < 0 || finalsize != 0 || rsize == 0)
-                       break;
+       inbuf_end = inbuf + datalen;
 
-               sizehint = 1; /* read 1 more byte and retry */
+       r = multibytecodec_encode(ctx->codec, &ctx->state,
+                       (const Py_UNICODE **)&inbuf,
+                       datalen, ctx->errors, final ? MBENC_FLUSH : 0);
+       if (r == NULL) {
+               /* recover the original pending buffer; inbuf_tmp is only
+                * allocated (non-NULL) when something was pending */
+               if (origpending > 0)
+                       memcpy(ctx->pending, inbuf_tmp,
+                               Py_UNICODE_SIZE * origpending);
+               ctx->pendingsize = origpending;
+               goto errorexit;
        }
 
-       if (finalsize != PyUnicode_GET_SIZE(buf.outobj))
-               if (PyUnicode_Resize(&buf.outobj, finalsize) == -1)
+       if (inbuf < inbuf_end) {
+               /* keep the unconsumed tail for the next call */
+               ctx->pendingsize = (Py_ssize_t)(inbuf_end - inbuf);
+               if (ctx->pendingsize > MAXENCPENDING) {
+                       /* normal codecs can't reach here */
+                       ctx->pendingsize = 0;
+                       PyErr_SetString(PyExc_UnicodeError,
+                                       "pending buffer overflow");
                        goto errorexit;
+               }
+               memcpy(ctx->pending, inbuf,
+                       ctx->pendingsize * Py_UNICODE_SIZE);
+       }
 
-       Py_XDECREF(cres);
-       Py_XDECREF(buf.excobj);
-       return buf.outobj;
+       if (inbuf_tmp != NULL)
+               PyMem_Del(inbuf_tmp);
+       Py_XDECREF(ucvt);
+       return r;
 
 errorexit:
-       Py_XDECREF(cres);
-       Py_XDECREF(buf.excobj);
-       Py_XDECREF(buf.outobj);
+       if (inbuf_tmp != NULL)
+               PyMem_Del(inbuf_tmp);
+       Py_XDECREF(r);
+       Py_XDECREF(ucvt);
        return NULL;
 }
 
-static PyObject *
-mbstreamreader_read(MultibyteStreamReaderObject *self, PyObject *args)
+/*
+ * Append the bytes of `buf` that have not been consumed yet
+ * (buf->inbuf up to buf->inbuf_end) to ctx->pending.
+ *
+ * Returns 0 on success.  Returns -1 with UnicodeError set when the
+ * combined length would exceed MAXDECPENDING; ctx is left unchanged then.
+ */
+static int
+decoder_append_pending(MultibyteStatefulDecoderContext *ctx,
+                      MultibyteDecodeBuffer *buf)
 {
-       PyObject *sizeobj = NULL;
-       Py_ssize_t size;
+       Py_ssize_t npendings;
 
-       if (!PyArg_ParseTuple(args, "|O:read", &sizeobj))
-               return NULL;
+       npendings = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
+       if (npendings + ctx->pendingsize > MAXDECPENDING) {
+               PyErr_SetString(PyExc_UnicodeError, "pending buffer overflow");
+               return -1;
+       }
+       memcpy(ctx->pending + ctx->pendingsize, buf->inbuf, npendings);
+       ctx->pendingsize += npendings;
+       return 0;
+}
 
-       if (sizeobj == Py_None || sizeobj == NULL)
-               size = -1;
-       else if (PyInt_Check(sizeobj))
-               size = PyInt_AsSsize_t(sizeobj);
-       else {
-               PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
-               return NULL;
+/*
+ * Point the input side of `buf` at the `size` bytes starting at `data`.
+ *
+ * If buf->outobj is still NULL, a unicode object of `size` characters is
+ * allocated and the output pointers are set to cover it; an existing
+ * output object is left untouched.  buf->outobj is read here, so the
+ * caller must have initialised it.
+ *
+ * Returns 0 on success, -1 if the output object could not be allocated.
+ */
+static int
+decoder_prepare_buffer(MultibyteDecodeBuffer *buf, const char *data,
+                      Py_ssize_t size)
+{
+       buf->inbuf = buf->inbuf_top = (const unsigned char *)data;
+       buf->inbuf_end = buf->inbuf_top + size;
+       if (buf->outobj == NULL) { /* only if outobj is not allocated yet */
+               buf->outobj = PyUnicode_FromUnicode(NULL, size);
+               if (buf->outobj == NULL)
+                       return -1;
+               buf->outbuf = PyUnicode_AS_UNICODE(buf->outobj);
+               buf->outbuf_end = buf->outbuf +
+                                 PyUnicode_GET_SIZE(buf->outobj);
        }
 
-       return mbstreamreader_iread(self, "read", size);
+       return 0;
 }
 
-static PyObject *
-mbstreamreader_readline(MultibyteStreamReaderObject *self, PyObject *args)
+/*
+ * Run the codec's decode function over the input held in `buf` until it
+ * is consumed, the decoder reports success (0), or it reports
+ * MBERR_TOOFEW.  Any other result is handed to multibytecodec_decerror()
+ * together with ctx->errors.
+ *
+ * Returns 0 normally (unconsumed input may remain in `buf` after
+ * MBERR_TOOFEW), -1 when multibytecodec_decerror() reports failure.
+ */
+static int
+decoder_feed_buffer(MultibyteStatefulDecoderContext *ctx,
+                   MultibyteDecodeBuffer *buf)
 {
-       PyObject *sizeobj = NULL;
-       Py_ssize_t size;
+       while (buf->inbuf < buf->inbuf_end) {
+               Py_ssize_t inleft, outleft;
+               /* Py_ssize_t, not int: multibytecodec_decerror() takes the
+                * result as Py_ssize_t, so do not narrow it on the way */
+               Py_ssize_t r;
 
-       if (!PyArg_ParseTuple(args, "|O:readline", &sizeobj))
-               return NULL;
+               inleft = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
+               outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf);
 
-       if (sizeobj == Py_None || sizeobj == NULL)
-               size = -1;
-       else if (PyInt_Check(sizeobj))
-               size = PyInt_AsSsize_t(sizeobj);
-       else {
-               PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
-               return NULL;
+               r = ctx->codec->decode(&ctx->state, ctx->codec->config,
+                       &buf->inbuf, inleft, &buf->outbuf, outleft);
+               if (r == 0 || r == MBERR_TOOFEW)
+                       break;
+               else if (multibytecodec_decerror(ctx->codec, &ctx->state,
+                                                buf, ctx->errors, r))
+                       return -1;
        }
-
-       return mbstreamreader_iread(self, "readline", size);
+       return 0;
 }
 
-static PyObject *
-mbstreamreader_readlines(MultibyteStreamReaderObject *self, PyObject *args)
-{
-       PyObject *sizehintobj = NULL, *r, *sr;
-       Py_ssize_t sizehint;
 
-       if (!PyArg_ParseTuple(args, "|O:readlines", &sizehintobj))
-               return NULL;
+/**
+ * MultibyteIncrementalEncoder object
+ */
 
-       if (sizehintobj == Py_None || sizehintobj == NULL)
-               sizehint = -1;
-       else if (PyInt_Check(sizehintobj))
-               sizehint = PyInt_AsSsize_t(sizehintobj);
-       else {
-               PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
-               return NULL;
-       }
+/*
+ * encode(input[, final]) method of the incremental encoder.
+ *
+ * Parses the arguments (`final` defaults to 0) and delegates the work to
+ * encoder_encode_stateful().
+ */
+static PyObject *
+mbiencoder_encode(MultibyteIncrementalEncoderObject *self,
+                 PyObject *args, PyObject *kwargs)
+{
+       PyObject *data;
+       int final = 0;
 
-       r = mbstreamreader_iread(self, "read", sizehint);
-       if (r == NULL)
+       if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|i:encode",
+                       incrementalkwarglist, &data, &final))
                return NULL;
 
-       sr = PyUnicode_Splitlines(r, 1);
-       Py_DECREF(r);
-       return sr;
+       return encoder_encode_stateful(STATEFUL_ECTX(self), data, final);
 }
 
+/*
+ * reset() method of the incremental encoder: calls the codec's reset hook
+ * if it has one, discards any pending input and returns None.
+ *
+ * NOTE(review): this is an encoder, yet the hook consulted is `decreset`
+ * (the decoder-side one), unchanged from the stream reader this hunk
+ * replaces.  It looks like it should use the encoder-side reset
+ * instead -- confirm against the codec struct in multibytecodec.h before
+ * changing, since that hook's signature is not visible here.
+ */
 static PyObject *
-mbstreamreader_reset(MultibyteStreamReaderObject *self)
+mbiencoder_reset(MultibyteIncrementalEncoderObject *self)
 {
        if (self->codec->decreset != NULL &&
            self->codec->decreset(&self->state, self->codec->config) != 0)
                return NULL;
        self->pendingsize = 0;
 
-       Py_INCREF(Py_None);
-       return Py_None;
+       Py_RETURN_NONE;
 }
 
-static struct PyMethodDef mbstreamreader_methods[] = {
-       {"read",        (PyCFunction)mbstreamreader_read,
-                       METH_VARARGS, NULL},
-       {"readline",    (PyCFunction)mbstreamreader_readline,
-                       METH_VARARGS, NULL},
-       {"readlines",   (PyCFunction)mbstreamreader_readlines,
-                       METH_VARARGS, NULL},
+/* Method table of the incremental encoder type.  Neither method carries a
+ * docstring (the doc slot is NULL). */
+static struct PyMethodDef mbiencoder_methods[] = {
+       {"encode",      (PyCFunction)mbiencoder_encode,
+                       METH_VARARGS | METH_KEYWORDS, NULL},
+       {"reset",       (PyCFunction)mbiencoder_reset,
+                       METH_NOARGS, NULL},
+       {NULL,          NULL},  /* sentinel */
+};
+
+/*
+ * tp_new of the incremental encoder type.
+ *
+ * The codec is taken from the `codec` attribute of `type`, which must be
+ * a MultibyteCodec object.  The only accepted argument is the optional
+ * `errors` string.  The codec's encinit hook, if any, is run on the fresh
+ * state.
+ *
+ * Returns the new object, or NULL with an exception set.
+ */
+static PyObject *
+mbiencoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+       MultibyteIncrementalEncoderObject *self = NULL;
+       PyObject *codec;
+       char *errors = NULL;
+
+       /* PyObject_GetAttrString returns a new reference; every path from
+        * here on has to release it again. */
+       codec = PyObject_GetAttrString((PyObject *)type, "codec");
+       if (codec == NULL)
+               return NULL;
+       if (!MultibyteCodec_Check(codec)) {
+               PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
+               goto errorexit;
+       }
+
+       if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalEncoder",
+                                        incnewkwarglist, &errors))
+               goto errorexit;
+
+       self = (MultibyteIncrementalEncoderObject *)type->tp_alloc(type, 0);
+       if (self == NULL)
+               goto errorexit;
+
+       /* NOTE(review): only the inner codec pointer is kept, not the
+        * wrapper object; this assumes the pointed-to codec description
+        * outlives the wrapper -- confirm. */
+       self->codec = ((MultibyteCodecObject *)codec)->codec;
+       self->pendingsize = 0;
+       self->errors = internal_error_callback(errors);
+       if (self->errors == NULL)
+               goto errorexit;
+       if (self->codec->encinit != NULL &&
+           self->codec->encinit(&self->state, self->codec->config) != 0)
+               goto errorexit;
+
+       Py_DECREF(codec);
+       return (PyObject *)self;
+
+errorexit:
+       Py_XDECREF(self);       /* still NULL if allocation never happened */
+       Py_DECREF(codec);
+       return NULL;
+}
+
+/*
+ * tp_traverse of the incremental encoder: the only object reference held
+ * is `errors`, and it is visited only when ERROR_ISCUSTOM() says so.
+ * (Presumably the predefined ERROR_* values are not real objects --
+ * confirm in multibytecodec.h.)
+ */
+static int
+mbiencoder_traverse(MultibyteIncrementalEncoderObject *self,
+                   visitproc visit, void *arg)
+{
+       if (ERROR_ISCUSTOM(self->errors))
+               Py_VISIT(self->errors);
+       return 0;
+}
+
+/*
+ * tp_dealloc of the incremental encoder: untrack from the GC, release the
+ * `errors` setting and free the object through the type's tp_free.
+ */
+static void
+mbiencoder_dealloc(MultibyteIncrementalEncoderObject *self)
+{
+       PyObject_GC_UnTrack(self);
+       ERROR_DECREF(self->errors);
+       self->ob_type->tp_free(self);
+}
+
+/*
+ * Type object of the incremental encoder.  It is GC-aware and may be
+ * subclassed (Py_TPFLAGS_BASETYPE); mbiencoder_new() reads the codec from
+ * a `codec` attribute looked up on the type being instantiated.
+ */
+static PyTypeObject MultibyteIncrementalEncoder_Type = {
+       PyObject_HEAD_INIT(NULL)
+       0,                              /* ob_size */
+       "MultibyteIncrementalEncoder",  /* tp_name */
+       sizeof(MultibyteIncrementalEncoderObject), /* tp_basicsize */
+       0,                              /* tp_itemsize */
+       /*  methods  */
+       (destructor)mbiencoder_dealloc, /* tp_dealloc */
+       0,                              /* tp_print */
+       0,                              /* tp_getattr */
+       0,                              /* tp_setattr */
+       0,                              /* tp_compare */
+       0,                              /* tp_repr */
+       0,                              /* tp_as_number */
+       0,                              /* tp_as_sequence */
+       0,                              /* tp_as_mapping */
+       0,                              /* tp_hash */
+       0,                              /* tp_call */
+       0,                              /* tp_str */
+       PyObject_GenericGetAttr,        /* tp_getattro */
+       0,                              /* tp_setattro */
+       0,                              /* tp_as_buffer */
+       Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC
+               | Py_TPFLAGS_BASETYPE,  /* tp_flags */
+       0,                              /* tp_doc */
+       (traverseproc)mbiencoder_traverse,      /* tp_traverse */
+       0,                              /* tp_clear */
+       0,                              /* tp_richcompare */
+       0,                              /* tp_weaklistoffset */
+       0,                              /* tp_iter */
+       0,                              /* tp_iternext */
+       mbiencoder_methods,             /* tp_methods */
+       0,                              /* tp_members */
+       codecctx_getsets,               /* tp_getset */
+       0,                              /* tp_base */
+       0,                              /* tp_dict */
+       0,                              /* tp_descr_get */
+       0,                              /* tp_descr_set */
+       0,                              /* tp_dictoffset */
+       0,                              /* tp_init */
+       0,                              /* tp_alloc */
+       mbiencoder_new,                 /* tp_new */
+};
+
+
+/**
+ * MultibyteIncrementalDecoder object
+ */
+
+/*
+ * decode(input[, final]): decode one chunk of bytes and return unicode.
+ *
+ * Bytes kept in self->pending from the previous call are prepended to the
+ * new input.  Any input left unconsumed afterwards is handed to
+ * decoder_append_pending(), unless `final` is true, in which case it is
+ * first reported through the error handler as MBERR_TOOFEW.
+ *
+ * Returns a new reference, or NULL with an exception set.
+ */
+static PyObject *
+mbidecoder_decode(MultibyteIncrementalDecoderObject *self,
+                 PyObject *args, PyObject *kwargs)
+{
+       MultibyteDecodeBuffer buf;
+       char *data, *wdata = NULL;
+       Py_ssize_t wsize, finalsize = 0, size, origpending;
+       int final = 0;
+
+       if (!PyArg_ParseTupleAndKeywords(args, kwargs, "t#|i:decode",
+                       incrementalkwarglist, &data, &size, &final))
+               return NULL;
+
+       buf.outobj = buf.excobj = NULL;
+       origpending = self->pendingsize;
+
+       if (self->pendingsize == 0) {
+               /* nothing buffered: decode straight from the caller's data */
+               wsize = size;
+               wdata = data;
+       }
+       else {
+               /* guard the combined length against Py_ssize_t overflow */
+               if (size > PY_SSIZE_T_MAX - self->pendingsize) {
+                       PyErr_NoMemory();
+                       goto errorexit;
+               }
+               wsize = size + self->pendingsize;
+               wdata = PyMem_Malloc(wsize);
+               if (wdata == NULL) {
+                       /* PyMem_Malloc() does not set an exception itself */
+                       PyErr_NoMemory();
+                       goto errorexit;
+               }
+               memcpy(wdata, self->pending, self->pendingsize);
+               memcpy(wdata + self->pendingsize, data, size);
+               self->pendingsize = 0;
+       }
+
+       if (decoder_prepare_buffer(&buf, wdata, wsize) != 0)
+               goto errorexit;
+
+       if (decoder_feed_buffer(STATEFUL_DCTX(self), &buf))
+               goto errorexit;
+
+       if (final && buf.inbuf < buf.inbuf_end) {
+               if (multibytecodec_decerror(self->codec, &self->state,
+                               &buf, self->errors, MBERR_TOOFEW)) {
+                       /* recover the original pending buffer */
+                       memcpy(self->pending, wdata, origpending);
+                       self->pendingsize = origpending;
+                       goto errorexit;
+               }
+       }
+
+       if (buf.inbuf < buf.inbuf_end) { /* pending sequence still exists */
+               if (decoder_append_pending(STATEFUL_DCTX(self), &buf) != 0)
+                       goto errorexit;
+       }
+
+       /* shrink the output object to the number of units actually written */
+       finalsize = (Py_ssize_t)(buf.outbuf - PyUnicode_AS_UNICODE(buf.outobj));
+       if (finalsize != PyUnicode_GET_SIZE(buf.outobj))
+               if (PyUnicode_Resize(&buf.outobj, finalsize) == -1)
+                       goto errorexit;
+
+       if (wdata != data)
+               PyMem_Del(wdata);
+       Py_XDECREF(buf.excobj);
+       return buf.outobj;
+
+errorexit:
+       /* wdata is only owned by us when it is a separate allocation */
+       if (wdata != NULL && wdata != data)
+               PyMem_Del(wdata);
+       Py_XDECREF(buf.excobj);
+       Py_XDECREF(buf.outobj);
+       return NULL;
+}
+
+/* reset(): run the codec's decreset hook (if it has one) and discard any
+ * buffered partial input.  Returns None, or NULL if the hook fails. */
+static PyObject *
+mbidecoder_reset(MultibyteIncrementalDecoderObject *self)
+{
+       MultibyteCodec *codec = self->codec;
+
+       if (codec->decreset != NULL) {
+               if (codec->decreset(&self->state, codec->config) != 0)
+                       return NULL;
+       }
+
+       self->pendingsize = 0;
+       Py_INCREF(Py_None);
+       return Py_None;
+}
+
+/* Method table of MultibyteIncrementalDecoder: decode() accepts positional
+ * and keyword arguments, reset() takes none. */
+static struct PyMethodDef mbidecoder_methods[] = {
+       {"decode",      (PyCFunction)mbidecoder_decode,
+                       METH_VARARGS | METH_KEYWORDS, NULL},
+       {"reset",       (PyCFunction)mbidecoder_reset,
+                       METH_NOARGS, NULL},
+       {NULL,          NULL},
+};
+
+/*
+ * tp_new for MultibyteIncrementalDecoder.
+ *
+ * The codec to use is looked up as the `codec` attribute of the (sub)type
+ * being instantiated and must be a MultibyteCodec object.  The optional
+ * `errors` argument selects the error handler.
+ *
+ * Returns a new instance, or NULL with an exception set.
+ */
+static PyObject *
+mbidecoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+       MultibyteIncrementalDecoderObject *self = NULL;
+       PyObject *codec;
+       char *errors = NULL;
+
+       /* PyObject_GetAttrString() returns a new reference; it has to be
+        * released on every path below. */
+       codec = PyObject_GetAttrString((PyObject *)type, "codec");
+       if (codec == NULL)
+               return NULL;
+       if (!MultibyteCodec_Check(codec)) {
+               PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
+               goto errorexit;
+       }
+
+       if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalDecoder",
+                                        incnewkwarglist, &errors))
+               goto errorexit;
+
+       self = (MultibyteIncrementalDecoderObject *)type->tp_alloc(type, 0);
+       if (self == NULL)
+               goto errorexit;
+
+       self->codec = ((MultibyteCodecObject *)codec)->codec;
+       self->pendingsize = 0;
+       self->errors = internal_error_callback(errors);
+       if (self->errors == NULL)
+               goto errorexit;
+       if (self->codec->decinit != NULL &&
+           self->codec->decinit(&self->state, self->codec->config) != 0)
+               goto errorexit;
+
+       Py_DECREF(codec);
+       return (PyObject *)self;
+
+errorexit:
+       Py_XDECREF(self);
+       Py_DECREF(codec);
+       return NULL;
+}
+
+static int
+mbidecoder_traverse(MultibyteIncrementalDecoderObject *self,
+                   visitproc visit, void *arg)
+{
+       if (ERROR_ISCUSTOM(self->errors))
+               Py_VISIT(self->errors);
+       return 0;
+}
+
+/* Deallocator: untrack the object from the GC, release the error-handler
+ * reference (when it is a custom object) and free the instance through
+ * the type's tp_free slot. */
+static void
+mbidecoder_dealloc(MultibyteIncrementalDecoderObject *self)
+{
+       PyObject_GC_UnTrack(self);
+       /* NOTE(review): mbidecoder_new() can reach here with self->errors
+        * still NULL -- confirm ERROR_DECREF tolerates that. */
+       ERROR_DECREF(self->errors);
+       self->ob_type->tp_free(self);
+}
+
+/* Type object for the incremental decoder.  The type is GC-enabled and
+ * subclassable (see tp_flags); instances are built by mbidecoder_new. */
+static PyTypeObject MultibyteIncrementalDecoder_Type = {
+       PyObject_HEAD_INIT(NULL)
+       0,                              /* ob_size */
+       "MultibyteIncrementalDecoder",  /* tp_name */
+       sizeof(MultibyteIncrementalDecoderObject), /* tp_basicsize */
+       0,                              /* tp_itemsize */
+       /*  methods  */
+       (destructor)mbidecoder_dealloc, /* tp_dealloc */
+       0,                              /* tp_print */
+       0,                              /* tp_getattr */
+       0,                              /* tp_setattr */
+       0,                              /* tp_compare */
+       0,                              /* tp_repr */
+       0,                              /* tp_as_number */
+       0,                              /* tp_as_sequence */
+       0,                              /* tp_as_mapping */
+       0,                              /* tp_hash */
+       0,                              /* tp_call */
+       0,                              /* tp_str */
+       PyObject_GenericGetAttr,        /* tp_getattro */
+       0,                              /* tp_setattro */
+       0,                              /* tp_as_buffer */
+       Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC
+               | Py_TPFLAGS_BASETYPE,  /* tp_flags */
+       0,                              /* tp_doc */
+       (traverseproc)mbidecoder_traverse,      /* tp_traverse */
+       0,                              /* tp_clear */
+       0,                              /* tp_richcompare */
+       0,                              /* tp_weaklistoffset */
+       0,                              /* tp_iter */
+       0,                              /* tp_iterext */
+       mbidecoder_methods,             /* tp_methods */
+       0,                              /* tp_members */
+       codecctx_getsets,               /* tp_getset */
+       0,                              /* tp_base */
+       0,                              /* tp_dict */
+       0,                              /* tp_descr_get */
+       0,                              /* tp_descr_set */
+       0,                              /* tp_dictoffset */
+       0,                              /* tp_init */
+       0,                              /* tp_alloc */
+       mbidecoder_new,                 /* tp_new */
+};
+
+
+/**
+ * MultibyteStreamReader object
+ */
+
+/*
+ * Shared worker behind read(), readline() and readlines().
+ *
+ * Calls self->stream.<method>() -- with `sizehint` as its argument when
+ * sizehint >= 0, without arguments otherwise -- and decodes the returned
+ * byte string, prepending bytes left pending by an earlier call.  When a
+ * sized read yields no characters yet, one more byte is requested at a
+ * time until output is produced or the stream returns an empty string.
+ *
+ * Returns a new unicode object, or NULL with an exception set.
+ */
+static PyObject *
+mbstreamreader_iread(MultibyteStreamReaderObject *self,
+                    const char *method, Py_ssize_t sizehint)
+{
+       MultibyteDecodeBuffer buf;
+       PyObject *cres;
+       Py_ssize_t rsize, finalsize = 0;
+
+       if (sizehint == 0)
+               return PyUnicode_FromUnicode(NULL, 0);
+
+       buf.outobj = buf.excobj = NULL;
+       cres = NULL;
+
+       for (;;) {
+               if (sizehint < 0)
+                       cres = PyObject_CallMethod(self->stream,
+                                       (char *)method, NULL);
+               else
+                       /* "n" matches Py_ssize_t; "i" would read an int
+                        * from the varargs, which is a type mismatch */
+                       cres = PyObject_CallMethod(self->stream,
+                                       (char *)method, "n", sizehint);
+               if (cres == NULL)
+                       goto errorexit;
+
+               if (!PyString_Check(cres)) {
+                       PyErr_SetString(PyExc_TypeError,
+                                       "stream function returned a "
+                                       "non-string object");
+                       goto errorexit;
+               }
+
+               if (self->pendingsize > 0) {
+                       /* build pending + new data as one string to decode */
+                       PyObject *ctr;
+                       char *ctrdata;
+
+                       rsize = PyString_GET_SIZE(cres) + self->pendingsize;
+                       ctr = PyString_FromStringAndSize(NULL, rsize);
+                       if (ctr == NULL)
+                               goto errorexit;
+                       ctrdata = PyString_AS_STRING(ctr);
+                       memcpy(ctrdata, self->pending, self->pendingsize);
+                       memcpy(ctrdata + self->pendingsize,
+                               PyString_AS_STRING(cres),
+                               PyString_GET_SIZE(cres));
+                       Py_DECREF(cres);
+                       cres = ctr;
+                       self->pendingsize = 0;
+               }
+
+               rsize = PyString_GET_SIZE(cres);
+               if (decoder_prepare_buffer(&buf, PyString_AS_STRING(cres),
+                                          rsize) != 0)
+                       goto errorexit;
+
+               if (rsize > 0 && decoder_feed_buffer(
+                               (MultibyteStatefulDecoderContext *)self, &buf))
+                       goto errorexit;
+
+               if (rsize == 0 || sizehint < 0) { /* end of file */
+                       if (buf.inbuf < buf.inbuf_end &&
+                           multibytecodec_decerror(self->codec, &self->state,
+                                       &buf, self->errors, MBERR_TOOFEW))
+                               goto errorexit;
+               }
+
+               if (buf.inbuf < buf.inbuf_end) { /* pending sequence exists */
+                       if (decoder_append_pending(STATEFUL_DCTX(self),
+                                                  &buf) != 0)
+                               goto errorexit;
+               }
+
+               finalsize = (Py_ssize_t)(buf.outbuf -
+                               PyUnicode_AS_UNICODE(buf.outobj));
+               Py_DECREF(cres);
+               cres = NULL;
+
+               if (sizehint < 0 || finalsize != 0 || rsize == 0)
+                       break;
+
+               sizehint = 1; /* read 1 more byte and retry */
+       }
+
+       /* shrink the output object to the number of units actually written */
+       if (finalsize != PyUnicode_GET_SIZE(buf.outobj))
+               if (PyUnicode_Resize(&buf.outobj, finalsize) == -1)
+                       goto errorexit;
+
+       Py_XDECREF(cres);
+       Py_XDECREF(buf.excobj);
+       return buf.outobj;
+
+errorexit:
+       Py_XDECREF(cres);
+       Py_XDECREF(buf.excobj);
+       Py_XDECREF(buf.outobj);
+       return NULL;
+}
+
+/* read([size]): decode data obtained from the stream's read() method.
+ * `size` may be omitted or None (passed on as -1) or an int; any other
+ * type raises TypeError. */
+static PyObject *
+mbstreamreader_read(MultibyteStreamReaderObject *self, PyObject *args)
+{
+       PyObject *sizeobj = NULL;
+       Py_ssize_t size;
+
+       if (!PyArg_ParseTuple(args, "|O:read", &sizeobj))
+               return NULL;
+
+       if (sizeobj == Py_None || sizeobj == NULL)
+               size = -1;
+       else if (PyInt_Check(sizeobj)) {
+               size = PyInt_AsSsize_t(sizeobj);
+               /* -1 is also a legal value, so consult the error indicator */
+               if (size == -1 && PyErr_Occurred())
+                       return NULL;
+       }
+       else {
+               PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
+               return NULL;
+       }
+
+       return mbstreamreader_iread(self, "read", size);
+}
+
+/* readline([size]): decode data obtained from the stream's readline()
+ * method.  `size` may be omitted or None (passed on as -1) or an int; any
+ * other type raises TypeError. */
+static PyObject *
+mbstreamreader_readline(MultibyteStreamReaderObject *self, PyObject *args)
+{
+       PyObject *sizeobj = NULL;
+       Py_ssize_t size;
+
+       if (!PyArg_ParseTuple(args, "|O:readline", &sizeobj))
+               return NULL;
+
+       if (sizeobj == Py_None || sizeobj == NULL)
+               size = -1;
+       else if (PyInt_Check(sizeobj)) {
+               size = PyInt_AsSsize_t(sizeobj);
+               /* -1 is also a legal value, so consult the error indicator */
+               if (size == -1 && PyErr_Occurred())
+                       return NULL;
+       }
+       else {
+               PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
+               return NULL;
+       }
+
+       return mbstreamreader_iread(self, "readline", size);
+}
+
+/* readlines([sizehint]): decode data obtained from the stream's read()
+ * method and split the result into lines, keeping the line ends.
+ * `sizehint` may be omitted or None (passed on as -1) or an int; any other
+ * type raises TypeError. */
+static PyObject *
+mbstreamreader_readlines(MultibyteStreamReaderObject *self, PyObject *args)
+{
+       PyObject *sizehintobj = NULL, *r, *sr;
+       Py_ssize_t sizehint;
+
+       if (!PyArg_ParseTuple(args, "|O:readlines", &sizehintobj))
+               return NULL;
+
+       if (sizehintobj == Py_None || sizehintobj == NULL)
+               sizehint = -1;
+       else if (PyInt_Check(sizehintobj)) {
+               sizehint = PyInt_AsSsize_t(sizehintobj);
+               /* -1 is also a legal value, so consult the error indicator */
+               if (sizehint == -1 && PyErr_Occurred())
+                       return NULL;
+       }
+       else {
+               PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
+               return NULL;
+       }
+
+       r = mbstreamreader_iread(self, "read", sizehint);
+       if (r == NULL)
+               return NULL;
+
+       /* second argument 1 == keepends */
+       sr = PyUnicode_Splitlines(r, 1);
+       Py_DECREF(r);
+       return sr;
+}
+
+/* reset(): run the codec's decreset hook (if it has one) and discard any
+ * buffered partial input.  Returns None, or NULL if the hook fails. */
+static PyObject *
+mbstreamreader_reset(MultibyteStreamReaderObject *self)
+{
+       MultibyteCodec *codec = self->codec;
+
+       if (codec->decreset != NULL) {
+               if (codec->decreset(&self->state, codec->config) != 0)
+                       return NULL;
+       }
+
+       self->pendingsize = 0;
+       Py_INCREF(Py_None);
+       return Py_None;
+}
+
+/* Method table of MultibyteStreamReader: the three read variants take
+ * positional arguments only, reset() takes none. */
+static struct PyMethodDef mbstreamreader_methods[] = {
+       {"read",        (PyCFunction)mbstreamreader_read,
+                       METH_VARARGS, NULL},
+       {"readline",    (PyCFunction)mbstreamreader_readline,
+                       METH_VARARGS, NULL},
+       {"readlines",   (PyCFunction)mbstreamreader_readlines,
+                       METH_VARARGS, NULL},
        {"reset",       (PyCFunction)mbstreamreader_reset,
                        METH_NOARGS, NULL},
        {NULL,          NULL},
 };
 
-static void
-mbstreamreader_dealloc(MultibyteStreamReaderObject *self)
+/* Exposes the wrapped stream object as the read-only attribute `stream`. */
+static PyMemberDef mbstreamreader_members[] = {
+       {"stream",      T_OBJECT,
+                       offsetof(MultibyteStreamReaderObject, stream),
+                       READONLY, NULL},
+       {NULL,}
+};
+
+/*
+ * tp_new for MultibyteStreamReader.
+ *
+ * The codec to use is looked up as the `codec` attribute of the (sub)type
+ * being instantiated and must be a MultibyteCodec object.  Takes the
+ * stream to wrap and an optional `errors` argument selecting the error
+ * handler.
+ *
+ * Returns a new instance, or NULL with an exception set.
+ */
+static PyObject *
+mbstreamreader_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
 {
-       if (self->errors > ERROR_MAX) {
-               Py_DECREF(self->errors);
+       MultibyteStreamReaderObject *self = NULL;
+       PyObject *codec, *stream;
+       char *errors = NULL;
+
+       /* PyObject_GetAttrString() returns a new reference; it has to be
+        * released on every path below. */
+       codec = PyObject_GetAttrString((PyObject *)type, "codec");
+       if (codec == NULL)
+               return NULL;
+       if (!MultibyteCodec_Check(codec)) {
+               PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
+               goto errorexit;
        }
-       Py_DECREF(self->stream);
-       PyObject_Del(self);
+
+       if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamReader",
+                               streamkwarglist, &stream, &errors))
+               goto errorexit;
+
+       self = (MultibyteStreamReaderObject *)type->tp_alloc(type, 0);
+       if (self == NULL)
+               goto errorexit;
+
+       self->codec = ((MultibyteCodecObject *)codec)->codec;
+       /* take the stream reference first: the deallocator always drops it */
+       self->stream = stream;
+       Py_INCREF(stream);
+       self->pendingsize = 0;
+       self->errors = internal_error_callback(errors);
+       if (self->errors == NULL)
+               goto errorexit;
+       if (self->codec->decinit != NULL &&
+           self->codec->decinit(&self->state, self->codec->config) != 0)
+               goto errorexit;
+
+       Py_DECREF(codec);
+       return (PyObject *)self;
+
+errorexit:
+       Py_XDECREF(self);
+       Py_DECREF(codec);
+       return NULL;
 }
 
+static int
+mbstreamreader_traverse(MultibyteStreamReaderObject *self,
+                       visitproc visit, void *arg)
+{
+       if (ERROR_ISCUSTOM(self->errors))
+               Py_VISIT(self->errors);
+       Py_VISIT(self->stream);
+       return 0;
+}
 
+/* Deallocator: untrack the object from the GC, release the custom error
+ * handler (if any) and the wrapped stream, then free the instance through
+ * the type's tp_free slot. */
+static void
+mbstreamreader_dealloc(MultibyteStreamReaderObject *self)
+{
+       PyObject_GC_UnTrack(self);
+       /* NOTE(review): mbstreamreader_new() can reach here with self->errors
+        * still NULL -- confirm ERROR_DECREF tolerates that. */
+       ERROR_DECREF(self->errors);
+       Py_DECREF(self->stream);
+       self->ob_type->tp_free(self);
+}
 
 static PyTypeObject MultibyteStreamReader_Type = {
        PyObject_HEAD_INIT(NULL)
@@ -951,97 +1455,49 @@ static PyTypeObject MultibyteStreamReader_Type = {
        PyObject_GenericGetAttr,        /* tp_getattro */
        0,                              /* tp_setattro */
        0,                              /* tp_as_buffer */
-       Py_TPFLAGS_DEFAULT,             /* tp_flags */
+       Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC
+               | Py_TPFLAGS_BASETYPE,  /* tp_flags */
        0,                              /* tp_doc */
-       0,                              /* tp_traverse */
+       (traverseproc)mbstreamreader_traverse,  /* tp_traverse */
        0,                              /* tp_clear */
        0,                              /* tp_richcompare */
        0,                              /* tp_weaklistoffset */
        0,                              /* tp_iter */
        0,                              /* tp_iterext */
        mbstreamreader_methods,         /* tp_methods */
+       mbstreamreader_members,         /* tp_members */
+       codecctx_getsets,               /* tp_getset */
+       0,                              /* tp_base */
+       0,                              /* tp_dict */
+       0,                              /* tp_descr_get */
+       0,                              /* tp_descr_set */
+       0,                              /* tp_dictoffset */
+       0,                              /* tp_init */
+       0,                              /* tp_alloc */
+       mbstreamreader_new,             /* tp_new */
 };
 
+
+/**
+ * MultibyteStreamWriter object
+ */
+
 static int
 mbstreamwriter_iwrite(MultibyteStreamWriterObject *self,
                      PyObject *unistr)
 {
-       PyObject *wr, *ucvt, *r = NULL;
-       Py_UNICODE *inbuf, *inbuf_end, *inbuf_tmp = NULL;
-       Py_ssize_t datalen;
-
-       if (PyUnicode_Check(unistr))
-               ucvt = NULL;
-       else {
-               unistr = ucvt = PyObject_Unicode(unistr);
-               if (unistr == NULL)
-                       return -1;
-               else if (!PyUnicode_Check(unistr)) {
-                       PyErr_SetString(PyExc_TypeError,
-                               "couldn't convert the object to unicode.");
-                       Py_DECREF(ucvt);
-                       return -1;
-               }
-       }
-
-       datalen = PyUnicode_GET_SIZE(unistr);
-       if (datalen == 0) {
-               Py_XDECREF(ucvt);
-               return 0;
-       }
-
-       if (self->pendingsize > 0) {
-               inbuf_tmp = PyMem_New(Py_UNICODE, datalen + self->pendingsize);
-               if (inbuf_tmp == NULL)
-                       goto errorexit;
-               memcpy(inbuf_tmp, self->pending,
-                       Py_UNICODE_SIZE * self->pendingsize);
-               memcpy(inbuf_tmp + self->pendingsize,
-                       PyUnicode_AS_UNICODE(unistr),
-                       Py_UNICODE_SIZE * datalen);
-               datalen += self->pendingsize;
-               self->pendingsize = 0;
-               inbuf = inbuf_tmp;
-       }
-       else
-               inbuf = (Py_UNICODE *)PyUnicode_AS_UNICODE(unistr);
-
-       inbuf_end = inbuf + datalen;
+       PyObject *str, *wr;
 
-       r = multibytecodec_encode(self->codec, &self->state,
-                       (const Py_UNICODE **)&inbuf, datalen, self->errors, 0);
-       if (r == NULL)
-               goto errorexit;
-
-       if (inbuf < inbuf_end) {
-               self->pendingsize = (Py_ssize_t)(inbuf_end - inbuf);
-               if (self->pendingsize > MAXENCPENDING) {
-                       self->pendingsize = 0;
-                       PyErr_SetString(PyExc_RuntimeError,
-                                       "pending buffer overflow");
-                       goto errorexit;
-               }
-               memcpy(self->pending, inbuf,
-                       self->pendingsize * Py_UNICODE_SIZE);
-       }
+       str = encoder_encode_stateful(STATEFUL_ECTX(self), unistr, 0);
+       if (str == NULL)
+               return -1;
 
-       wr = PyObject_CallMethod(self->stream, "write", "O", r);
+       wr = PyObject_CallMethod(self->stream, "write", "O", str);
+       Py_DECREF(str);
        if (wr == NULL)
-               goto errorexit;
+               return -1;
 
-       if (inbuf_tmp != NULL)
-               PyMem_Del(inbuf_tmp);
-       Py_DECREF(r);
-       Py_DECREF(wr);
-       Py_XDECREF(ucvt);
        return 0;
-
-errorexit:
-       if (inbuf_tmp != NULL)
-               PyMem_Del(inbuf_tmp);
-       Py_XDECREF(r);
-       Py_XDECREF(ucvt);
-       return -1;
 }
 
 static PyObject *
@@ -1054,10 +1510,8 @@ mbstreamwriter_write(MultibyteStreamWriterObject *self, PyObject *args)
 
        if (mbstreamwriter_iwrite(self, strobj))
                return NULL;
-       else {
-               Py_INCREF(Py_None);
-               return Py_None;
-       }
+       else
+               Py_RETURN_NONE;
 }
 
 static PyObject *
@@ -1087,8 +1541,7 @@ mbstreamwriter_writelines(MultibyteStreamWriterObject *self, PyObject *args)
                        return NULL;
        }
 
-       Py_INCREF(Py_None);
-       return Py_None;
+       Py_RETURN_NONE;
 }
 
 static PyObject *
@@ -1119,18 +1572,67 @@ mbstreamwriter_reset(MultibyteStreamWriterObject *self)
        }
        Py_DECREF(pwrt);
 
-       Py_INCREF(Py_None);
-       return Py_None;
+       Py_RETURN_NONE;
+}
+
+/*
+ * tp_new for MultibyteStreamWriter.
+ *
+ * The codec to use is looked up as the `codec` attribute of the (sub)type
+ * being instantiated and must be a MultibyteCodec object.  Takes the
+ * stream to wrap and an optional `errors` argument selecting the error
+ * handler.
+ *
+ * Returns a new instance, or NULL with an exception set.
+ */
+static PyObject *
+mbstreamwriter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+       MultibyteStreamWriterObject *self = NULL;
+       PyObject *codec, *stream;
+       char *errors = NULL;
+
+       /* PyObject_GetAttrString() returns a new reference; it has to be
+        * released on every path below. */
+       codec = PyObject_GetAttrString((PyObject *)type, "codec");
+       if (codec == NULL)
+               return NULL;
+       if (!MultibyteCodec_Check(codec)) {
+               PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
+               goto errorexit;
+       }
+
+       if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamWriter",
+                               streamkwarglist, &stream, &errors))
+               goto errorexit;
+
+       self = (MultibyteStreamWriterObject *)type->tp_alloc(type, 0);
+       if (self == NULL)
+               goto errorexit;
+
+       self->codec = ((MultibyteCodecObject *)codec)->codec;
+       /* take the stream reference first: the deallocator always drops it */
+       self->stream = stream;
+       Py_INCREF(stream);
+       self->pendingsize = 0;
+       self->errors = internal_error_callback(errors);
+       if (self->errors == NULL)
+               goto errorexit;
+       if (self->codec->encinit != NULL &&
+           self->codec->encinit(&self->state, self->codec->config) != 0)
+               goto errorexit;
+
+       Py_DECREF(codec);
+       return (PyObject *)self;
+
+errorexit:
+       Py_XDECREF(self);
+       Py_DECREF(codec);
+       return NULL;
+}
+
+static int
+mbstreamwriter_traverse(MultibyteStreamWriterObject *self,
+                       visitproc visit, void *arg)
+{
+       if (ERROR_ISCUSTOM(self->errors))
+               Py_VISIT(self->errors);
+       Py_VISIT(self->stream);
+       return 0;
 }
 
+/* Deallocator: untrack the object from the GC, release the custom error
+ * handler (if any) and the wrapped stream, then free the instance through
+ * the type's tp_free slot. */
 static void
 mbstreamwriter_dealloc(MultibyteStreamWriterObject *self)
 {
-       if (self->errors > ERROR_MAX) {
-               Py_DECREF(self->errors);
-       }
+       PyObject_GC_UnTrack(self);
+       ERROR_DECREF(self->errors);
        Py_DECREF(self->stream);
-       PyObject_Del(self);
+       self->ob_type->tp_free(self);
 }
 
 static struct PyMethodDef mbstreamwriter_methods[] = {
@@ -1143,7 +1645,12 @@ static struct PyMethodDef mbstreamwriter_methods[] = {
        {NULL,          NULL},
 };
 
-
+/* Exposes the wrapped stream object as the read-only attribute `stream`. */
+static PyMemberDef mbstreamwriter_members[] = {
+       {"stream",      T_OBJECT,
+                       offsetof(MultibyteStreamWriterObject, stream),
+                       READONLY, NULL},
+       {NULL,}
+};
 
 static PyTypeObject MultibyteStreamWriter_Type = {
        PyObject_HEAD_INIT(NULL)
@@ -1167,17 +1674,33 @@ static PyTypeObject MultibyteStreamWriter_Type = {
        PyObject_GenericGetAttr,        /* tp_getattro */
        0,                              /* tp_setattro */
        0,                              /* tp_as_buffer */
-       Py_TPFLAGS_DEFAULT,             /* tp_flags */
+       Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC
+               | Py_TPFLAGS_BASETYPE,  /* tp_flags */
        0,                              /* tp_doc */
-       0,                              /* tp_traverse */
+       (traverseproc)mbstreamwriter_traverse,  /* tp_traverse */
        0,                              /* tp_clear */
        0,                              /* tp_richcompare */
        0,                              /* tp_weaklistoffset */
        0,                              /* tp_iter */
        0,                              /* tp_iterext */
        mbstreamwriter_methods,         /* tp_methods */
+       mbstreamwriter_members,         /* tp_members */
+       codecctx_getsets,               /* tp_getset */
+       0,                              /* tp_base */
+       0,                              /* tp_dict */
+       0,                              /* tp_descr_get */
+       0,                              /* tp_descr_set */
+       0,                              /* tp_dictoffset */
+       0,                              /* tp_init */
+       0,                              /* tp_alloc */
+       mbstreamwriter_new,             /* tp_new */
 };
 
+
+/**
+ * Exposed factory function
+ */
+
 static PyObject *
 __create_codec(PyObject *ignore, PyObject *arg)
 {
@@ -1201,80 +1724,38 @@ __create_codec(PyObject *ignore, PyObject *arg)
        return (PyObject *)self;
 }
 
-static PyObject *
-mbstreamreader_create(MultibyteCodec *codec,
-                     PyObject *stream, const char *errors)
-{
-       MultibyteStreamReaderObject *self;
-
-       self = PyObject_New(MultibyteStreamReaderObject,
-                       &MultibyteStreamReader_Type);
-       if (self == NULL)
-               return NULL;
-
-       self->codec = codec;
-       self->stream = stream;
-       Py_INCREF(stream);
-       self->pendingsize = 0;
-       self->errors = get_errorcallback(errors);
-       if (self->errors == NULL)
-               goto errorexit;
-       if (self->codec->decinit != NULL &&
-           self->codec->decinit(&self->state, self->codec->config) != 0)
-               goto errorexit;
-
-       return (PyObject *)self;
-
-errorexit:
-       Py_XDECREF(self);
-       return NULL;
-}
-
-static PyObject *
-mbstreamwriter_create(MultibyteCodec *codec,
-                     PyObject *stream, const char *errors)
-{
-       MultibyteStreamWriterObject *self;
-
-       self = PyObject_New(MultibyteStreamWriterObject,
-                       &MultibyteStreamWriter_Type);
-       if (self == NULL)
-               return NULL;
-
-       self->codec = codec;
-       self->stream = stream;
-       Py_INCREF(stream);
-       self->pendingsize = 0;
-       self->errors = get_errorcallback(errors);
-       if (self->errors == NULL)
-               goto errorexit;
-       if (self->codec->encinit != NULL &&
-           self->codec->encinit(&self->state, self->codec->config) != 0)
-               goto errorexit;
-
-       return (PyObject *)self;
-
-errorexit:
-       Py_XDECREF(self);
-       return NULL;
-}
-
 static struct PyMethodDef __methods[] = {
        {"__create_codec", (PyCFunction)__create_codec, METH_O},
        {NULL, NULL},
 };
 
-void
+PyMODINIT_FUNC
 init_multibytecodec(void)
 {
+       int i;
+       PyObject *m;
+       PyTypeObject *typelist[] = {
+               &MultibyteIncrementalEncoder_Type,
+               &MultibyteIncrementalDecoder_Type,
+               &MultibyteStreamReader_Type,
+               &MultibyteStreamWriter_Type,
+               NULL
+       };
+
        if (PyType_Ready(&MultibyteCodec_Type) < 0)
                return;
-       if (PyType_Ready(&MultibyteStreamReader_Type) < 0)
-               return;
-       if (PyType_Ready(&MultibyteStreamWriter_Type) < 0)
+
+       m = Py_InitModule("_multibytecodec", __methods);
+       if (m == NULL)
                return;
 
-       Py_InitModule("_multibytecodec", __methods);
+       for (i = 0; typelist[i] != NULL; i++) {
+               if (PyType_Ready(typelist[i]) < 0)
+                       return;
+               Py_INCREF(typelist[i]);
+               PyModule_AddObject(m, typelist[i]->tp_name,
+                                  (PyObject *)typelist[i]);
+       }
 
        if (PyErr_Occurred())
                Py_FatalError("can't initialize the _multibytecodec module");
index ec49c785beb81159a4a203a3225588d320912f31..671ecaee94ecefcdc88a384c379fe36aee29cdba 100644 (file)
@@ -67,24 +67,51 @@ typedef struct {
        MultibyteCodec *codec;
 } MultibyteCodecObject;
 
-#define MAXDECPENDING  8
+/* Exact type test: true only when op's type is MultibyteCodec_Type itself
+ * (the comparison is on ob_type, so subclasses do not match). */
+#define MultibyteCodec_Check(op) ((op)->ob_type == &MultibyteCodec_Type)
+
+/* Fields common to every stateful codec object: the object header, the
+ * codec in use, its conversion state and the error handler. */
+#define _MultibyteStatefulCodec_HEAD           \
+       PyObject_HEAD                           \
+       MultibyteCodec *codec;                  \
+       MultibyteCodec_State state;             \
+       PyObject *errors;
 typedef struct {
-       PyObject_HEAD
-       MultibyteCodec *codec;
-       MultibyteCodec_State state;
-       unsigned char pending[MAXDECPENDING];
-       Py_ssize_t pendingsize;
-       PyObject *stream, *errors;
-} MultibyteStreamReaderObject;
+       _MultibyteStatefulCodec_HEAD
+} MultibyteStatefulCodecContext;
 
 #define MAXENCPENDING  2
+/* Encoder objects add a small buffer of Py_UNICODE units (at most
+ * MAXENCPENDING) and the number of units currently held in it. */
+#define _MultibyteStatefulEncoder_HEAD         \
+       _MultibyteStatefulCodec_HEAD            \
+       Py_UNICODE pending[MAXENCPENDING];      \
+       Py_ssize_t pendingsize;
 typedef struct {
-       PyObject_HEAD
-       MultibyteCodec *codec;
-       MultibyteCodec_State state;
-       Py_UNICODE pending[MAXENCPENDING];
+       _MultibyteStatefulEncoder_HEAD
+} MultibyteStatefulEncoderContext;
+
+#define MAXDECPENDING  8
+/* Decoder objects add a small byte buffer (at most MAXDECPENDING bytes)
+ * and the number of bytes currently held in it. */
+#define _MultibyteStatefulDecoder_HEAD         \
+       _MultibyteStatefulCodec_HEAD            \
+       unsigned char pending[MAXDECPENDING];   \
        Py_ssize_t pendingsize;
-       PyObject *stream, *errors;
+typedef struct {
+       _MultibyteStatefulDecoder_HEAD
+} MultibyteStatefulDecoderContext;
+
+/* The concrete object layouts start with one of the HEAD macros above;
+ * the stream variants append the wrapped stream object. */
+typedef struct {
+       _MultibyteStatefulEncoder_HEAD
+} MultibyteIncrementalEncoderObject;
+
+typedef struct {
+       _MultibyteStatefulDecoder_HEAD
+} MultibyteIncrementalDecoderObject;
+
+typedef struct {
+       _MultibyteStatefulDecoder_HEAD
+       PyObject *stream;
+} MultibyteStreamReaderObject;
+
+typedef struct {
+       _MultibyteStatefulEncoder_HEAD
+       PyObject *stream;
 } MultibyteStreamWriterObject;
 
 /* positive values for illegal sequences */
@@ -95,7 +122,12 @@ typedef struct {
 #define ERROR_STRICT           (PyObject *)(1)
 #define ERROR_IGNORE           (PyObject *)(2)
 #define ERROR_REPLACE          (PyObject *)(3)
-#define ERROR_MAX              ERROR_REPLACE
+/* True when p lies outside the ERROR_STRICT..ERROR_REPLACE sentinel range.
+ * Note that a NULL pointer also compares below ERROR_STRICT.  The argument
+ * is evaluated more than once. */
+#define ERROR_ISCUSTOM(p)      ((p) < ERROR_STRICT || ERROR_REPLACE < (p))
+#define ERROR_DECREF(p) do {           \
+       if (ERROR_ISCUSTOM(p)) {        \
+               Py_DECREF(p);           \
+       }                               \
+} while (0);
 
 #define MBENC_FLUSH            0x0001 /* encode all characters encodable */
 #define MBENC_MAX              MBENC_FLUSH
index f266d4d218346e008fd58c12852f5ca091d137df..fbd3557571dda777b9d32bf589f367c63501acca 100644 (file)
@@ -15,7 +15,7 @@ RM = /bin/rm
 
 all:   distclean mappings codecs
 
-codecs:        misc windows iso apple ebcdic custom-mappings
+codecs:        misc windows iso apple ebcdic custom-mappings cjk
 
 ### Mappings
 
@@ -72,6 +72,9 @@ ebcdic:       build/
        $(PYTHON) gencodec.py MAPPINGS/VENDORS/MICSFT/EBCDIC/ build/
        $(RM) -f build/readme.*
 
+cjk:   build/  # write the generated CJK codec modules into build/
+       $(PYTHON) gencjkcodecs.py build/
+
 ### Cleanup
 
 clean:
diff --git a/Tools/unicode/gencjkcodecs.py b/Tools/unicode/gencjkcodecs.py
new file mode 100644 (file)
index 0000000..47627c5
--- /dev/null
@@ -0,0 +1,92 @@
+"""Generate the Python wrapper module for each CJK codec.
+
+Usage: gencjkcodecs.py OUTPUT_DIR
+
+For every encoding listed in ``codecs`` a file named <encoding>.py is
+written into OUTPUT_DIR, filled in from ``TEMPLATE``.
+"""
+
+import os, string
+
+# Maps the suffix of the owning extension module (_codecs_<suffix>) to
+# the encodings that module provides through getcodec().
+codecs = {
+    'cn': ('gb2312', 'gbk', 'gb18030', 'hz'),
+    'tw': ('big5', 'cp950'),
+    'hk': ('big5hkscs',),
+    'jp': ('cp932', 'shift_jis', 'euc_jp', 'euc_jisx0213', 'shift_jisx0213',
+           'euc_jis_2004', 'shift_jis_2004'),
+    'kr': ('cp949', 'euc_kr', 'johab'),
+    'iso2022': ('iso2022_jp', 'iso2022_jp_1', 'iso2022_jp_2',
+                'iso2022_jp_2004', 'iso2022_jp_3', 'iso2022_jp_ext',
+                'iso2022_kr'),
+}
+
+# Text of one generated module; $owner, $encoding and $ENCODING are
+# filled in by gencodecs().
+TEMPLATE = string.Template("""\
+#
+# $encoding.py: Python Unicode Codec for $ENCODING
+#
+# Written by Hye-Shik Chang <perky@FreeBSD.org>
+#
+
+import _codecs_$owner, codecs
+import _multibytecodec as mbc
+
+codec = _codecs_$owner.getcodec('$encoding')
+
+class Codec(codecs.Codec):
+    encode = codec.encode
+    decode = codec.decode
+
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
+
+def getregentry():
+    return codecs.CodecInfo(
+        name='$encoding',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )
+""")
+
+def gencodecs(prefix):
+    """Write one codec module per known encoding into directory *prefix*.
+
+    Each file is named <encoding>.py and holds TEMPLATE with the encoding
+    name and the suffix of its owning _codecs_* module substituted in.
+    An existing file of the same name is overwritten.
+    """
+    for loc, encodings in codecs.items():
+        for enc in encodings:
+            code = TEMPLATE.substitute(ENCODING=enc.upper(),
+                                       encoding=enc.lower(),
+                                       owner=loc)
+            codecpath = os.path.join(prefix, enc + '.py')
+            # Close the file explicitly so the data is flushed even on
+            # interpreters that do not finalize objects immediately.
+            fp = open(codecpath, 'w')
+            try:
+                fp.write(code)
+            finally:
+                fp.close()
+
+if __name__ == '__main__':
+    import sys
+    if len(sys.argv) < 2:
+        sys.stderr.write('usage: %s OUTPUT_DIR\n' % sys.argv[0])
+        sys.exit(2)
+    gencodecs(sys.argv[1])