sr.file_encoding = file_encoding
return sr
+### Helpers for charmap-based codecs
+
+def make_identity_dict(rng):
+
+ """ make_identity_dict(rng) -> dict
+
+ Return a dictionary where elements of the rng sequence are
+ mapped to themselves.
+
+ """
+ res = {}
+ for i in rng:
+ res[i]=i
+ return res
+
### Tests
if __name__ == '__main__':
-""" Python Character Mapping Codec generated from 'CP037.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP037.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x0004: 0x009c, # CONTROL
0x0005: 0x0009, # HORIZONTAL TABULATION
0x0006: 0x0086, # CONTROL
0x00fd: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
0x00fe: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
0x00ff: 0x009f, # CONTROL
-}
+})
### Encoding Map
-""" Python Character Mapping Codec generated from 'CP1006.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP1006.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x00a1: 0x06f0, # EXTENDED ARABIC-INDIC DIGIT ZERO
0x00a2: 0x06f1, # EXTENDED ARABIC-INDIC DIGIT ONE
0x00a3: 0x06f2, # EXTENDED ARABIC-INDIC DIGIT TWO
0x00fd: 0xfbae, # ARABIC LETTER YEH BARREE ISOLATED FORM
0x00fe: 0xfe7c, # ARABIC SHADDA ISOLATED FORM
0x00ff: 0xfe7d, # ARABIC SHADDA MEDIAL FORM
-}
+})
### Encoding Map
-""" Python Character Mapping Codec generated from 'CP1026.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP1026.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x0004: 0x009c, # CONTROL
0x0005: 0x0009, # HORIZONTAL TABULATION
0x0006: 0x0086, # CONTROL
0x00fd: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
0x00fe: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
0x00ff: 0x009f, # CONTROL
-}
+})
### Encoding Map
-""" Python Character Mapping Codec generated from 'CP1250.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP1250.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x0080: 0x20ac, # EURO SIGN
0x0081: None, # UNDEFINED
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE
0x00fe: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA
0x00ff: 0x02d9, # DOT ABOVE
-}
+})
### Encoding Map
-""" Python Character Mapping Codec generated from 'CP1251.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP1251.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x0080: 0x0402, # CYRILLIC CAPITAL LETTER DJE
0x0081: 0x0403, # CYRILLIC CAPITAL LETTER GJE
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
0x00fd: 0x044d, # CYRILLIC SMALL LETTER E
0x00fe: 0x044e, # CYRILLIC SMALL LETTER YU
0x00ff: 0x044f, # CYRILLIC SMALL LETTER YA
-}
+})
### Encoding Map
-""" Python Character Mapping Codec generated from 'CP1252.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP1252.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x0080: 0x20ac, # EURO SIGN
0x0081: None, # UNDEFINED
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
0x009d: None, # UNDEFINED
0x009e: 0x017e, # LATIN SMALL LETTER Z WITH CARON
0x009f: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS
-}
+})
### Encoding Map
-""" Python Character Mapping Codec generated from 'CP1253.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP1253.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x0080: 0x20ac, # EURO SIGN
0x0081: None, # UNDEFINED
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
0x00fd: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS
0x00fe: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS
0x00ff: None, # UNDEFINED
-}
+})
### Encoding Map
-""" Python Character Mapping Codec generated from 'CP1254.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP1254.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x0080: 0x20ac, # EURO SIGN
0x0081: None, # UNDEFINED
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
0x00f0: 0x011f, # LATIN SMALL LETTER G WITH BREVE
0x00fd: 0x0131, # LATIN SMALL LETTER DOTLESS I
0x00fe: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA
-}
+})
### Encoding Map
-""" Python Character Mapping Codec generated from 'CP1255.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP1255.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x0080: 0x20ac, # EURO SIGN
0x0081: None, # UNDEFINED
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
0x00fd: 0x200e, # LEFT-TO-RIGHT MARK
0x00fe: 0x200f, # RIGHT-TO-LEFT MARK
0x00ff: None, # UNDEFINED
-}
+})
### Encoding Map
-""" Python Character Mapping Codec generated from 'CP1256.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP1256.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x0080: 0x20ac, # EURO SIGN
0x0081: 0x067e, # ARABIC LETTER PEH
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
0x00fd: 0x200e, # LEFT-TO-RIGHT MARK
0x00fe: 0x200f, # RIGHT-TO-LEFT MARK
0x00ff: 0x06d2, # ARABIC LETTER YEH BARREE
-}
+})
### Encoding Map
-""" Python Character Mapping Codec generated from 'CP1257.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP1257.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x0080: 0x20ac, # EURO SIGN
0x0081: None, # UNDEFINED
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
0x00fe: 0x017e, # LATIN SMALL LETTER Z WITH CARON
0x00ff: 0x02d9, # DOT ABOVE
-}
+})
### Encoding Map
-""" Python Character Mapping Codec generated from 'CP1258.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP1258.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x0080: 0x20ac, # EURO SIGN
0x0081: None, # UNDEFINED
0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
0x00f5: 0x01a1, # LATIN SMALL LETTER O WITH HORN
0x00fd: 0x01b0, # LATIN SMALL LETTER U WITH HORN
0x00fe: 0x20ab, # DONG SIGN
-}
+})
### Encoding Map
-""" Python Character Mapping Codec generated from 'CP424.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP424.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x0004: 0x009c, # SELECT
0x0005: 0x0009, # HORIZONTAL TABULATION
0x0006: 0x0086, # REQUIRED NEW LINE
0x00fd: None, # UNDEFINED
0x00fe: None, # UNDEFINED
0x00ff: 0x009f, # EIGHT ONES
-}
+})
### Encoding Map
-""" Python Character Mapping Codec generated from 'CP437.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP437.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
0x00fd: 0x00b2, # SUPERSCRIPT TWO
0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE
-}
+})
### Encoding Map
-""" Python Character Mapping Codec generated from 'CP500.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP500.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x0004: 0x009c, # CONTROL
0x0005: 0x0009, # HORIZONTAL TABULATION
0x0006: 0x0086, # CONTROL
0x00fd: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
0x00fe: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
0x00ff: 0x009f, # CONTROL
-}
+})
### Encoding Map
-""" Python Character Mapping Codec generated from 'CP737.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP737.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x0080: 0x0391, # GREEK CAPITAL LETTER ALPHA
0x0081: 0x0392, # GREEK CAPITAL LETTER BETA
0x0082: 0x0393, # GREEK CAPITAL LETTER GAMMA
0x00fd: 0x00b2, # SUPERSCRIPT TWO
0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE
-}
+})
### Encoding Map
-""" Python Character Mapping Codec generated from 'CP775.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP775.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x0080: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
0x00fd: 0x00b2, # SUPERSCRIPT TWO
0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE
-}
+})
### Encoding Map
-""" Python Character Mapping Codec generated from 'CP850.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP850.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
0x00fd: 0x00b2, # SUPERSCRIPT TWO
0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE
-}
+})
### Encoding Map
-""" Python Character Mapping Codec generated from 'CP852.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP852.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
0x00fd: 0x0159, # LATIN SMALL LETTER R WITH CARON
0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE
-}
+})
### Encoding Map
-""" Python Character Mapping Codec generated from 'CP855.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP855.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x0080: 0x0452, # CYRILLIC SMALL LETTER DJE
0x0081: 0x0402, # CYRILLIC CAPITAL LETTER DJE
0x0082: 0x0453, # CYRILLIC SMALL LETTER GJE
0x00fd: 0x00a7, # SECTION SIGN
0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE
-}
+})
### Encoding Map
-""" Python Character Mapping Codec generated from 'CP856.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP856.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x0080: 0x05d0, # HEBREW LETTER ALEF
0x0081: 0x05d1, # HEBREW LETTER BET
0x0082: 0x05d2, # HEBREW LETTER GIMEL
0x00d0: None, # UNDEFINED
0x00d1: None, # UNDEFINED
0x00d2: None, # UNDEFINED
- 0x00d3: None, # UNDEFINED
+ 0x00d3: None, # UNDEFINEDS
0x00d4: None, # UNDEFINED
0x00d5: None, # UNDEFINED
- 0x00d6: None, # UNDEFINED
+ 0x00d6: None, # UNDEFINEDE
0x00d7: None, # UNDEFINED
0x00d8: None, # UNDEFINED
0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
0x00fd: 0x00b2, # SUPERSCRIPT TWO
0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE
-}
+})
### Encoding Map
-""" Python Character Mapping Codec generated from 'CP857.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP857.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
0x00fd: 0x00b2, # SUPERSCRIPT TWO
0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE
-}
+})
### Encoding Map
-""" Python Character Mapping Codec generated from 'CP860.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP860.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
0x00fd: 0x00b2, # SUPERSCRIPT TWO
0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE
-}
+})
### Encoding Map
-""" Python Character Mapping Codec generated from 'CP861.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP861.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
0x00fd: 0x00b2, # SUPERSCRIPT TWO
0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE
-}
+})
### Encoding Map
-""" Python Character Mapping Codec generated from 'CP862.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP862.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x0080: 0x05d0, # HEBREW LETTER ALEF
0x0081: 0x05d1, # HEBREW LETTER BET
0x0082: 0x05d2, # HEBREW LETTER GIMEL
0x00fd: 0x00b2, # SUPERSCRIPT TWO
0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE
-}
+})
### Encoding Map
-""" Python Character Mapping Codec generated from 'CP863.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP863.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
0x00fd: 0x00b2, # SUPERSCRIPT TWO
0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE
-}
+})
### Encoding Map
-""" Python Character Mapping Codec generated from 'CP864.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP864.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x0025: 0x066a, # ARABIC PERCENT SIGN
0x0080: 0x00b0, # DEGREE SIGN
0x0081: 0x00b7, # MIDDLE DOT
0x00fd: 0xfef1, # ARABIC LETTER YEH ISOLATED FORM
0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: None, # UNDEFINED
-}
+})
### Encoding Map
-""" Python Character Mapping Codec generated from 'CP865.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP865.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
0x00fd: 0x00b2, # SUPERSCRIPT TWO
0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE
-}
+})
### Encoding Map
-""" Python Character Mapping Codec generated from 'CP866.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP866.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A
0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE
0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE
0x00fd: 0x00a4, # CURRENCY SIGN
0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE
-}
+})
### Encoding Map
-""" Python Character Mapping Codec generated from 'CP869.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP869.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x0080: None, # UNDEFINED
0x0081: None, # UNDEFINED
0x0082: None, # UNDEFINED
0x00fd: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS
0x00fe: 0x25a0, # BLACK SQUARE
0x00ff: 0x00a0, # NO-BREAK SPACE
-}
+})
### Encoding Map
-""" Python Character Mapping Codec generated from 'CP874.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP874.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x0080: 0x20ac, # EURO SIGN
0x0081: None, # UNDEFINED
0x0082: None, # UNDEFINED
0x00fd: None, # UNDEFINED
0x00fe: None, # UNDEFINED
0x00ff: None, # UNDEFINED
-}
+})
### Encoding Map
-""" Python Character Mapping Codec generated from 'CP875.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP875.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x0004: 0x009c, # CONTROL
0x0005: 0x0009, # HORIZONTAL TABULATION
0x0006: 0x0086, # CONTROL
0x00fd: 0x001a, # SUBSTITUTE
0x00fe: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00ff: 0x009f, # CONTROL
-}
+})
### Encoding Map
-""" Python Character Mapping Codec generated from '8859-1.TXT'.
-
+""" Python Character Mapping Codec generated from '8859-1.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
-
- def __init__(self,stream,errors='strict'):
-
- codecs.StreamWriter.__init__(self,strict,errors)
+ pass
class StreamReader(Codec,codecs.StreamReader):
pass
### Decoding Map
-decoding_map = {
-
-}
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
+})
### Encoding Map
-""" Python Character Mapping Codec generated from '8859-10.TXT'.
-
+""" Python Character Mapping Codec generated from '8859-10.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK
0x00a2: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON
0x00a3: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA
0x00f7: 0x0169, # LATIN SMALL LETTER U WITH TILDE
0x00f9: 0x0173, # LATIN SMALL LETTER U WITH OGONEK
0x00ff: 0x0138, # LATIN SMALL LETTER KRA
-}
+})
### Encoding Map
-""" Python Character Mapping Codec generated from '8859-13.TXT'.
-
+""" Python Character Mapping Codec generated from '8859-13.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x00a1: 0x201d, # RIGHT DOUBLE QUOTATION MARK
0x00a5: 0x201e, # DOUBLE LOW-9 QUOTATION MARK
0x00a8: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
0x00fe: 0x017e, # LATIN SMALL LETTER Z WITH CARON
0x00ff: 0x2019, # RIGHT SINGLE QUOTATION MARK
-}
+})
### Encoding Map
-""" Python Character Mapping Codec generated from '8859-14.TXT'.
-
+""" Python Character Mapping Codec generated from '8859-14.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x00a1: 0x1e02, # LATIN CAPITAL LETTER B WITH DOT ABOVE
0x00a2: 0x1e03, # LATIN SMALL LETTER B WITH DOT ABOVE
0x00a4: 0x010a, # LATIN CAPITAL LETTER C WITH DOT ABOVE
0x00f0: 0x0175, # LATIN SMALL LETTER W WITH CIRCUMFLEX
0x00f7: 0x1e6b, # LATIN SMALL LETTER T WITH DOT ABOVE
0x00fe: 0x0177, # LATIN SMALL LETTER Y WITH CIRCUMFLEX
-}
+})
### Encoding Map
-""" Python Character Mapping Codec generated from '8859-15.TXT'.
-
+""" Python Character Mapping Codec generated from '8859-15.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x00a4: 0x20ac, # EURO SIGN
0x00a6: 0x0160, # LATIN CAPITAL LETTER S WITH CARON
0x00a8: 0x0161, # LATIN SMALL LETTER S WITH CARON
0x00bc: 0x0152, # LATIN CAPITAL LIGATURE OE
0x00bd: 0x0153, # LATIN SMALL LIGATURE OE
0x00be: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS
-}
+})
### Encoding Map
-""" Python Character Mapping Codec generated from '8859-2.TXT'.
-
+""" Python Character Mapping Codec generated from '8859-2.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK
0x00a2: 0x02d8, # BREVE
0x00a3: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE
0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE
0x00fe: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA
0x00ff: 0x02d9, # DOT ABOVE
-}
+})
### Encoding Map
-""" Python Character Mapping Codec generated from '8859-3.TXT'.
-
+""" Python Character Mapping Codec generated from '8859-3.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x00a1: 0x0126, # LATIN CAPITAL LETTER H WITH STROKE
0x00a2: 0x02d8, # BREVE
+ 0x00a5: None,
0x00a6: 0x0124, # LATIN CAPITAL LETTER H WITH CIRCUMFLEX
0x00a9: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE
0x00aa: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA
0x00ab: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE
0x00ac: 0x0134, # LATIN CAPITAL LETTER J WITH CIRCUMFLEX
+ 0x00ae: None,
0x00af: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE
0x00b1: 0x0127, # LATIN SMALL LETTER H WITH STROKE
0x00b6: 0x0125, # LATIN SMALL LETTER H WITH CIRCUMFLEX
0x00ba: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA
0x00bb: 0x011f, # LATIN SMALL LETTER G WITH BREVE
0x00bc: 0x0135, # LATIN SMALL LETTER J WITH CIRCUMFLEX
+ 0x00be: None,
0x00bf: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
+ 0x00c3: None,
0x00c5: 0x010a, # LATIN CAPITAL LETTER C WITH DOT ABOVE
0x00c6: 0x0108, # LATIN CAPITAL LETTER C WITH CIRCUMFLEX
+ 0x00d0: None,
0x00d5: 0x0120, # LATIN CAPITAL LETTER G WITH DOT ABOVE
0x00d8: 0x011c, # LATIN CAPITAL LETTER G WITH CIRCUMFLEX
0x00dd: 0x016c, # LATIN CAPITAL LETTER U WITH BREVE
0x00de: 0x015c, # LATIN CAPITAL LETTER S WITH CIRCUMFLEX
+ 0x00e3: None,
0x00e5: 0x010b, # LATIN SMALL LETTER C WITH DOT ABOVE
0x00e6: 0x0109, # LATIN SMALL LETTER C WITH CIRCUMFLEX
+ 0x00f0: None,
0x00f5: 0x0121, # LATIN SMALL LETTER G WITH DOT ABOVE
0x00f8: 0x011d, # LATIN SMALL LETTER G WITH CIRCUMFLEX
0x00fd: 0x016d, # LATIN SMALL LETTER U WITH BREVE
0x00fe: 0x015d, # LATIN SMALL LETTER S WITH CIRCUMFLEX
0x00ff: 0x02d9, # DOT ABOVE
-}
+})
### Encoding Map
-""" Python Character Mapping Codec generated from '8859-4.TXT'.
-
+""" Python Character Mapping Codec generated from '8859-4.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x00a1: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK
0x00a2: 0x0138, # LATIN SMALL LETTER KRA
0x00a3: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA
0x00fd: 0x0169, # LATIN SMALL LETTER U WITH TILDE
0x00fe: 0x016b, # LATIN SMALL LETTER U WITH MACRON
0x00ff: 0x02d9, # DOT ABOVE
-}
+})
### Encoding Map
-""" Python Character Mapping Codec generated from '8859-5.TXT'.
-
+""" Python Character Mapping Codec generated from '8859-5.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x00a1: 0x0401, # CYRILLIC CAPITAL LETTER IO
0x00a2: 0x0402, # CYRILLIC CAPITAL LETTER DJE
0x00a3: 0x0403, # CYRILLIC CAPITAL LETTER GJE
0x00fd: 0x00a7, # SECTION SIGN
0x00fe: 0x045e, # CYRILLIC SMALL LETTER SHORT U
0x00ff: 0x045f, # CYRILLIC SMALL LETTER DZHE
-}
+})
### Encoding Map
-""" Python Character Mapping Codec generated from '8859-6.TXT'.
-
+""" Python Character Mapping Codec generated from '8859-6.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
+ 0x00a1: None,
+ 0x00a2: None,
+ 0x00a3: None,
+ 0x00a5: None,
+ 0x00a6: None,
+ 0x00a7: None,
+ 0x00a8: None,
+ 0x00a9: None,
+ 0x00aa: None,
+ 0x00ab: None,
0x00ac: 0x060c, # ARABIC COMMA
+ 0x00ae: None,
+ 0x00af: None,
+ 0x00b0: None,
+ 0x00b1: None,
+ 0x00b2: None,
+ 0x00b3: None,
+ 0x00b4: None,
+ 0x00b5: None,
+ 0x00b6: None,
+ 0x00b7: None,
+ 0x00b8: None,
+ 0x00b9: None,
+ 0x00ba: None,
0x00bb: 0x061b, # ARABIC SEMICOLON
+ 0x00bc: None,
+ 0x00bd: None,
+ 0x00be: None,
0x00bf: 0x061f, # ARABIC QUESTION MARK
+ 0x00c0: None,
0x00c1: 0x0621, # ARABIC LETTER HAMZA
0x00c2: 0x0622, # ARABIC LETTER ALEF WITH MADDA ABOVE
0x00c3: 0x0623, # ARABIC LETTER ALEF WITH HAMZA ABOVE
0x00d8: 0x0638, # ARABIC LETTER ZAH
0x00d9: 0x0639, # ARABIC LETTER AIN
0x00da: 0x063a, # ARABIC LETTER GHAIN
+ 0x00db: None,
+ 0x00dc: None,
+ 0x00dd: None,
+ 0x00de: None,
+ 0x00df: None,
0x00e0: 0x0640, # ARABIC TATWEEL
0x00e1: 0x0641, # ARABIC LETTER FEH
0x00e2: 0x0642, # ARABIC LETTER QAF
0x00f0: 0x0650, # ARABIC KASRA
0x00f1: 0x0651, # ARABIC SHADDA
0x00f2: 0x0652, # ARABIC SUKUN
-}
+ 0x00f3: None,
+ 0x00f4: None,
+ 0x00f5: None,
+ 0x00f6: None,
+ 0x00f7: None,
+ 0x00f8: None,
+ 0x00f9: None,
+ 0x00fa: None,
+ 0x00fb: None,
+ 0x00fc: None,
+ 0x00fd: None,
+ 0x00fe: None,
+ 0x00ff: None,
+})
### Encoding Map
-""" Python Character Mapping Codec generated from '8859-7.TXT'.
-
+""" Python Character Mapping Codec generated from '8859-7.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x00a1: 0x2018, # LEFT SINGLE QUOTATION MARK
0x00a2: 0x2019, # RIGHT SINGLE QUOTATION MARK
+ 0x00a4: None,
+ 0x00a5: None,
+ 0x00aa: None,
+ 0x00ae: None,
0x00af: 0x2015, # HORIZONTAL BAR
0x00b4: 0x0384, # GREEK TONOS
0x00b5: 0x0385, # GREEK DIALYTIKA TONOS
0x00cf: 0x039f, # GREEK CAPITAL LETTER OMICRON
0x00d0: 0x03a0, # GREEK CAPITAL LETTER PI
0x00d1: 0x03a1, # GREEK CAPITAL LETTER RHO
+ 0x00d2: None,
0x00d3: 0x03a3, # GREEK CAPITAL LETTER SIGMA
0x00d4: 0x03a4, # GREEK CAPITAL LETTER TAU
0x00d5: 0x03a5, # GREEK CAPITAL LETTER UPSILON
0x00fc: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS
0x00fd: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS
0x00fe: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS
-}
+ 0x00ff: None,
+})
### Encoding Map
-""" Python Character Mapping Codec generated from '8859-8.TXT'.
-
+""" Python Character Mapping Codec generated from '8859-8.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
+ 0x00a1: None,
0x00aa: 0x00d7, # MULTIPLICATION SIGN
- 0x00af: 0x203e, # OVERLINE
0x00ba: 0x00f7, # DIVISION SIGN
+ 0x00bf: None,
+ 0x00c0: None,
+ 0x00c1: None,
+ 0x00c2: None,
+ 0x00c3: None,
+ 0x00c4: None,
+ 0x00c5: None,
+ 0x00c6: None,
+ 0x00c7: None,
+ 0x00c8: None,
+ 0x00c9: None,
+ 0x00ca: None,
+ 0x00cb: None,
+ 0x00cc: None,
+ 0x00cd: None,
+ 0x00ce: None,
+ 0x00cf: None,
+ 0x00d0: None,
+ 0x00d1: None,
+ 0x00d2: None,
+ 0x00d3: None,
+ 0x00d4: None,
+ 0x00d5: None,
+ 0x00d6: None,
+ 0x00d7: None,
+ 0x00d8: None,
+ 0x00d9: None,
+ 0x00da: None,
+ 0x00db: None,
+ 0x00dc: None,
+ 0x00dd: None,
+ 0x00de: None,
0x00df: 0x2017, # DOUBLE LOW LINE
0x00e0: 0x05d0, # HEBREW LETTER ALEF
0x00e1: 0x05d1, # HEBREW LETTER BET
0x00f8: 0x05e8, # HEBREW LETTER RESH
0x00f9: 0x05e9, # HEBREW LETTER SHIN
0x00fa: 0x05ea, # HEBREW LETTER TAV
-}
+ 0x00fb: None,
+ 0x00fc: None,
+ 0x00fd: 0x200e, # LEFT-TO-RIGHT MARK
+ 0x00fe: 0x200f, # RIGHT-TO-LEFT MARK
+ 0x00ff: None,
+})
### Encoding Map
-""" Python Character Mapping Codec generated from '8859-9.TXT'.
-
+""" Python Character Mapping Codec generated from '8859-9.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x00d0: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE
0x00dd: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE
0x00de: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA
0x00f0: 0x011f, # LATIN SMALL LETTER G WITH BREVE
0x00fd: 0x0131, # LATIN SMALL LETTER DOTLESS I
0x00fe: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA
-}
+})
### Encoding Map
-""" Python Character Mapping Codec generated from 'KOI8-R.TXT'.
-
+""" Python Character Mapping Codec generated from 'KOI8-R.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x0080: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
0x0081: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
0x0082: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x00fd: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA
0x00fe: 0x0427, # CYRILLIC CAPITAL LETTER CHE
0x00ff: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN
-}
+})
### Encoding Map
-""" Python Character Mapping Codec generated from 'CYRILLIC.TXT'.
-
+""" Python Character Mapping Codec generated from 'CYRILLIC.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A
0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE
0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE
0x00fd: 0x044d, # CYRILLIC SMALL LETTER E
0x00fe: 0x044e, # CYRILLIC SMALL LETTER YU
0x00ff: 0x00a4, # CURRENCY SIGN
-}
+})
### Encoding Map
-""" Python Character Mapping Codec generated from 'GREEK.TXT'.
-
+""" Python Character Mapping Codec generated from 'GREEK.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
0x0081: 0x00b9, # SUPERSCRIPT ONE
0x0082: 0x00b2, # SUPERSCRIPT TWO
0x00fd: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
0x00fe: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
0x00ff: None, # UNDEFINED
-}
+})
### Encoding Map
-""" Python Character Mapping Codec generated from 'ICELAND.TXT'.
-
+""" Python Character Mapping Codec generated from 'ICELAND.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT
0x00fe: 0x02db, # OGONEK
0x00ff: 0x02c7, # CARON
-}
+})
### Encoding Map
-""" Python Character Mapping Codec generated from 'LATIN2.TXT'.
-
+""" Python Character Mapping Codec generated from 'LATIN2.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
0x0081: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON
0x0082: 0x0101, # LATIN SMALL LETTER A WITH MACRON
0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
0x00fe: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA
0x00ff: 0x02c7, # CARON
-}
+})
### Encoding Map
-""" Python Character Mapping Codec generated from 'ROMAN.TXT'.
-
+""" Python Character Mapping Codec generated from 'ROMAN.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT
0x00fe: 0x02db, # OGONEK
0x00ff: 0x02c7, # CARON
-}
+})
### Encoding Map
-""" Python Character Mapping Codec generated from 'TURKISH.TXT'.
-
+""" Python Character Mapping Codec generated from 'TURKISH.TXT' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
### Decoding Map
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT
0x00fe: 0x02db, # OGONEK
0x00ff: 0x02c7, # CARON
-}
+})
### Encoding Map
'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
'cp863', 'cp865', 'cp866',
'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
- 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
- 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
+ 'iso8859_2', 'iso8859_4', 'iso8859_5',
+ 'iso8859_9', 'koi8_r', 'latin_1',
'mac_cyrillic', 'mac_latin2',
### These have undefined mappings:
#'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
#'cp1256', 'cp1257', 'cp1258',
#'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
+ #'iso8859_3', 'iso8859_6', 'iso8859_7',
#'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
### These fail the round-trip:
Py_DECREF(w);
if (x == NULL) {
if (PyErr_ExceptionMatches(PyExc_LookupError)) {
- /* No mapping found: default to Latin-1 mapping */
+ /* No mapping found means: mapping is undefined. */
PyErr_Clear();
- *p++ = (Py_UNICODE)ch;
- continue;
- }
+ x = Py_None;
+ Py_INCREF(x);
+ } else
goto onError;
}
Py_DECREF(w);
if (x == NULL) {
if (PyErr_ExceptionMatches(PyExc_LookupError)) {
- /* No mapping found: default to Latin-1 mapping if possible */
+ /* No mapping found means: mapping is undefined. */
PyErr_Clear();
- if (ch < 256) {
- *s++ = (char)ch;
- continue;
- }
- else if (!charmap_encoding_error(&p, &s, errors,
- "missing character mapping"))
- continue;
- }
+ x = Py_None;
+ Py_INCREF(x);
+ } else
goto onError;
}
""" Unicode Mapping Parser and Codec Generator.
This script parses Unicode mapping files as available from the Unicode
-site (ftp.unicode.org) and creates Python codec modules from them. The
-codecs use the standard character mapping codec to actually apply the
-mapping.
+site (ftp://ftp.unicode.org/Public/MAPPINGS/) and creates Python codec
+modules from them. The codecs use the standard character mapping codec
+to actually apply the mapping.
Synopsis: gencodec.py dir codec_prefix
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright Guido van Rossum, 2000.
"""#"
lines = f.readlines()
f.close()
enc2uni = {}
+ identity = []
+ unmapped = range(256)
+ for i in range(256):
+ unmapped[i] = i
for line in lines:
line = strip(line)
if not line or line[0] == '#':
comment = ''
else:
comment = comment[1:]
- if enc != uni:
+ if enc < 256:
+ unmapped.remove(enc)
+ if enc == uni:
+ identity.append(enc)
+ else:
+ enc2uni[enc] = (uni,comment)
+ else:
enc2uni[enc] = (uni,comment)
+ # If there are more identity-mapped entries than unmapped entries,
+ # it pays to generate an identity dictionary first, add add explicit
+ # mappings to None for the rest
+ if len(identity)>=len(unmapped):
+ for enc in unmapped:
+ enc2uni[enc] = (None, "")
+ enc2uni['IDENTITY'] = 256
+
return enc2uni
def hexrepr(t,
"""
l = [
'''\
-""" Python Character Mapping Codec generated from '%s'.
+""" Python Character Mapping Codec generated from '%s' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
"""#"
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
-
-decoding_map = {
''' % name,
]
+
+ if map.has_key("IDENTITY"):
+ l.append("decoding_map = codecs.make_identity_dict(range(%d))"
+ % map["IDENTITY"])
+ l.append("decoding_map.update({")
+ splits = 1
+ del map["IDENTITY"]
+ else:
+ l.append("decoding_map = {")
+ splits = 0
+
mappings = map.items()
mappings.sort()
append = l.append
i = 0
- splits = 0
for e,value in mappings:
try:
(u,c) = value
append('\t%s: %s,\t# %s' % (key,unicoderepr(u),c))
else:
append('\t%s: %s,' % (key,unicoderepr(u)))
- i = i + 1
+ i += 1
if i == 4096:
# Split the definition into parts to that the Python
# parser doesn't dump core
append('}')
else:
append('})')
- append('map.update({')
+ append('decoding_map.update({')
i = 0
splits = splits + 1
if splits == 0:
mapnames = os.listdir(dir)
for mapname in mapnames:
- if mapname[-len('.mapping'):] != '.mapping':
+ if not mapname.endswith('.mapping'):
continue
codefile = mapname[:-len('.mapping')] + '.py'
print 'converting %s to %s' % (mapname,