This patch changes the default behaviour of the builtin charmap

author Marc-André Lemburg <mal@egenix.com>

Wed, 3 Jan 2001 21:29:14 +0000 (21:29 +0000)

committer Marc-André Lemburg <mal@egenix.com>

Wed, 3 Jan 2001 21:29:14 +0000 (21:29 +0000)
author Marc-André Lemburg <mal@egenix.com>
Wed, 3 Jan 2001 21:29:14 +0000 (21:29 +0000)
committer Marc-André Lemburg <mal@egenix.com>
Wed, 3 Jan 2001 21:29:14 +0000 (21:29 +0000)
diff --git a/Lib/codecs.py b/Lib/codecs.py

index fca0f8e287c1252cb1c1971af94738390b2daf4e..993113752ef1859750aa83e8f0eb2f4ebf019188 100644 (file)
--- a/Lib/codecs.py
+++ b/Lib/codecs.py
@@ -539,6 +539,21 @@ def EncodedFile(file, data_encoding, file_encoding=None, errors='strict'):
      sr.file_encoding = file_encoding
      return sr
  
+### Helpers for charmap-based codecs
+
+def make_identity_dict(rng):
+
+    """ make_identity_dict(rng) -> dict
+
+        Return a new dictionary that maps each element of the
+        iterable rng to itself, e.g. range(3) -> {0: 0, 1: 1, 2: 2}.
+        
+    """
+    res = {}
+    for i in rng:
+        res[i]=i
+    return res
+
  ### Tests
  
  if __name__ == '__main__':
diff --git a/Lib/encodings/cp037.py b/Lib/encodings/cp037.py

index d60504ca0fd0e62dfaca8f07ffb58d078c9eddd2..5868372b077b0752d797f7035616e3abf0243683 100644 (file)
--- a/Lib/encodings/cp037.py
+++ b/Lib/encodings/cp037.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP037.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP037.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,8 +35,8 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x0004: 0x009c, # CONTROL
         0x0005: 0x0009, # HORIZONTAL TABULATION
         0x0006: 0x0086, # CONTROL
@@ -273,7 +273,7 @@ decoding_map = {
         0x00fd: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
         0x00fe: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
         0x00ff: 0x009f, # CONTROL
-}
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/cp1006.py b/Lib/encodings/cp1006.py

index 991feed9c7c86535ad84fe4e0c4634e72a8f87d9..593fbb601ccffe7f9f8cf3e2a0f92f04fd35dff8 100644 (file)
--- a/Lib/encodings/cp1006.py
+++ b/Lib/encodings/cp1006.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP1006.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP1006.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,8 +35,8 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x00a1: 0x06f0, #       EXTENDED ARABIC-INDIC DIGIT ZERO
         0x00a2: 0x06f1, #       EXTENDED ARABIC-INDIC DIGIT ONE
         0x00a3: 0x06f2, #       EXTENDED ARABIC-INDIC DIGIT TWO
@@ -131,7 +131,7 @@ decoding_map = {
         0x00fd: 0xfbae, #       ARABIC LETTER YEH BARREE ISOLATED FORM
         0x00fe: 0xfe7c, #       ARABIC SHADDA ISOLATED FORM
         0x00ff: 0xfe7d, #       ARABIC SHADDA MEDIAL FORM
-}
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/cp1026.py b/Lib/encodings/cp1026.py

index ae8086fc37bbec24158920fb4f800926c28f5039..3796a75c093c8476088f0439a2348f1e0ab91211 100644 (file)
--- a/Lib/encodings/cp1026.py
+++ b/Lib/encodings/cp1026.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP1026.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP1026.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,8 +35,8 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x0004: 0x009c, # CONTROL
         0x0005: 0x0009, # HORIZONTAL TABULATION
         0x0006: 0x0086, # CONTROL
@@ -273,7 +273,7 @@ decoding_map = {
         0x00fd: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
         0x00fe: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
         0x00ff: 0x009f, # CONTROL
-}
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/cp1250.py b/Lib/encodings/cp1250.py

index d1276c4c6eddea276602e926c85ce16f4a2d1faa..03a3e3177d85bd3883880a29f8d3d7f890589d47 100644 (file)
--- a/Lib/encodings/cp1250.py
+++ b/Lib/encodings/cp1250.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP1250.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP1250.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,8 +35,8 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x0080: 0x20ac, # EURO SIGN
         0x0081: None,   # UNDEFINED
         0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
@@ -116,7 +116,7 @@ decoding_map = {
         0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE
         0x00fe: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA
         0x00ff: 0x02d9, # DOT ABOVE
-}
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/cp1251.py b/Lib/encodings/cp1251.py

index 42921e465340dd00287f3ffb3b455e326d429af2..e27a122c3829126fbb860e9dfb9daba55e307d7e 100644 (file)
--- a/Lib/encodings/cp1251.py
+++ b/Lib/encodings/cp1251.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP1251.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP1251.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,8 +35,8 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x0080: 0x0402, # CYRILLIC CAPITAL LETTER DJE
         0x0081: 0x0403, # CYRILLIC CAPITAL LETTER GJE
         0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
@@ -150,7 +150,7 @@ decoding_map = {
         0x00fd: 0x044d, # CYRILLIC SMALL LETTER E
         0x00fe: 0x044e, # CYRILLIC SMALL LETTER YU
         0x00ff: 0x044f, # CYRILLIC SMALL LETTER YA
-}
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/cp1252.py b/Lib/encodings/cp1252.py

index 07a5358366d7c61acf4c9256544d0b0f58f9a55a..5d7bdd63969b46d90b448f4551a973c05cd05b50 100644 (file)
--- a/Lib/encodings/cp1252.py
+++ b/Lib/encodings/cp1252.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP1252.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP1252.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,8 +35,8 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x0080: 0x20ac, # EURO SIGN
         0x0081: None,   # UNDEFINED
         0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
@@ -69,7 +69,7 @@ decoding_map = {
         0x009d: None,   # UNDEFINED
         0x009e: 0x017e, # LATIN SMALL LETTER Z WITH CARON
         0x009f: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS
-}
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/cp1253.py b/Lib/encodings/cp1253.py

index c84808a254a16998df66f423a92cc77d406d9d23..abc144cc04d0f0babad344c3bf7e77f52f2f82db 100644 (file)
--- a/Lib/encodings/cp1253.py
+++ b/Lib/encodings/cp1253.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP1253.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP1253.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,8 +35,8 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x0080: 0x20ac, # EURO SIGN
         0x0081: None,   # UNDEFINED
         0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
@@ -144,7 +144,7 @@ decoding_map = {
         0x00fd: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS
         0x00fe: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS
         0x00ff: None,   # UNDEFINED
-}
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/cp1254.py b/Lib/encodings/cp1254.py

index 9897ecf602f2a07cc787bc3d68ef9fd969f1e307..4a2ab3caf472b8fada2e769bee2efcd6110080fe 100644 (file)
--- a/Lib/encodings/cp1254.py
+++ b/Lib/encodings/cp1254.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP1254.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP1254.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,8 +35,8 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x0080: 0x20ac, # EURO SIGN
         0x0081: None,   # UNDEFINED
         0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
@@ -75,7 +75,7 @@ decoding_map = {
         0x00f0: 0x011f, # LATIN SMALL LETTER G WITH BREVE
         0x00fd: 0x0131, # LATIN SMALL LETTER DOTLESS I
         0x00fe: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA
-}
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/cp1255.py b/Lib/encodings/cp1255.py

index 5404b46e4a7c58133c3be127d24c3285e5ae0a3a..c987b85f45e7e033c76537714412f03d8130c5a2 100644 (file)
--- a/Lib/encodings/cp1255.py
+++ b/Lib/encodings/cp1255.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP1255.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP1255.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,8 +35,8 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x0080: 0x20ac, # EURO SIGN
         0x0081: None,   # UNDEFINED
         0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
@@ -136,7 +136,7 @@ decoding_map = {
         0x00fd: 0x200e, # LEFT-TO-RIGHT MARK
         0x00fe: 0x200f, # RIGHT-TO-LEFT MARK
         0x00ff: None,   # UNDEFINED
-}
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/cp1256.py b/Lib/encodings/cp1256.py

index 6bb02dda2145195be6490ff3c18ca134f2e08e0d..d72c5bcc9bbd6b395d333d36f5ecfc1612eee309 100644 (file)
--- a/Lib/encodings/cp1256.py
+++ b/Lib/encodings/cp1256.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP1256.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP1256.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,8 +35,8 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x0080: 0x20ac, # EURO SIGN
         0x0081: 0x067e, # ARABIC LETTER PEH
         0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
@@ -122,7 +122,7 @@ decoding_map = {
         0x00fd: 0x200e, # LEFT-TO-RIGHT MARK
         0x00fe: 0x200f, # RIGHT-TO-LEFT MARK
         0x00ff: 0x06d2, # ARABIC LETTER YEH BARREE
-}
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/cp1257.py b/Lib/encodings/cp1257.py

index ded826c92ca5b70bbde997c35596a8c2edeaf559..d17a90421709d19f238243d7d919172eca64900e 100644 (file)
--- a/Lib/encodings/cp1257.py
+++ b/Lib/encodings/cp1257.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP1257.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP1257.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,8 +35,8 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x0080: 0x20ac, # EURO SIGN
         0x0081: None,   # UNDEFINED
         0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
@@ -124,7 +124,7 @@ decoding_map = {
         0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
         0x00fe: 0x017e, # LATIN SMALL LETTER Z WITH CARON
         0x00ff: 0x02d9, # DOT ABOVE
-}
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/cp1258.py b/Lib/encodings/cp1258.py

index 955253cf91391b308f276b854d558a1d40fb7135..597f12438c96d96ec22615ee02fff631916f495d 100644 (file)
--- a/Lib/encodings/cp1258.py
+++ b/Lib/encodings/cp1258.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP1258.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP1258.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,8 +35,8 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x0080: 0x20ac, # EURO SIGN
         0x0081: None,   # UNDEFINED
         0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK
@@ -83,7 +83,7 @@ decoding_map = {
         0x00f5: 0x01a1, # LATIN SMALL LETTER O WITH HORN
         0x00fd: 0x01b0, # LATIN SMALL LETTER U WITH HORN
         0x00fe: 0x20ab, # DONG SIGN
-}
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/cp424.py b/Lib/encodings/cp424.py

index c4abaecdfb05e57b3a5fe8f9d7737603dec28acc..bc10379808dc980ee0a3d41bfaa21b4b0711ecbb 100644 (file)
--- a/Lib/encodings/cp424.py
+++ b/Lib/encodings/cp424.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP424.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP424.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,8 +35,8 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x0004: 0x009c, # SELECT
         0x0005: 0x0009, # HORIZONTAL TABULATION
         0x0006: 0x0086, # REQUIRED NEW LINE
@@ -273,7 +273,7 @@ decoding_map = {
         0x00fd: None,   # UNDEFINED
         0x00fe: None,   # UNDEFINED
         0x00ff: 0x009f, # EIGHT ONES
-}
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/cp437.py b/Lib/encodings/cp437.py

index ca7d90ea52e33c4bb65a91380a19de3e93b3dda5..db1b88a56794b1ddc5abcfe2af7857573e704e51 100644 (file)
--- a/Lib/encodings/cp437.py
+++ b/Lib/encodings/cp437.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP437.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP437.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,8 +35,8 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
         0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
         0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
@@ -165,7 +165,7 @@ decoding_map = {
         0x00fd: 0x00b2, # SUPERSCRIPT TWO
         0x00fe: 0x25a0, # BLACK SQUARE
         0x00ff: 0x00a0, # NO-BREAK SPACE
-}
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/cp500.py b/Lib/encodings/cp500.py

index 33d6fedb46477661f32549a49c53d077c563ea8e..1c8fb57e1746f02ae5bfd750c0e154de36add90f 100644 (file)
--- a/Lib/encodings/cp500.py
+++ b/Lib/encodings/cp500.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP500.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP500.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,8 +35,8 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x0004: 0x009c, # CONTROL
         0x0005: 0x0009, # HORIZONTAL TABULATION
         0x0006: 0x0086, # CONTROL
@@ -273,7 +273,7 @@ decoding_map = {
         0x00fd: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
         0x00fe: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
         0x00ff: 0x009f, # CONTROL
-}
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/cp737.py b/Lib/encodings/cp737.py

index e55b3dd6b4212744e02de2b771e5ae291cc482eb..03665aea7d19519f48fd5b3d3b5146502b7f2047 100644 (file)
--- a/Lib/encodings/cp737.py
+++ b/Lib/encodings/cp737.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP737.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP737.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,8 +35,8 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x0080: 0x0391, # GREEK CAPITAL LETTER ALPHA
         0x0081: 0x0392, # GREEK CAPITAL LETTER BETA
         0x0082: 0x0393, # GREEK CAPITAL LETTER GAMMA
@@ -165,7 +165,7 @@ decoding_map = {
         0x00fd: 0x00b2, # SUPERSCRIPT TWO
         0x00fe: 0x25a0, # BLACK SQUARE
         0x00ff: 0x00a0, # NO-BREAK SPACE
-}
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/cp775.py b/Lib/encodings/cp775.py

index e43ce2d1bab6603f3c098f7f20051108e7ad5f8b..b38ccb5fe3eb62697c10348540bc90184a32e99c 100644 (file)
--- a/Lib/encodings/cp775.py
+++ b/Lib/encodings/cp775.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP775.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP775.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,8 +35,8 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x0080: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE
         0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
         0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
@@ -165,7 +165,7 @@ decoding_map = {
         0x00fd: 0x00b2, # SUPERSCRIPT TWO
         0x00fe: 0x25a0, # BLACK SQUARE
         0x00ff: 0x00a0, # NO-BREAK SPACE
-}
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/cp850.py b/Lib/encodings/cp850.py

index cb0918c96ba8d11da8f90326edca23cd09836550..e26287b7f3b19de8b7b9dc16041d528818a76aff 100644 (file)
--- a/Lib/encodings/cp850.py
+++ b/Lib/encodings/cp850.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP850.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP850.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,8 +35,8 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
         0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
         0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
@@ -165,7 +165,7 @@ decoding_map = {
         0x00fd: 0x00b2, # SUPERSCRIPT TWO
         0x00fe: 0x25a0, # BLACK SQUARE
         0x00ff: 0x00a0, # NO-BREAK SPACE
-}
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/cp852.py b/Lib/encodings/cp852.py

index ba4f14219afcc6a0f3eb09d6be0d6ee68ca7d815..431d8448f7eb530e5fba0de0350c93e4876ef7f4 100644 (file)
--- a/Lib/encodings/cp852.py
+++ b/Lib/encodings/cp852.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP852.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP852.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,8 +35,8 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
         0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
         0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
@@ -165,7 +165,7 @@ decoding_map = {
         0x00fd: 0x0159, # LATIN SMALL LETTER R WITH CARON
         0x00fe: 0x25a0, # BLACK SQUARE
         0x00ff: 0x00a0, # NO-BREAK SPACE
-}
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/cp855.py b/Lib/encodings/cp855.py

index c967bcf17c6c876b7ca0011fad4a31f1d86f159c..c9e71687bdee9ba38f2a125481510ae22b60d6eb 100644 (file)
--- a/Lib/encodings/cp855.py
+++ b/Lib/encodings/cp855.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP855.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP855.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,8 +35,8 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x0080: 0x0452, # CYRILLIC SMALL LETTER DJE
         0x0081: 0x0402, # CYRILLIC CAPITAL LETTER DJE
         0x0082: 0x0453, # CYRILLIC SMALL LETTER GJE
@@ -165,7 +165,7 @@ decoding_map = {
         0x00fd: 0x00a7, # SECTION SIGN
         0x00fe: 0x25a0, # BLACK SQUARE
         0x00ff: 0x00a0, # NO-BREAK SPACE
-}
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/cp856.py b/Lib/encodings/cp856.py

index f384acbf2d2c426332a0c1105b0c5afe5f7b225e..cc2e01f45af103ac3df023afb5c8c28cb0b20625 100644 (file)
--- a/Lib/encodings/cp856.py
+++ b/Lib/encodings/cp856.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP856.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP856.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,8 +35,8 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x0080: 0x05d0, # HEBREW LETTER ALEF
         0x0081: 0x05d1, # HEBREW LETTER BET
         0x0082: 0x05d2, # HEBREW LETTER GIMEL
@@ -120,10 +120,10 @@ decoding_map = {
         0x00d0: None,   # UNDEFINED
         0x00d1: None,   # UNDEFINED
         0x00d2: None,   # UNDEFINED
-       0x00d3: None,   # UNDEFINED
+       0x00d3: None,   # UNDEFINED
         0x00d4: None,   # UNDEFINED
         0x00d5: None,   # UNDEFINED
-       0x00d6: None,   # UNDEFINED
+       0x00d6: None,   # UNDEFINED
         0x00d7: None,   # UNDEFINED
         0x00d8: None,   # UNDEFINED
         0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
@@ -165,7 +165,7 @@ decoding_map = {
         0x00fd: 0x00b2, # SUPERSCRIPT TWO
         0x00fe: 0x25a0, # BLACK SQUARE
         0x00ff: 0x00a0, # NO-BREAK SPACE
-}
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/cp857.py b/Lib/encodings/cp857.py

index 49cc68529a1783dec592f540e451a56cc1f11828..6f4df23a56891bef875d910ea942638cb6990df6 100644 (file)
--- a/Lib/encodings/cp857.py
+++ b/Lib/encodings/cp857.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP857.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP857.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,8 +35,8 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
         0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
         0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
@@ -164,7 +164,7 @@ decoding_map = {
         0x00fd: 0x00b2, # SUPERSCRIPT TWO
         0x00fe: 0x25a0, # BLACK SQUARE
         0x00ff: 0x00a0, # NO-BREAK SPACE
-}
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/cp860.py b/Lib/encodings/cp860.py

index 3b9a15d297c7dd3d80aa11b4f61b64a3cbf9ae7f..057d91870f3b66d09396eabee3e73944c7da564a 100644 (file)
--- a/Lib/encodings/cp860.py
+++ b/Lib/encodings/cp860.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP860.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP860.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,8 +35,8 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
         0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
         0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
@@ -165,7 +165,7 @@ decoding_map = {
         0x00fd: 0x00b2, # SUPERSCRIPT TWO
         0x00fe: 0x25a0, # BLACK SQUARE
         0x00ff: 0x00a0, # NO-BREAK SPACE
-}
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/cp861.py b/Lib/encodings/cp861.py

index 3f07fbac41ddc19692ba20b834027cbde0de2aae..8db3b40b243e9feaa1aa32c6848cb69fcdba4c7b 100644 (file)
--- a/Lib/encodings/cp861.py
+++ b/Lib/encodings/cp861.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP861.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP861.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,8 +35,8 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
         0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
         0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
@@ -165,7 +165,7 @@ decoding_map = {
         0x00fd: 0x00b2, # SUPERSCRIPT TWO
         0x00fe: 0x25a0, # BLACK SQUARE
         0x00ff: 0x00a0, # NO-BREAK SPACE
-}
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/cp862.py b/Lib/encodings/cp862.py

index 4bc1cbed336b574854bbe981889c2a160d36e8ea..1cac3e278a5f326fbb3bd084e56bc34962ea67ea 100644 (file)
--- a/Lib/encodings/cp862.py
+++ b/Lib/encodings/cp862.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP862.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP862.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,8 +35,8 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x0080: 0x05d0, # HEBREW LETTER ALEF
         0x0081: 0x05d1, # HEBREW LETTER BET
         0x0082: 0x05d2, # HEBREW LETTER GIMEL
@@ -165,7 +165,7 @@ decoding_map = {
         0x00fd: 0x00b2, # SUPERSCRIPT TWO
         0x00fe: 0x25a0, # BLACK SQUARE
         0x00ff: 0x00a0, # NO-BREAK SPACE
-}
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/cp863.py b/Lib/encodings/cp863.py

index 3e6103f6ca933610957914b3eff38d9a9ec2ecb6..ecdc391f74f830774afec75932b7a75f109f0f97 100644 (file)
--- a/Lib/encodings/cp863.py
+++ b/Lib/encodings/cp863.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP863.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP863.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,8 +35,8 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
         0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
         0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
@@ -165,7 +165,7 @@ decoding_map = {
         0x00fd: 0x00b2, # SUPERSCRIPT TWO
         0x00fe: 0x25a0, # BLACK SQUARE
         0x00ff: 0x00a0, # NO-BREAK SPACE
-}
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/cp864.py b/Lib/encodings/cp864.py

index 819327836e63901046639adc3c08fe0d5d3d89cb..861fb00111e1e32fac8ff3fd747fa386cde1cbfc 100644 (file)
--- a/Lib/encodings/cp864.py
+++ b/Lib/encodings/cp864.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP864.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP864.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,8 +35,8 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x0025: 0x066a, # ARABIC PERCENT SIGN
         0x0080: 0x00b0, # DEGREE SIGN
         0x0081: 0x00b7, # MIDDLE DOT
@@ -163,7 +163,7 @@ decoding_map = {
         0x00fd: 0xfef1, # ARABIC LETTER YEH ISOLATED FORM
         0x00fe: 0x25a0, # BLACK SQUARE
         0x00ff: None,   # UNDEFINED
-}
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/cp865.py b/Lib/encodings/cp865.py

index eaed7a9f9ff6236f46fd3ef1f459f15d42ab1794..4d9010df8fb5b9f72284c57d65fb6a9340c7722a 100644 (file)
--- a/Lib/encodings/cp865.py
+++ b/Lib/encodings/cp865.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP865.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP865.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,8 +35,8 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
         0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
         0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
@@ -165,7 +165,7 @@ decoding_map = {
         0x00fd: 0x00b2, # SUPERSCRIPT TWO
         0x00fe: 0x25a0, # BLACK SQUARE
         0x00ff: 0x00a0, # NO-BREAK SPACE
-}
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/cp866.py b/Lib/encodings/cp866.py

index 25e1a50c12faad71b27588a51abfc7c0a6ae0552..6a8b0b0752086335234e0d722c73621489828b72 100644 (file)
--- a/Lib/encodings/cp866.py
+++ b/Lib/encodings/cp866.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP866.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP866.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,8 +35,8 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A
         0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE
         0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE
@@ -165,7 +165,7 @@ decoding_map = {
         0x00fd: 0x00a4, # CURRENCY SIGN
         0x00fe: 0x25a0, # BLACK SQUARE
         0x00ff: 0x00a0, # NO-BREAK SPACE
-}
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/cp869.py b/Lib/encodings/cp869.py

index 840335f0a8a0cd0c8204637b39efacb4c41b8516..65d2b2e1d09dda131a30daf61c98be849a9b29c5 100644 (file)
--- a/Lib/encodings/cp869.py
+++ b/Lib/encodings/cp869.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP869.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP869.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,8 +35,8 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x0080: None,   # UNDEFINED
         0x0081: None,   # UNDEFINED
         0x0082: None,   # UNDEFINED
@@ -165,7 +165,7 @@ decoding_map = {
         0x00fd: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS
         0x00fe: 0x25a0, # BLACK SQUARE
         0x00ff: 0x00a0, # NO-BREAK SPACE
-}
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/cp874.py b/Lib/encodings/cp874.py

index 0231c7ac5bb92b2718d708d66dd4ffc77fc9167a..31f4d3d6d4c85bde3691bc156acb68656176f694 100644 (file)
--- a/Lib/encodings/cp874.py
+++ b/Lib/encodings/cp874.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP874.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP874.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,8 +35,8 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x0080: 0x20ac, # EURO SIGN
         0x0081: None,   # UNDEFINED
         0x0082: None,   # UNDEFINED
@@ -164,7 +164,7 @@ decoding_map = {
         0x00fd: None,   # UNDEFINED
         0x00fe: None,   # UNDEFINED
         0x00ff: None,   # UNDEFINED
-}
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/cp875.py b/Lib/encodings/cp875.py

index 924c0a0a269551c294f4927e2ab37180fa5b734c..3500446930a45cae90c7dd6da4b08e57af5286b9 100644 (file)
--- a/Lib/encodings/cp875.py
+++ b/Lib/encodings/cp875.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CP875.TXT'.
-
+""" Python Character Mapping Codec generated from 'CP875.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,8 +35,8 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x0004: 0x009c, # CONTROL
         0x0005: 0x0009, # HORIZONTAL TABULATION
         0x0006: 0x0086, # CONTROL
@@ -274,7 +274,7 @@ decoding_map = {
         0x00fd: 0x001a, # SUBSTITUTE
         0x00fe: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
         0x00ff: 0x009f, # CONTROL
-}
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/iso8859_1.py b/Lib/encodings/iso8859_1.py

index 7355853d745585c161b3f3e4c10660e7d2240a17..f4c0bf750f26744e12ff7c992d9f469b80868137 100644 (file)
--- a/Lib/encodings/iso8859_1.py
+++ b/Lib/encodings/iso8859_1.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from '8859-1.TXT'.
-
+""" Python Character Mapping Codec generated from '8859-1.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -22,10 +22,7 @@ class Codec(codecs.Codec):
          return codecs.charmap_decode(input,errors,decoding_map)
  
  class StreamWriter(Codec,codecs.StreamWriter):
-
-    def __init__(self,stream,errors='strict'):
-
-        codecs.StreamWriter.__init__(self,strict,errors)
+    pass
          
  class StreamReader(Codec,codecs.StreamReader):
      pass
@@ -38,9 +35,9 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
-}
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/iso8859_10.py b/Lib/encodings/iso8859_10.py

index 96b435cc2999b41849ed4aa76df62449d33c3c26..c43c65306a275357a5da16f9fffd4a0bf8f85def 100644 (file)
--- a/Lib/encodings/iso8859_10.py
+++ b/Lib/encodings/iso8859_10.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from '8859-10.TXT'.
-
+""" Python Character Mapping Codec generated from '8859-10.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,8 +35,8 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x00a1: 0x0104, #       LATIN CAPITAL LETTER A WITH OGONEK
         0x00a2: 0x0112, #       LATIN CAPITAL LETTER E WITH MACRON
         0x00a3: 0x0122, #       LATIN CAPITAL LETTER G WITH CEDILLA
@@ -83,7 +83,7 @@ decoding_map = {
         0x00f7: 0x0169, #       LATIN SMALL LETTER U WITH TILDE
         0x00f9: 0x0173, #       LATIN SMALL LETTER U WITH OGONEK
         0x00ff: 0x0138, #       LATIN SMALL LETTER KRA
-}
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/iso8859_13.py b/Lib/encodings/iso8859_13.py

index d8b223005a5de3e6040310b97e10fec7aa75850b..2ab52927ec38a0342354a1b1af3a7099d2a812ad 100644 (file)
--- a/Lib/encodings/iso8859_13.py
+++ b/Lib/encodings/iso8859_13.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from '8859-13.TXT'.
-
+""" Python Character Mapping Codec generated from '8859-13.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,8 +35,8 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x00a1: 0x201d, #       RIGHT DOUBLE QUOTATION MARK
         0x00a5: 0x201e, #       DOUBLE LOW-9 QUOTATION MARK
         0x00a8: 0x00d8, #       LATIN CAPITAL LETTER O WITH STROKE
@@ -93,7 +93,7 @@ decoding_map = {
         0x00fd: 0x017c, #       LATIN SMALL LETTER Z WITH DOT ABOVE
         0x00fe: 0x017e, #       LATIN SMALL LETTER Z WITH CARON
         0x00ff: 0x2019, #       RIGHT SINGLE QUOTATION MARK
-}
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/iso8859_14.py b/Lib/encodings/iso8859_14.py

index 8ee0aa9ded4c280596c778b10e026ae95e75b310..5533e9617a9885f02e4df72c2c2b877e1a5ea66b 100644 (file)
--- a/Lib/encodings/iso8859_14.py
+++ b/Lib/encodings/iso8859_14.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from '8859-14.TXT'.
-
+""" Python Character Mapping Codec generated from '8859-14.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,8 +35,8 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x00a1: 0x1e02, #       LATIN CAPITAL LETTER B WITH DOT ABOVE
         0x00a2: 0x1e03, #       LATIN SMALL LETTER B WITH DOT ABOVE
         0x00a4: 0x010a, #       LATIN CAPITAL LETTER C WITH DOT ABOVE
@@ -68,7 +68,7 @@ decoding_map = {
         0x00f0: 0x0175, #       LATIN SMALL LETTER W WITH CIRCUMFLEX
         0x00f7: 0x1e6b, #       LATIN SMALL LETTER T WITH DOT ABOVE
         0x00fe: 0x0177, #       LATIN SMALL LETTER Y WITH CIRCUMFLEX
-}
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/iso8859_15.py b/Lib/encodings/iso8859_15.py

index 862ff28cef5d7e011f7a6ca310e236fd22f1ad0f..7bffff42b880bfb1f64648549ceda5c6247fcc43 100644 (file)
--- a/Lib/encodings/iso8859_15.py
+++ b/Lib/encodings/iso8859_15.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from '8859-15.TXT'.
-
+""" Python Character Mapping Codec generated from '8859-15.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,8 +35,8 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x00a4: 0x20ac, #       EURO SIGN
         0x00a6: 0x0160, #       LATIN CAPITAL LETTER S WITH CARON
         0x00a8: 0x0161, #       LATIN SMALL LETTER S WITH CARON
@@ -45,7 +45,7 @@ decoding_map = {
         0x00bc: 0x0152, #       LATIN CAPITAL LIGATURE OE
         0x00bd: 0x0153, #       LATIN SMALL LIGATURE OE
         0x00be: 0x0178, #       LATIN CAPITAL LETTER Y WITH DIAERESIS
-}
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/iso8859_2.py b/Lib/encodings/iso8859_2.py

index 034001a056469ede8baacf920d23a9223eb5d70b..481f9a0a6cbee089611c620e5b50bcbebb61d24e 100644 (file)
--- a/Lib/encodings/iso8859_2.py
+++ b/Lib/encodings/iso8859_2.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from '8859-2.TXT'.
-
+""" Python Character Mapping Codec generated from '8859-2.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,8 +35,8 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x00a1: 0x0104, #       LATIN CAPITAL LETTER A WITH OGONEK
         0x00a2: 0x02d8, #       BREVE
         0x00a3: 0x0141, #       LATIN CAPITAL LETTER L WITH STROKE
@@ -94,7 +94,7 @@ decoding_map = {
         0x00fb: 0x0171, #       LATIN SMALL LETTER U WITH DOUBLE ACUTE
         0x00fe: 0x0163, #       LATIN SMALL LETTER T WITH CEDILLA
         0x00ff: 0x02d9, #       DOT ABOVE
-}
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/iso8859_3.py b/Lib/encodings/iso8859_3.py

index f262767c2d397f1a0a088fe5767683c5d94081ec..c2820ad6addd2371d9bd798271b4d6275a618759 100644 (file)
--- a/Lib/encodings/iso8859_3.py
+++ b/Lib/encodings/iso8859_3.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from '8859-3.TXT'.
-
+""" Python Character Mapping Codec generated from '8859-3.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,15 +35,17 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x00a1: 0x0126, #       LATIN CAPITAL LETTER H WITH STROKE
         0x00a2: 0x02d8, #       BREVE
+       0x00a5: None,
         0x00a6: 0x0124, #       LATIN CAPITAL LETTER H WITH CIRCUMFLEX
         0x00a9: 0x0130, #       LATIN CAPITAL LETTER I WITH DOT ABOVE
         0x00aa: 0x015e, #       LATIN CAPITAL LETTER S WITH CEDILLA
         0x00ab: 0x011e, #       LATIN CAPITAL LETTER G WITH BREVE
         0x00ac: 0x0134, #       LATIN CAPITAL LETTER J WITH CIRCUMFLEX
+       0x00ae: None,
         0x00af: 0x017b, #       LATIN CAPITAL LETTER Z WITH DOT ABOVE
         0x00b1: 0x0127, #       LATIN SMALL LETTER H WITH STROKE
         0x00b6: 0x0125, #       LATIN SMALL LETTER H WITH CIRCUMFLEX
@@ -51,21 +53,26 @@ decoding_map = {
         0x00ba: 0x015f, #       LATIN SMALL LETTER S WITH CEDILLA
         0x00bb: 0x011f, #       LATIN SMALL LETTER G WITH BREVE
         0x00bc: 0x0135, #       LATIN SMALL LETTER J WITH CIRCUMFLEX
+       0x00be: None,
         0x00bf: 0x017c, #       LATIN SMALL LETTER Z WITH DOT ABOVE
+       0x00c3: None,
         0x00c5: 0x010a, #       LATIN CAPITAL LETTER C WITH DOT ABOVE
         0x00c6: 0x0108, #       LATIN CAPITAL LETTER C WITH CIRCUMFLEX
+       0x00d0: None,
         0x00d5: 0x0120, #       LATIN CAPITAL LETTER G WITH DOT ABOVE
         0x00d8: 0x011c, #       LATIN CAPITAL LETTER G WITH CIRCUMFLEX
         0x00dd: 0x016c, #       LATIN CAPITAL LETTER U WITH BREVE
         0x00de: 0x015c, #       LATIN CAPITAL LETTER S WITH CIRCUMFLEX
+       0x00e3: None,
         0x00e5: 0x010b, #       LATIN SMALL LETTER C WITH DOT ABOVE
         0x00e6: 0x0109, #       LATIN SMALL LETTER C WITH CIRCUMFLEX
+       0x00f0: None,
         0x00f5: 0x0121, #       LATIN SMALL LETTER G WITH DOT ABOVE
         0x00f8: 0x011d, #       LATIN SMALL LETTER G WITH CIRCUMFLEX
         0x00fd: 0x016d, #       LATIN SMALL LETTER U WITH BREVE
         0x00fe: 0x015d, #       LATIN SMALL LETTER S WITH CIRCUMFLEX
         0x00ff: 0x02d9, #       DOT ABOVE
-}
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/iso8859_4.py b/Lib/encodings/iso8859_4.py

index 29f9fd348c441f6392f82cae3c2062554bf68640..30d6ca6805d5276269b3ea4be5830315fc016061 100644 (file)
--- a/Lib/encodings/iso8859_4.py
+++ b/Lib/encodings/iso8859_4.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from '8859-4.TXT'.
-
+""" Python Character Mapping Codec generated from '8859-4.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,8 +35,8 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x00a1: 0x0104, #       LATIN CAPITAL LETTER A WITH OGONEK
         0x00a2: 0x0138, #       LATIN SMALL LETTER KRA
         0x00a3: 0x0156, #       LATIN CAPITAL LETTER R WITH CEDILLA
@@ -87,7 +87,7 @@ decoding_map = {
         0x00fd: 0x0169, #       LATIN SMALL LETTER U WITH TILDE
         0x00fe: 0x016b, #       LATIN SMALL LETTER U WITH MACRON
         0x00ff: 0x02d9, #       DOT ABOVE
-}
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/iso8859_5.py b/Lib/encodings/iso8859_5.py

index d71c15f39014ed20638dab4aff24e76315afda66..2bdaa5003c03e7d073a72a24c271a8aa450a0197 100644 (file)
--- a/Lib/encodings/iso8859_5.py
+++ b/Lib/encodings/iso8859_5.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from '8859-5.TXT'.
-
+""" Python Character Mapping Codec generated from '8859-5.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,8 +35,8 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x00a1: 0x0401, #       CYRILLIC CAPITAL LETTER IO
         0x00a2: 0x0402, #       CYRILLIC CAPITAL LETTER DJE
         0x00a3: 0x0403, #       CYRILLIC CAPITAL LETTER GJE
@@ -131,7 +131,7 @@ decoding_map = {
         0x00fd: 0x00a7, #       SECTION SIGN
         0x00fe: 0x045e, #       CYRILLIC SMALL LETTER SHORT U
         0x00ff: 0x045f, #       CYRILLIC SMALL LETTER DZHE
-}
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/iso8859_6.py b/Lib/encodings/iso8859_6.py

index b4d4315ca43bd7ef722cd511ee43ba66f9802673..585fa11e5d3629df5bec99cd0d2160497a785460 100644 (file)
--- a/Lib/encodings/iso8859_6.py
+++ b/Lib/encodings/iso8859_6.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from '8859-6.TXT'.
-
+""" Python Character Mapping Codec generated from '8859-6.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,11 +35,38 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
+       0x00a1: None,
+       0x00a2: None,
+       0x00a3: None,
+       0x00a5: None,
+       0x00a6: None,
+       0x00a7: None,
+       0x00a8: None,
+       0x00a9: None,
+       0x00aa: None,
+       0x00ab: None,
         0x00ac: 0x060c, #       ARABIC COMMA
+       0x00ae: None,
+       0x00af: None,
+       0x00b0: None,
+       0x00b1: None,
+       0x00b2: None,
+       0x00b3: None,
+       0x00b4: None,
+       0x00b5: None,
+       0x00b6: None,
+       0x00b7: None,
+       0x00b8: None,
+       0x00b9: None,
+       0x00ba: None,
         0x00bb: 0x061b, #       ARABIC SEMICOLON
+       0x00bc: None,
+       0x00bd: None,
+       0x00be: None,
         0x00bf: 0x061f, #       ARABIC QUESTION MARK
+       0x00c0: None,
         0x00c1: 0x0621, #       ARABIC LETTER HAMZA
         0x00c2: 0x0622, #       ARABIC LETTER ALEF WITH MADDA ABOVE
         0x00c3: 0x0623, #       ARABIC LETTER ALEF WITH HAMZA ABOVE
@@ -66,6 +93,11 @@ decoding_map = {
         0x00d8: 0x0638, #       ARABIC LETTER ZAH
         0x00d9: 0x0639, #       ARABIC LETTER AIN
         0x00da: 0x063a, #       ARABIC LETTER GHAIN
+       0x00db: None,
+       0x00dc: None,
+       0x00dd: None,
+       0x00de: None,
+       0x00df: None,
         0x00e0: 0x0640, #       ARABIC TATWEEL
         0x00e1: 0x0641, #       ARABIC LETTER FEH
         0x00e2: 0x0642, #       ARABIC LETTER QAF
@@ -85,7 +117,20 @@ decoding_map = {
         0x00f0: 0x0650, #       ARABIC KASRA
         0x00f1: 0x0651, #       ARABIC SHADDA
         0x00f2: 0x0652, #       ARABIC SUKUN
-}
+       0x00f3: None,
+       0x00f4: None,
+       0x00f5: None,
+       0x00f6: None,
+       0x00f7: None,
+       0x00f8: None,
+       0x00f9: None,
+       0x00fa: None,
+       0x00fb: None,
+       0x00fc: None,
+       0x00fd: None,
+       0x00fe: None,
+       0x00ff: None,
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/iso8859_7.py b/Lib/encodings/iso8859_7.py

index c84761098cb7a9b2ed1da732b0fa3aaa7487ea48..48f1bd58dbc35495414a8f80516044c1bdac7719 100644 (file)
--- a/Lib/encodings/iso8859_7.py
+++ b/Lib/encodings/iso8859_7.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from '8859-7.TXT'.
-
+""" Python Character Mapping Codec generated from '8859-7.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,10 +35,14 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x00a1: 0x2018, #       LEFT SINGLE QUOTATION MARK
         0x00a2: 0x2019, #       RIGHT SINGLE QUOTATION MARK
+       0x00a4: None,
+       0x00a5: None,
+       0x00aa: None,
+       0x00ae: None,
         0x00af: 0x2015, #       HORIZONTAL BAR
         0x00b4: 0x0384, #       GREEK TONOS
         0x00b5: 0x0385, #       GREEK DIALYTIKA TONOS
@@ -67,6 +71,7 @@ decoding_map = {
         0x00cf: 0x039f, #       GREEK CAPITAL LETTER OMICRON
         0x00d0: 0x03a0, #       GREEK CAPITAL LETTER PI
         0x00d1: 0x03a1, #       GREEK CAPITAL LETTER RHO
+       0x00d2: None,
         0x00d3: 0x03a3, #       GREEK CAPITAL LETTER SIGMA
         0x00d4: 0x03a4, #       GREEK CAPITAL LETTER TAU
         0x00d5: 0x03a5, #       GREEK CAPITAL LETTER UPSILON
@@ -111,7 +116,8 @@ decoding_map = {
         0x00fc: 0x03cc, #       GREEK SMALL LETTER OMICRON WITH TONOS
         0x00fd: 0x03cd, #       GREEK SMALL LETTER UPSILON WITH TONOS
         0x00fe: 0x03ce, #       GREEK SMALL LETTER OMEGA WITH TONOS
-}
+       0x00ff: None,
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/iso8859_8.py b/Lib/encodings/iso8859_8.py

index 72b783b94092542b07e16ea1a17cad1c4d010de7..a19aa671cca45ffe1864f220953e6c8a156c83d6 100644 (file)
--- a/Lib/encodings/iso8859_8.py
+++ b/Lib/encodings/iso8859_8.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from '8859-8.TXT'.
-
+""" Python Character Mapping Codec generated from '8859-8.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,11 +35,43 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
+       0x00a1: None,
         0x00aa: 0x00d7, #       MULTIPLICATION SIGN
-       0x00af: 0x203e, #       OVERLINE
         0x00ba: 0x00f7, #       DIVISION SIGN
+       0x00bf: None,
+       0x00c0: None,
+       0x00c1: None,
+       0x00c2: None,
+       0x00c3: None,
+       0x00c4: None,
+       0x00c5: None,
+       0x00c6: None,
+       0x00c7: None,
+       0x00c8: None,
+       0x00c9: None,
+       0x00ca: None,
+       0x00cb: None,
+       0x00cc: None,
+       0x00cd: None,
+       0x00ce: None,
+       0x00cf: None,
+       0x00d0: None,
+       0x00d1: None,
+       0x00d2: None,
+       0x00d3: None,
+       0x00d4: None,
+       0x00d5: None,
+       0x00d6: None,
+       0x00d7: None,
+       0x00d8: None,
+       0x00d9: None,
+       0x00da: None,
+       0x00db: None,
+       0x00dc: None,
+       0x00dd: None,
+       0x00de: None,
         0x00df: 0x2017, #       DOUBLE LOW LINE
         0x00e0: 0x05d0, #       HEBREW LETTER ALEF
         0x00e1: 0x05d1, #       HEBREW LETTER BET
@@ -68,7 +100,12 @@ decoding_map = {
         0x00f8: 0x05e8, #       HEBREW LETTER RESH
         0x00f9: 0x05e9, #       HEBREW LETTER SHIN
         0x00fa: 0x05ea, #       HEBREW LETTER TAV
-}
+       0x00fb: None,
+       0x00fc: None,
+       0x00fd: 0x200e, #       LEFT-TO-RIGHT MARK
+       0x00fe: 0x200f, #       RIGHT-TO-LEFT MARK
+       0x00ff: None,
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/iso8859_9.py b/Lib/encodings/iso8859_9.py

index 3f91d32fb39785908b0f2d3cb0d7505c39b1d87d..a27890507a326701f6bd0500147c6be352fe7d05 100644 (file)
--- a/Lib/encodings/iso8859_9.py
+++ b/Lib/encodings/iso8859_9.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from '8859-9.TXT'.
-
+""" Python Character Mapping Codec generated from '8859-9.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,15 +35,15 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x00d0: 0x011e, #       LATIN CAPITAL LETTER G WITH BREVE
         0x00dd: 0x0130, #       LATIN CAPITAL LETTER I WITH DOT ABOVE
         0x00de: 0x015e, #       LATIN CAPITAL LETTER S WITH CEDILLA
         0x00f0: 0x011f, #       LATIN SMALL LETTER G WITH BREVE
         0x00fd: 0x0131, #       LATIN SMALL LETTER DOTLESS I
         0x00fe: 0x015f, #       LATIN SMALL LETTER S WITH CEDILLA
-}
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/koi8_r.py b/Lib/encodings/koi8_r.py

index 0e1c15b1715f44a5d22fc7dd4df83dd638184c16..c28004ef6ecee37f36dc9101775860bd6de793a3 100644 (file)
--- a/Lib/encodings/koi8_r.py
+++ b/Lib/encodings/koi8_r.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'KOI8-R.TXT'.
-
+""" Python Character Mapping Codec generated from 'KOI8-R.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,8 +35,8 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x0080: 0x2500, #       BOX DRAWINGS LIGHT HORIZONTAL
         0x0081: 0x2502, #       BOX DRAWINGS LIGHT VERTICAL
         0x0082: 0x250c, #       BOX DRAWINGS LIGHT DOWN AND RIGHT
@@ -165,7 +165,7 @@ decoding_map = {
         0x00fd: 0x0429, #       CYRILLIC CAPITAL LETTER SHCHA
         0x00fe: 0x0427, #       CYRILLIC CAPITAL LETTER CHE
         0x00ff: 0x042a, #       CYRILLIC CAPITAL LETTER HARD SIGN
-}
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/mac_cyrillic.py b/Lib/encodings/mac_cyrillic.py

index 1314836a5f66790b07720afb9d68cba5cd179216..45528319b22e6deaf0aa77fa17194693f1d65b4b 100644 (file)
--- a/Lib/encodings/mac_cyrillic.py
+++ b/Lib/encodings/mac_cyrillic.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'CYRILLIC.TXT'.
-
+""" Python Character Mapping Codec generated from 'CYRILLIC.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,8 +35,8 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A
         0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE
         0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE
@@ -160,7 +160,7 @@ decoding_map = {
         0x00fd: 0x044d, # CYRILLIC SMALL LETTER E
         0x00fe: 0x044e, # CYRILLIC SMALL LETTER YU
         0x00ff: 0x00a4, # CURRENCY SIGN
-}
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/mac_greek.py b/Lib/encodings/mac_greek.py

index 7673b83e279697b778ac3294dfd6d2ed01c2606c..b7040c4bc4fe69842d96ce218540f934138ae217 100644 (file)
--- a/Lib/encodings/mac_greek.py
+++ b/Lib/encodings/mac_greek.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'GREEK.TXT'.
-
+""" Python Character Mapping Codec generated from 'GREEK.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,8 +35,8 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
         0x0081: 0x00b9, # SUPERSCRIPT ONE
         0x0082: 0x00b2, # SUPERSCRIPT TWO
@@ -163,7 +163,7 @@ decoding_map = {
         0x00fd: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
         0x00fe: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
         0x00ff: None,   # UNDEFINED
-}
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/mac_iceland.py b/Lib/encodings/mac_iceland.py

index 62e1f6330d5110e977554166054a54721b6ddfd7..f20e1344fbabe1477122746143335dfc48bee8a8 100644 (file)
--- a/Lib/encodings/mac_iceland.py
+++ b/Lib/encodings/mac_iceland.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'ICELAND.TXT'.
-
+""" Python Character Mapping Codec generated from 'ICELAND.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,8 +35,8 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
         0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
         0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
@@ -159,7 +159,7 @@ decoding_map = {
         0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT
         0x00fe: 0x02db, # OGONEK
         0x00ff: 0x02c7, # CARON
-}
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/mac_latin2.py b/Lib/encodings/mac_latin2.py

index 7e64959fffc67d2497f5bc78879a8a5f69358c05..0fba502cd6bfb75273a5b8f8a7e777b965e350a2 100644 (file)
--- a/Lib/encodings/mac_latin2.py
+++ b/Lib/encodings/mac_latin2.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'LATIN2.TXT'.
-
+""" Python Character Mapping Codec generated from 'LATIN2.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,8 +35,8 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
         0x0081: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON
         0x0082: 0x0101, # LATIN SMALL LETTER A WITH MACRON
@@ -163,7 +163,7 @@ decoding_map = {
         0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
         0x00fe: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA
         0x00ff: 0x02c7, # CARON
-}
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/mac_roman.py b/Lib/encodings/mac_roman.py

index 9147e93cd60ab12756903e57c9d2a1525f8457f2..6d048a3b6ea436b34718d561ebab3108abea3ed9 100644 (file)
--- a/Lib/encodings/mac_roman.py
+++ b/Lib/encodings/mac_roman.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'ROMAN.TXT'.
-
+""" Python Character Mapping Codec generated from 'ROMAN.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,8 +35,8 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
         0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
         0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
@@ -160,7 +160,7 @@ decoding_map = {
         0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT
         0x00fe: 0x02db, # OGONEK
         0x00ff: 0x02c7, # CARON
-}
+})
  
  ### Encoding Map
  
diff --git a/Lib/encodings/mac_turkish.py b/Lib/encodings/mac_turkish.py

index 76b6bbe3f792deb3b00758784f157e8368466848..c81a8646695cf4c59af13dd9b681143f2924ac72 100644 (file)
--- a/Lib/encodings/mac_turkish.py
+++ b/Lib/encodings/mac_turkish.py
@@ -1,9 +1,9 @@
-""" Python Character Mapping Codec generated from 'TURKISH.TXT'.
-
+""" Python Character Mapping Codec generated from 'TURKISH.TXT' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -35,8 +35,8 @@ def getregentry():
  
  ### Decoding Map
  
-decoding_map = {
-
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
         0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
         0x0081: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
         0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
@@ -160,7 +160,7 @@ decoding_map = {
         0x00fd: 0x02dd, # DOUBLE ACUTE ACCENT
         0x00fe: 0x02db, # OGONEK
         0x00ff: 0x02c7, # CARON
-}
+})
  
  ### Encoding Map
  
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py

index 05aecd87b67516b58200f36059b7b40646b6cb62..579bab1fe2723950bb7a21eecd38c02d3d2f75ea 100644 (file)
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -494,14 +494,15 @@ for encoding in (
      'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
      'cp863', 'cp865', 'cp866',
      'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
-    'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
-    'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1',
+    'iso8859_2', 'iso8859_4', 'iso8859_5', 
+    'iso8859_9', 'koi8_r', 'latin_1',
      'mac_cyrillic', 'mac_latin2',
  
      ### These have undefined mappings:
      #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
      #'cp1256', 'cp1257', 'cp1258',
      #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874',
+    #'iso8859_3', 'iso8859_6', 'iso8859_7', 
      #'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish',
  
      ### These fail the round-trip:
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c

index fe591b52a4965951a7fb8fd0e9eb2addcf0097b8..b9e457d6a7beb553e60d36ccaef75f0b01b15876 100644 (file)
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1970,11 +1970,11 @@ PyObject *PyUnicode_DecodeCharmap(const char *s,
         Py_DECREF(w);
         if (x == NULL) {
             if (PyErr_ExceptionMatches(PyExc_LookupError)) {
-               /* No mapping found: default to Latin-1 mapping */
+               /* No mapping found means: mapping is undefined. */
                 PyErr_Clear();
-               *p++ = (Py_UNICODE)ch;
-               continue;
-           }
+               x = Py_None;
+               Py_INCREF(x);
+           } else
             goto onError;
         }
  
@@ -2086,16 +2086,11 @@ PyObject *PyUnicode_EncodeCharmap(const Py_UNICODE *p,
         Py_DECREF(w);
         if (x == NULL) {
             if (PyErr_ExceptionMatches(PyExc_LookupError)) {
-               /* No mapping found: default to Latin-1 mapping if possible */
+               /* No mapping found means: mapping is undefined. */
                 PyErr_Clear();
-               if (ch < 256) {
-                   *s++ = (char)ch;
-                   continue;
-               }
-               else if (!charmap_encoding_error(&p, &s, errors,
-                                    "missing character mapping"))
-                   continue;
-           }
+               x = Py_None;
+               Py_INCREF(x);
+           } else
             goto onError;
         }
  
diff --git a/Tools/scripts/gencodec.py b/Tools/scripts/gencodec.py

index 45b69b0abd1c19f8fd987117bd72e4d2e2f73dcd..39b42ffc05fa0560d7a8d75e4b06a47ab52c532c 100644 (file)
--- a/Tools/scripts/gencodec.py
+++ b/Tools/scripts/gencodec.py
@@ -1,9 +1,9 @@
  """ Unicode Mapping Parser and Codec Generator.
  
  This script parses Unicode mapping files as available from the Unicode
-site (ftp.unicode.org) and creates Python codec modules from them. The
-codecs use the standard character mapping codec to actually apply the
-mapping.
+site (ftp://ftp.unicode.org/Public/MAPPINGS/) and creates Python codec
+modules from them. The codecs use the standard character mapping codec
+to actually apply the mapping.
  
  Synopsis: gencodec.py dir codec_prefix
  
@@ -18,6 +18,7 @@ same location (with .mapping extension).
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright Guido van Rossum, 2000.
  
  """#"
  
@@ -70,6 +71,10 @@ def readmap(filename,
      lines = f.readlines()
      f.close()
      enc2uni = {}
+    identity = []
+    unmapped = range(256)
+    for i in range(256):
+        unmapped[i] = i
      for line in lines:
          line = strip(line)
          if not line or line[0] == '#':
@@ -85,8 +90,22 @@ def readmap(filename,
              comment = ''
          else:
              comment = comment[1:]
-        if enc != uni:
+        if enc < 256:
+            unmapped.remove(enc)
+            if enc == uni:
+                identity.append(enc)
+            else:
+                enc2uni[enc] = (uni,comment)
+        else:
              enc2uni[enc] = (uni,comment)
+    # If there are more identity-mapped entries than unmapped entries,
+    # it pays to generate an identity dictionary first, and add explicit
+    # mappings to None for the rest
+    if len(identity)>=len(unmapped):
+        for enc in unmapped:
+            enc2uni[enc] = (None, "")
+        enc2uni['IDENTITY'] = 256
+
      return enc2uni
  
  def hexrepr(t,
@@ -143,11 +162,12 @@ def codegen(name,map,comments=1):
      """
      l = [
          '''\
-""" Python Character Mapping Codec generated from '%s'.
+""" Python Character Mapping Codec generated from '%s' with gencodec.py.
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
  
  (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright 2000 Guido van Rossum.
  
  """#"
  
@@ -178,15 +198,23 @@ def getregentry():
      return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
  
  ### Decoding Map
-
-decoding_map = {
  ''' % name,
          ]
+
+    if map.has_key("IDENTITY"):
+        l.append("decoding_map = codecs.make_identity_dict(range(%d))"
+                 % map["IDENTITY"])
+        l.append("decoding_map.update({")
+        splits = 1
+        del map["IDENTITY"]
+    else:
+        l.append("decoding_map = {")
+        splits = 0
+        
      mappings = map.items()
      mappings.sort()
      append = l.append
      i = 0
-    splits = 0
      for e,value in mappings:
          try:
              (u,c) = value
@@ -198,7 +226,7 @@ decoding_map = {
              append('\t%s: %s,\t# %s' % (key,unicoderepr(u),c))
          else:
              append('\t%s: %s,' % (key,unicoderepr(u)))
-        i = i + 1
+        i += 1
          if i == 4096:
              # Split the definition into parts so that the Python
              # parser doesn't dump core
@@ -206,7 +234,7 @@ decoding_map = {
                  append('}')
              else:
                  append('})')
-            append('map.update({')
+            append('decoding_map.update({')
              i = 0
              splits = splits + 1
      if splits == 0:
@@ -265,7 +293,7 @@ def rewritepythondir(dir,prefix='',comments=1):
      
      mapnames = os.listdir(dir)
      for mapname in mapnames:
-        if mapname[-len('.mapping'):] != '.mapping':
+        if not mapname.endswith('.mapping'):
              continue
          codefile = mapname[:-len('.mapping')] + '.py'
          print 'converting %s to %s' % (mapname,
author	Marc-André Lemburg <mal@egenix.com>
	Wed, 3 Jan 2001 21:29:14 +0000 (21:29 +0000)
committer	Marc-André Lemburg <mal@egenix.com>
	Wed, 3 Jan 2001 21:29:14 +0000 (21:29 +0000)
Lib/codecs.py		patch \| blob \| history
Lib/encodings/cp037.py		patch \| blob \| history
Lib/encodings/cp1006.py		patch \| blob \| history
Lib/encodings/cp1026.py		patch \| blob \| history
Lib/encodings/cp1250.py		patch \| blob \| history
Lib/encodings/cp1251.py		patch \| blob \| history
Lib/encodings/cp1252.py		patch \| blob \| history
Lib/encodings/cp1253.py		patch \| blob \| history
Lib/encodings/cp1254.py		patch \| blob \| history
Lib/encodings/cp1255.py		patch \| blob \| history
Lib/encodings/cp1256.py		patch \| blob \| history
Lib/encodings/cp1257.py		patch \| blob \| history
Lib/encodings/cp1258.py		patch \| blob \| history
Lib/encodings/cp424.py		patch \| blob \| history
Lib/encodings/cp437.py		patch \| blob \| history
Lib/encodings/cp500.py		patch \| blob \| history
Lib/encodings/cp737.py		patch \| blob \| history
Lib/encodings/cp775.py		patch \| blob \| history
Lib/encodings/cp850.py		patch \| blob \| history
Lib/encodings/cp852.py		patch \| blob \| history
Lib/encodings/cp855.py		patch \| blob \| history
Lib/encodings/cp856.py		patch \| blob \| history
Lib/encodings/cp857.py		patch \| blob \| history
Lib/encodings/cp860.py		patch \| blob \| history
Lib/encodings/cp861.py		patch \| blob \| history
Lib/encodings/cp862.py		patch \| blob \| history
Lib/encodings/cp863.py		patch \| blob \| history
Lib/encodings/cp864.py		patch \| blob \| history
Lib/encodings/cp865.py		patch \| blob \| history
Lib/encodings/cp866.py		patch \| blob \| history
Lib/encodings/cp869.py		patch \| blob \| history
Lib/encodings/cp874.py		patch \| blob \| history
Lib/encodings/cp875.py		patch \| blob \| history
Lib/encodings/iso8859_1.py		patch \| blob \| history
Lib/encodings/iso8859_10.py		patch \| blob \| history
Lib/encodings/iso8859_13.py		patch \| blob \| history
Lib/encodings/iso8859_14.py		patch \| blob \| history
Lib/encodings/iso8859_15.py		patch \| blob \| history
Lib/encodings/iso8859_2.py		patch \| blob \| history
Lib/encodings/iso8859_3.py		patch \| blob \| history
Lib/encodings/iso8859_4.py		patch \| blob \| history
Lib/encodings/iso8859_5.py		patch \| blob \| history
Lib/encodings/iso8859_6.py		patch \| blob \| history
Lib/encodings/iso8859_7.py		patch \| blob \| history
Lib/encodings/iso8859_8.py		patch \| blob \| history
Lib/encodings/iso8859_9.py		patch \| blob \| history
Lib/encodings/koi8_r.py		patch \| blob \| history
Lib/encodings/mac_cyrillic.py		patch \| blob \| history
Lib/encodings/mac_greek.py		patch \| blob \| history
Lib/encodings/mac_iceland.py		patch \| blob \| history
Lib/encodings/mac_latin2.py		patch \| blob \| history
Lib/encodings/mac_roman.py		patch \| blob \| history
Lib/encodings/mac_turkish.py		patch \| blob \| history
Lib/test/test_unicode.py		patch \| blob \| history
Objects/unicodeobject.c		patch \| blob \| history
Tools/scripts/gencodec.py		patch \| blob \| history