]> granicus.if.org Git - python/commitdiff
locale.py now uses an updated locale alias table (built using
authorMarc-André Lemburg <mal@egenix.com>
Fri, 10 Dec 2004 21:58:14 +0000 (21:58 +0000)
committerMarc-André Lemburg <mal@egenix.com>
Fri, 10 Dec 2004 21:58:14 +0000 (21:58 +0000)
Tools/i18n/makelocalealias.py, a tool to parse the X11 locale
alias file); the encoding lookup was enhanced to use Python's
encoding alias table

As a side effect, this fixes SF bug [ 1080864 ] locale.py doesn't recognize
valid locale setting.

Tools/i18n/makelocalealias.py [new file with mode: 0644]

index 6c6eeeafae8903affddd6a8385b9186d402703e0..82ff28f72ae760f18897a4bb82f049461d770b60 100644 (file)
@@ -11,7 +11,7 @@
-import sys
+import sys, encodings, encodings.aliases
 # Try importing the _locale module.
@@ -223,22 +223,35 @@ def normalize(localename):
         encoding = ''
     # First lookup: fullname (possibly with encoding)
-    code = locale_alias.get(fullname, None)
+    norm_encoding = encoding.replace('-', '')
+    norm_encoding = norm_encoding.replace('_', '')
+    lookup_name = langname + '.' + encoding
+    code = locale_alias.get(lookup_name, None)
     if code is not None:
         return code
+    #print 'first lookup failed'
     # Second try: langname (without encoding)
     code = locale_alias.get(langname, None)
     if code is not None:
+        #print 'langname lookup succeeded'
         if '.' in code:
             langname, defenc = code.split('.')
             langname = code
             defenc = ''
         if encoding:
-            encoding = encoding_alias.get(encoding, encoding)
+            # Convert the encoding to a C lib compatible encoding string
+            norm_encoding = encodings.normalize_encoding(encoding)
+            #print 'norm encoding: %r' % norm_encoding
+            norm_encoding = encodings.aliases.aliases.get(norm_encoding,
+                                                          norm_encoding)
+            #print 'aliased encoding: %r' % norm_encoding
+            encoding = locale_encoding_alias.get(norm_encoding,
+                                                 norm_encoding)
             encoding = defenc
+        #print 'found encoding %r' % encoding
         if encoding:
             return langname + '.' + encoding
@@ -429,41 +442,49 @@ else:
-# The encoding_alias table maps lowercase encoding alias names to C
-# locale encoding names (case-sensitive).
+# The locale_encoding_alias table maps lowercase encoding alias names
+# to C locale encoding names (case-sensitive). Note that normalize()
+# first looks up the encoding in the encodings.aliases dictionary and
+# then applies this mapping to find the correct C lib name for the
+# encoding.
-encoding_alias = {
-        '437':                          'C',
-        'c':                            'C',
-        'iso8859':                      'ISO8859-1',
-        '8859':                         'ISO8859-1',
-        '88591':                        'ISO8859-1',
-        'ascii':                        'ISO8859-1',
-        'en':                           'ISO8859-1',
-        'iso88591':                     'ISO8859-1',
-        'iso_8859-1':                   'ISO8859-1',
-        '885915':                       'ISO8859-15',
-        'iso885915':                    'ISO8859-15',
-        'iso_8859-15':                  'ISO8859-15',
-        'iso8859-2':                    'ISO8859-2',
-        'iso88592':                     'ISO8859-2',
-        'iso_8859-2':                   'ISO8859-2',
-        'iso88595':                     'ISO8859-5',
-        'iso88596':                     'ISO8859-6',
-        'iso88597':                     'ISO8859-7',
-        'iso88598':                     'ISO8859-8',
-        'iso88599':                     'ISO8859-9',
-        'iso-2022-jp':                  'JIS7',
-        'jis':                          'JIS7',
-        'jis7':                         'JIS7',
-        'sjis':                         'SJIS',
-        'tis620':                       'TACTIS',
-        'ajec':                         'eucJP',
-        'eucjp':                        'eucJP',
-        'ujis':                         'eucJP',
-        'utf-8':                        'utf',
-        'utf8':                         'utf',
-        'utf8@ucs4':                    'utf',
+locale_encoding_alias = {
+    # Mappings for non-standard encoding names used in locale names
+    '437':                          'C',
+    'c':                            'C',
+    'en':                           'ISO8859-1',
+    'jis':                          'JIS7',
+    'jis7':                         'JIS7',
+    'ajec':                         'eucJP',
+    # Mappings from Python codec names to C lib encoding names
+    'ascii':                        'ISO8859-1',
+    'latin_1':                      'ISO8859-1',
+    'iso8859_1':                    'ISO8859-1',
+    'iso8859_10':                   'ISO8859-10',
+    'iso8859_11':                   'ISO8859-11',
+    'iso8859_13':                   'ISO8859-13',
+    'iso8859_14':                   'ISO8859-14',
+    'iso8859_15':                   'ISO8859-15',
+    'iso8859_2':                    'ISO8859-2',
+    'iso8859_3':                    'ISO8859-3',
+    'iso8859_4':                    'ISO8859-4',
+    'iso8859_5':                    'ISO8859-5',
+    'iso8859_6':                    'ISO8859-6',
+    'iso8859_7':                    'ISO8859-7',
+    'iso8859_8':                    'ISO8859-8',
+    'iso8859_9':                    'ISO8859-9',
+    'iso2022_jp':                   'JIS7',
+    'shift_jis':                    'SJIS',
+    'tactis':                       'TACTIS',
+    'euc_jp':                       'eucJP',
+    'euc_kr':                       'eucKR',
+    'utf-8':                        'UTF-8',
+    'koi8_r':                       'KOI8-R',
+    'koi8_u':                       'KOI8-U',
+    # XXX This list is still incomplete. If you know more
+    # mappings, please file a bug report. Thanks.
@@ -473,189 +494,820 @@ encoding_alias = {
 # language name is needed to interpret the given encoding alias
 # correctly (CJK codes often have this need).
+# Note that the normalize() function which uses this table
+# removes '_' and '-' characters from the encoding part of the
+# locale name before doing the lookup. This saves a lot of
+# space in the table.
+# MAL 2004-12-10:
+# Updated alias mapping to most recent locale.alias file
+# from X.org distribution using makelocalealias.py.
+# These are the differences compared to the old mapping (Python 2.4
+# and older):
+#    updated 'bg' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251'
+#    updated 'bg_bg' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251'
+#    updated 'bulgarian' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251'
+#    updated 'cz' -> 'cz_CZ.ISO8859-2' to 'cs_CZ.ISO8859-2'
+#    updated 'cz_cz' -> 'cz_CZ.ISO8859-2' to 'cs_CZ.ISO8859-2'
+#    updated 'czech' -> 'cs_CS.ISO8859-2' to 'cs_CZ.ISO8859-2'
+#    updated 'dutch' -> 'nl_BE.ISO8859-1' to 'nl_NL.ISO8859-1'
+#    updated 'et' -> 'et_EE.ISO8859-4' to 'et_EE.ISO8859-15'
+#    updated 'et_ee' -> 'et_EE.ISO8859-4' to 'et_EE.ISO8859-15'
+#    updated 'fi' -> 'fi_FI.ISO8859-1' to 'fi_FI.ISO8859-15'
+#    updated 'fi_fi' -> 'fi_FI.ISO8859-1' to 'fi_FI.ISO8859-15'
+#    updated 'iw' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
+#    updated 'iw_il' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
+#    updated 'japanese' -> 'ja_JP.SJIS' to 'ja_JP.eucJP'
+#    updated 'lt' -> 'lt_LT.ISO8859-4' to 'lt_LT.ISO8859-13'
+#    updated 'lv' -> 'lv_LV.ISO8859-4' to 'lv_LV.ISO8859-13'
+#    updated 'sl' -> 'sl_CS.ISO8859-2' to 'sl_SI.ISO8859-2'
+#    updated 'slovene' -> 'sl_CS.ISO8859-2' to 'sl_SI.ISO8859-2'
+#    updated 'th_th' -> 'th_TH.TACTIS' to 'th_TH.ISO8859-11'
+#    updated 'zh_cn' -> 'zh_CN.eucCN' to 'zh_CN.gb2312'
+#    updated 'zh_cn.big5' -> 'zh_TW.eucTW' to 'zh_TW.big5'
+#    updated 'zh_tw' -> 'zh_TW.eucTW' to 'zh_TW.big5'
 locale_alias = {
-        'american':                      'en_US.ISO8859-1',
-        'ar':                            'ar_AA.ISO8859-6',
-        'ar_aa':                         'ar_AA.ISO8859-6',
-        'ar_sa':                         'ar_SA.ISO8859-6',
-        'arabic':                        'ar_AA.ISO8859-6',
-        'bg':                            'bg_BG.ISO8859-5',
-        'bg_bg':                         'bg_BG.ISO8859-5',
-        'bulgarian':                     'bg_BG.ISO8859-5',
-        'c-french':                      'fr_CA.ISO8859-1',
-        'c':                             'C',
-        'c_c':                           'C',
-        'cextend':                       'en_US.ISO8859-1',
-        'chinese-s':                     'zh_CN.eucCN',
-        'chinese-t':                     'zh_TW.eucTW',
-        'croatian':                      'hr_HR.ISO8859-2',
-        'cs':                            'cs_CZ.ISO8859-2',
-        'cs_cs':                         'cs_CZ.ISO8859-2',
-        'cs_cz':                         'cs_CZ.ISO8859-2',
-        'cz':                            'cz_CZ.ISO8859-2',
-        'cz_cz':                         'cz_CZ.ISO8859-2',
-        'czech':                         'cs_CS.ISO8859-2',
-        'da':                            'da_DK.ISO8859-1',
-        'da_dk':                         'da_DK.ISO8859-1',
-        'danish':                        'da_DK.ISO8859-1',
-        'de':                            'de_DE.ISO8859-1',
-        'de_at':                         'de_AT.ISO8859-1',
-        'de_ch':                         'de_CH.ISO8859-1',
-        'de_de':                         'de_DE.ISO8859-1',
-        'dutch':                         'nl_BE.ISO8859-1',
-        'ee':                            'ee_EE.ISO8859-4',
-        'el':                            'el_GR.ISO8859-7',
-        'el_gr':                         'el_GR.ISO8859-7',
-        'en':                            'en_US.ISO8859-1',
-        'en_au':                         'en_AU.ISO8859-1',
-        'en_ca':                         'en_CA.ISO8859-1',
-        'en_gb':                         'en_GB.ISO8859-1',
-        'en_ie':                         'en_IE.ISO8859-1',
-        'en_nz':                         'en_NZ.ISO8859-1',
-        'en_uk':                         'en_GB.ISO8859-1',
-        'en_us':                         'en_US.ISO8859-1',
-        'eng_gb':                        'en_GB.ISO8859-1',
-        'english':                       'en_EN.ISO8859-1',
-        'english_uk':                    'en_GB.ISO8859-1',
-        'english_united-states':         'en_US.ISO8859-1',
-        'english_us':                    'en_US.ISO8859-1',
-        'es':                            'es_ES.ISO8859-1',
-        'es_ar':                         'es_AR.ISO8859-1',
-        'es_bo':                         'es_BO.ISO8859-1',
-        'es_cl':                         'es_CL.ISO8859-1',
-        'es_co':                         'es_CO.ISO8859-1',
-        'es_cr':                         'es_CR.ISO8859-1',
-        'es_ec':                         'es_EC.ISO8859-1',
-        'es_es':                         'es_ES.ISO8859-1',
-        'es_gt':                         'es_GT.ISO8859-1',
-        'es_mx':                         'es_MX.ISO8859-1',
-        'es_ni':                         'es_NI.ISO8859-1',
-        'es_pa':                         'es_PA.ISO8859-1',
-        'es_pe':                         'es_PE.ISO8859-1',
-        'es_py':                         'es_PY.ISO8859-1',
-        'es_sv':                         'es_SV.ISO8859-1',
-        'es_uy':                         'es_UY.ISO8859-1',
-        'es_ve':                         'es_VE.ISO8859-1',
-        'et':                            'et_EE.ISO8859-4',
-        'et_ee':                         'et_EE.ISO8859-4',
-        'fi':                            'fi_FI.ISO8859-1',
-        'fi_fi':                         'fi_FI.ISO8859-1',
-        'finnish':                       'fi_FI.ISO8859-1',
-        'fr':                            'fr_FR.ISO8859-1',
-        'fr_be':                         'fr_BE.ISO8859-1',
-        'fr_ca':                         'fr_CA.ISO8859-1',
-        'fr_ch':                         'fr_CH.ISO8859-1',
-        'fr_fr':                         'fr_FR.ISO8859-1',
-        'fre_fr':                        'fr_FR.ISO8859-1',
-        'french':                        'fr_FR.ISO8859-1',
-        'french_france':                 'fr_FR.ISO8859-1',
-        'ger_de':                        'de_DE.ISO8859-1',
-        'german':                        'de_DE.ISO8859-1',
-        'german_germany':                'de_DE.ISO8859-1',
-        'greek':                         'el_GR.ISO8859-7',
-        'hebrew':                        'iw_IL.ISO8859-8',
-        'hr':                            'hr_HR.ISO8859-2',
-        'hr_hr':                         'hr_HR.ISO8859-2',
-        'hu':                            'hu_HU.ISO8859-2',
-        'hu_hu':                         'hu_HU.ISO8859-2',
-        'hungarian':                     'hu_HU.ISO8859-2',
-        'icelandic':                     'is_IS.ISO8859-1',
-        'id':                            'id_ID.ISO8859-1',
-        'id_id':                         'id_ID.ISO8859-1',
-        'is':                            'is_IS.ISO8859-1',
-        'is_is':                         'is_IS.ISO8859-1',
-        'iso-8859-1':                    'en_US.ISO8859-1',
-        'iso-8859-15':                   'en_US.ISO8859-15',
-        'iso8859-1':                     'en_US.ISO8859-1',
-        'iso8859-15':                    'en_US.ISO8859-15',
-        'iso_8859_1':                    'en_US.ISO8859-1',
-        'iso_8859_15':                   'en_US.ISO8859-15',
-        'it':                            'it_IT.ISO8859-1',
-        'it_ch':                         'it_CH.ISO8859-1',
-        'it_it':                         'it_IT.ISO8859-1',
-        'italian':                       'it_IT.ISO8859-1',
-        'iw':                            'iw_IL.ISO8859-8',
-        'iw_il':                         'iw_IL.ISO8859-8',
-        'ja':                            'ja_JP.eucJP',
-        'ja.jis':                        'ja_JP.JIS7',
-        'ja.sjis':                       'ja_JP.SJIS',
-        'ja_jp':                         'ja_JP.eucJP',
-        'ja_jp.ajec':                    'ja_JP.eucJP',
-        'ja_jp.euc':                     'ja_JP.eucJP',
-        'ja_jp.eucjp':                   'ja_JP.eucJP',
-        'ja_jp.iso-2022-jp':             'ja_JP.JIS7',
-        'ja_jp.jis':                     'ja_JP.JIS7',
-        'ja_jp.jis7':                    'ja_JP.JIS7',
-        'ja_jp.mscode':                  'ja_JP.SJIS',
-        'ja_jp.sjis':                    'ja_JP.SJIS',
-        'ja_jp.ujis':                    'ja_JP.eucJP',
-        'japan':                         'ja_JP.eucJP',
-        'japanese':                      'ja_JP.SJIS',
-        'japanese-euc':                  'ja_JP.eucJP',
-        'japanese.euc':                  'ja_JP.eucJP',
-        'jp_jp':                         'ja_JP.eucJP',
-        'ko':                            'ko_KR.eucKR',
-        'ko_kr':                         'ko_KR.eucKR',
-        'ko_kr.euc':                     'ko_KR.eucKR',
-        'korean':                        'ko_KR.eucKR',
-        'lt':                            'lt_LT.ISO8859-4',
-        'lv':                            'lv_LV.ISO8859-4',
-        'mk':                            'mk_MK.ISO8859-5',
-        'mk_mk':                         'mk_MK.ISO8859-5',
-        'nl':                            'nl_NL.ISO8859-1',
-        'nl_be':                         'nl_BE.ISO8859-1',
-        'nl_nl':                         'nl_NL.ISO8859-1',
-        'no':                            'no_NO.ISO8859-1',
-        'no_no':                         'no_NO.ISO8859-1',
-        'norwegian':                     'no_NO.ISO8859-1',
-        'pl':                            'pl_PL.ISO8859-2',
-        'pl_pl':                         'pl_PL.ISO8859-2',
-        'polish':                        'pl_PL.ISO8859-2',
-        'portuguese':                    'pt_PT.ISO8859-1',
-        'portuguese_brazil':             'pt_BR.ISO8859-1',
-        'posix':                         'C',
-        'posix-utf2':                    'C',
-        'pt':                            'pt_PT.ISO8859-1',
-        'pt_br':                         'pt_BR.ISO8859-1',
-        'pt_pt':                         'pt_PT.ISO8859-1',
-        'ro':                            'ro_RO.ISO8859-2',
-        'ro_ro':                         'ro_RO.ISO8859-2',
-        'ru':                            'ru_RU.ISO8859-5',
-        'ru_ru':                         'ru_RU.ISO8859-5',
-        'rumanian':                      'ro_RO.ISO8859-2',
-        'russian':                       'ru_RU.ISO8859-5',
-        'serbocroatian':                 'sh_YU.ISO8859-2',
-        'sh':                            'sh_YU.ISO8859-2',
-        'sh_hr':                         'sh_HR.ISO8859-2',
-        'sh_sp':                         'sh_YU.ISO8859-2',
-        'sh_yu':                         'sh_YU.ISO8859-2',
-        'sk':                            'sk_SK.ISO8859-2',
-        'sk_sk':                         'sk_SK.ISO8859-2',
-        'sl':                            'sl_CS.ISO8859-2',
-        'sl_cs':                         'sl_CS.ISO8859-2',
-        'sl_si':                         'sl_SI.ISO8859-2',
-        'slovak':                        'sk_SK.ISO8859-2',
-        'slovene':                       'sl_CS.ISO8859-2',
-        'sp':                            'sp_YU.ISO8859-5',
-        'sp_yu':                         'sp_YU.ISO8859-5',
-        'spanish':                       'es_ES.ISO8859-1',
-        'spanish_spain':                 'es_ES.ISO8859-1',
-        'sr_sp':                         'sr_SP.ISO8859-2',
-        'sv':                            'sv_SE.ISO8859-1',
-        'sv_se':                         'sv_SE.ISO8859-1',
-        'swedish':                       'sv_SE.ISO8859-1',
-        'th_th':                         'th_TH.TACTIS',
-        'tr':                            'tr_TR.ISO8859-9',
-        'tr_tr':                         'tr_TR.ISO8859-9',
-        'turkish':                       'tr_TR.ISO8859-9',
-        'univ':                          'en_US.utf',
-        'universal':                     'en_US.utf',
-        'zh':                            'zh_CN.eucCN',
-        'zh_cn':                         'zh_CN.eucCN',
-        'zh_cn.big5':                    'zh_TW.eucTW',
-        'zh_cn.euc':                     'zh_CN.eucCN',
-        'zh_tw':                         'zh_TW.eucTW',
-        'zh_tw.euc':                     'zh_TW.eucTW',
+    'a3':                                   'a3_AZ.KOI8-C',
+    'a3_az':                                'a3_AZ.KOI8-C',
+    'a3_az.koi8c':                          'a3_AZ.KOI8-C',
+    'af':                                   'af_ZA.ISO8859-1',
+    'af_za':                                'af_ZA.ISO8859-1',
+    'af_za.iso88591':                       'af_ZA.ISO8859-1',
+    'am':                                   'am_ET.UTF-8',
+    'american':                             'en_US.ISO8859-1',
+    'american.iso88591':                    'en_US.ISO8859-1',
+    'ar':                                   'ar_AA.ISO8859-6',
+    'ar_aa':                                'ar_AA.ISO8859-6',
+    'ar_aa.iso88596':                       'ar_AA.ISO8859-6',
+    'ar_aa.utf8':                           'ar_AA.UTF-8',
+    'ar_ae':                                'ar_AE.ISO8859-6',
+    'ar_ae.utf8':                           'ar_AE.UTF-8',
+    'ar_bh':                                'ar_BH.ISO8859-6',
+    'ar_bh.utf8':                           'ar_BH.UTF-8',
+    'ar_dz':                                'ar_DZ.ISO8859-6',
+    'ar_dz.utf8':                           'ar_DZ.UTF-8',
+    'ar_eg':                                'ar_EG.ISO8859-6',
+    'ar_eg.iso88596':                       'ar_EG.ISO8859-6',
+    'ar_eg.utf8':                           'ar_EG.UTF-8',
+    'ar_iq':                                'ar_IQ.ISO8859-6',
+    'ar_iq.utf8':                           'ar_IQ.UTF-8',
+    'ar_jo':                                'ar_JO.ISO8859-6',
+    'ar_jo.utf8':                           'ar_JO.UTF-8',
+    'ar_kw':                                'ar_KW.ISO8859-6',
+    'ar_kw.utf8':                           'ar_KW.UTF-8',
+    'ar_lb':                                'ar_LB.ISO8859-6',
+    'ar_lb.utf8':                           'ar_LB.UTF-8',
+    'ar_ly':                                'ar_LY.ISO8859-6',
+    'ar_ly.utf8':                           'ar_LY.UTF-8',
+    'ar_ma':                                'ar_MA.ISO8859-6',
+    'ar_ma.utf8':                           'ar_MA.UTF-8',
+    'ar_om':                                'ar_OM.ISO8859-6',
+    'ar_om.utf8':                           'ar_OM.UTF-8',
+    'ar_qa':                                'ar_QA.ISO8859-6',
+    'ar_qa.utf8':                           'ar_QA.UTF-8',
+    'ar_sa':                                'ar_SA.ISO8859-6',
+    'ar_sa.iso88596':                       'ar_SA.ISO8859-6',
+    'ar_sa.utf8':                           'ar_SA.UTF-8',
+    'ar_sd':                                'ar_SD.ISO8859-6',
+    'ar_sd.utf8':                           'ar_SD.UTF-8',
+    'ar_sy':                                'ar_SY.ISO8859-6',
+    'ar_sy.utf8':                           'ar_SY.UTF-8',
+    'ar_tn':                                'ar_TN.ISO8859-6',
+    'ar_tn.utf8':                           'ar_TN.UTF-8',
+    'ar_ye':                                'ar_YE.ISO8859-6',
+    'ar_ye.utf8':                           'ar_YE.UTF-8',
+    'arabic':                               'ar_AA.ISO8859-6',
+    'arabic.iso88596':                      'ar_AA.ISO8859-6',
+    'az':                                   'az_AZ.ISO8859-9E',
+    'az_az':                                'az_AZ.ISO8859-9E',
+    'az_az.iso88599e':                      'az_AZ.ISO8859-9E',
+    'be':                                   'be_BY.CP1251',
+    'be_bg.utf8':                           'be_BG.UTF-8',
+    'be_by':                                'be_BY.CP1251',
+    'be_by.cp1251':                         'be_BY.CP1251',
+    'be_by.microsoftcp1251':                'be_BY.CP1251',
+    'be_by.utf8':                           'be_BY.UTF-8',
+    'bg':                                   'bg_BG.CP1251',
+    'bg_bg':                                'bg_BG.CP1251',
+    'bg_bg.cp1251':                         'bg_BG.CP1251',
+    'bg_bg.iso88595':                       'bg_BG.ISO8859-5',
+    'bg_bg.koi8r':                          'bg_BG.KOI8-R',
+    'bg_bg.microsoftcp1251':                'bg_BG.CP1251',
+    'bokmal':                               'nb_NO.ISO8859-1',
+    'bokm\xe5l':                            'nb_NO.ISO8859-1',
+    'br':                                   'br_FR.ISO8859-1',
+    'br_fr':                                'br_FR.ISO8859-1',
+    'br_fr.iso88591':                       'br_FR.ISO8859-1',
+    'br_fr.iso885914':                      'br_FR.ISO8859-14',
+    'br_fr.iso885915':                      'br_FR.ISO8859-15',
+    'br_fr@euro':                           'br_FR.ISO8859-15',
+    'bulgarian':                            'bg_BG.CP1251',
+    'c':                                    'C',
+    'c-french':                             'fr_CA.ISO8859-1',
+    'c-french.iso88591':                    'fr_CA.ISO8859-1',
+    'c.en':                                 'C',
+    'c.iso88591':                           'en_US.ISO8859-1',
+    'c_c':                                  'C',
+    'c_c.c':                                'C',
+    'ca':                                   'ca_ES.ISO8859-1',
+    'ca_es':                                'ca_ES.ISO8859-1',
+    'ca_es.iso88591':                       'ca_ES.ISO8859-1',
+    'ca_es.iso885915':                      'ca_ES.ISO8859-15',
+    'ca_es.utf8':                           'ca_ES.UTF-8',
+    'ca_es@euro':                           'ca_ES.ISO8859-15',
+    'catalan':                              'ca_ES.ISO8859-1',
+    'cextend':                              'en_US.ISO8859-1',
+    'cextend.en':                           'en_US.ISO8859-1',
+    'chinese-s':                            'zh_CN.eucCN',
+    'chinese-t':                            'zh_TW.eucTW',
+    'croatian':                             'hr_HR.ISO8859-2',
+    'cs':                                   'cs_CZ.ISO8859-2',
+    'cs_cs':                                'cs_CZ.ISO8859-2',
+    'cs_cs.iso88592':                       'cs_CZ.ISO8859-2',
+    'cs_cz':                                'cs_CZ.ISO8859-2',
+    'cs_cz.iso88592':                       'cs_CZ.ISO8859-2',
+    'cs_cz.utf8':                           'cs_CZ.UTF-8',
+    'cy':                                   'cy_GB.ISO8859-1',
+    'cy_gb':                                'cy_GB.ISO8859-1',
+    'cy_gb.iso88591':                       'cy_GB.ISO8859-1',
+    'cy_gb.iso885914':                      'cy_GB.ISO8859-14',
+    'cy_gb.iso885915':                      'cy_GB.ISO8859-15',
+    'cy_gb@euro':                           'cy_GB.ISO8859-15',
+    'cz':                                   'cs_CZ.ISO8859-2',
+    'cz_cz':                                'cs_CZ.ISO8859-2',
+    'czech':                                'cs_CZ.ISO8859-2',
+    'da':                                   'da_DK.ISO8859-1',
+    'da_dk':                                'da_DK.ISO8859-1',
+    'da_dk.88591':                          'da_DK.ISO8859-1',
+    'da_dk.885915':                         'da_DK.ISO8859-15',
+    'da_dk.iso88591':                       'da_DK.ISO8859-1',
+    'da_dk.iso885915':                      'da_DK.ISO8859-15',
+    'da_dk.utf8':                           'da_DK.UTF-8',
+    'da_dk@euro':                           'da_DK.ISO8859-15',
+    'danish':                               'da_DK.ISO8859-1',
+    'danish.iso88591':                      'da_DK.ISO8859-1',
+    'dansk':                                'da_DK.ISO8859-1',
+    'de':                                   'de_DE.ISO8859-1',
+    'de_at':                                'de_AT.ISO8859-1',
+    'de_at.iso88591':                       'de_AT.ISO8859-1',
+    'de_at.iso885915':                      'de_AT.ISO8859-15',
+    'de_at.utf8':                           'de_AT.UTF-8',
+    'de_at@euro':                           'de_AT.ISO8859-15',
+    'de_be':                                'de_BE.ISO8859-1',
+    'de_be.iso88591':                       'de_BE.ISO8859-1',
+    'de_be.iso885915':                      'de_BE.ISO8859-15',
+    'de_be@euro':                           'de_BE.ISO8859-15',
+    'de_ch':                                'de_CH.ISO8859-1',
+    'de_ch.iso88591':                       'de_CH.ISO8859-1',
+    'de_ch.iso885915':                      'de_CH.ISO8859-15',
+    'de_ch.utf8':                           'de_CH.UTF-8',
+    'de_ch@euro':                           'de_CH.ISO8859-15',
+    'de_de':                                'de_DE.ISO8859-1',
+    'de_de.88591':                          'de_DE.ISO8859-1',
+    'de_de.885915':                         'de_DE.ISO8859-15',
+    'de_de.885915@euro':                    'de_DE.ISO8859-15',
+    'de_de.iso88591':                       'de_DE.ISO8859-1',
+    'de_de.iso885915':                      'de_DE.ISO8859-15',
+    'de_de.utf8':                           'de_DE.UTF-8',
+    'de_de@euro':                           'de_DE.ISO8859-15',
+    'de_lu':                                'de_LU.ISO8859-1',
+    'de_lu.iso88591':                       'de_LU.ISO8859-1',
+    'de_lu.iso885915':                      'de_LU.ISO8859-15',
+    'de_lu.utf8':                           'de_LU.UTF-8',
+    'de_lu@euro':                           'de_LU.ISO8859-15',
+    'deutsch':                              'de_DE.ISO8859-1',
+    'dutch':                                'nl_NL.ISO8859-1',
+    'dutch.iso88591':                       'nl_BE.ISO8859-1',
+    'ee':                                   'ee_EE.ISO8859-4',
+    'ee_ee':                                'ee_EE.ISO8859-4',
+    'ee_ee.iso88594':                       'ee_EE.ISO8859-4',
+    'eesti':                                'et_EE.ISO8859-1',
+    'el':                                   'el_GR.ISO8859-7',
+    'el_gr':                                'el_GR.ISO8859-7',
+    'el_gr.iso88597':                       'el_GR.ISO8859-7',
+    'el_gr.utf8':                           'el_GR.UTF-8',
+    'el_gr@euro':                           'el_GR.ISO8859-15',
+    'en':                                   'en_US.ISO8859-1',
+    'en.iso88591':                          'en_US.ISO8859-1',
+    'en_au':                                'en_AU.ISO8859-1',
+    'en_au.iso88591':                       'en_AU.ISO8859-1',
+    'en_au.utf8':                           'en_AU.UTF-8',
+    'en_be':                                'en_BE.ISO8859-1',
+    'en_be.utf8':                           'en_BE.UTF-8',
+    'en_be@euro':                           'en_BE.ISO8859-15',
+    'en_bw':                                'en_BW.ISO8859-1',
+    'en_ca':                                'en_CA.ISO8859-1',
+    'en_ca.iso88591':                       'en_CA.ISO8859-1',
+    'en_ca.utf8':                           'en_CA.UTF-8',
+    'en_gb':                                'en_GB.ISO8859-1',
+    'en_gb.88591':                          'en_GB.ISO8859-1',
+    'en_gb.iso88591':                       'en_GB.ISO8859-1',
+    'en_gb.iso885915':                      'en_GB.ISO8859-15',
+    'en_gb.utf8':                           'en_GB.UTF-8',
+    'en_gb@euro':                           'en_GB.ISO8859-15',
+    'en_hk':                                'en_HK.ISO8859-1',
+    'en_ie':                                'en_IE.ISO8859-1',
+    'en_ie.iso88591':                       'en_IE.ISO8859-1',
+    'en_ie.iso885915':                      'en_IE.ISO8859-15',
+    'en_ie.utf8':                           'en_IE.UTF-8',
+    'en_ie@euro':                           'en_IE.ISO8859-15',
+    'en_in':                                'en_IN.ISO8859-1',
+    'en_nz':                                'en_NZ.ISO8859-1',
+    'en_nz.iso88591':                       'en_NZ.ISO8859-1',
+    'en_nz.utf8':                           'en_NZ.UTF-8',
+    'en_ph':                                'en_PH.ISO8859-1',
+    'en_sg':                                'en_SG.ISO8859-1',
+    'en_uk':                                'en_GB.ISO8859-1',
+    'en_us':                                'en_US.ISO8859-1',
+    'en_us.88591':                          'en_US.ISO8859-1',
+    'en_us.885915':                         'en_US.ISO8859-15',
+    'en_us.iso88591':                       'en_US.ISO8859-1',
+    'en_us.iso885915':                      'en_US.ISO8859-15',
+    'en_us.iso885915@euro':                 'en_US.ISO8859-15',
+    'en_us.utf8':                           'en_US.UTF-8',
+    'en_us@euro':                           'en_US.ISO8859-15',
+    'en_us@euro@euro':                      'en_US.ISO8859-15',
+    'en_za':                                'en_ZA.ISO8859-1',
+    'en_za.88591':                          'en_ZA.ISO8859-1',
+    'en_za.iso88591':                       'en_ZA.ISO8859-1',
+    'en_za.iso885915':                      'en_ZA.ISO8859-15',
+    'en_za.utf8':                           'en_ZA.UTF-8',
+    'en_za@euro':                           'en_ZA.ISO8859-15',
+    'en_zw':                                'en_ZW.ISO8859-1',
+    'eng_gb':                               'en_GB.ISO8859-1',
+    'eng_gb.8859':                          'en_GB.ISO8859-1',
+    'english':                              'en_EN.ISO8859-1',
+    'english.iso88591':                     'en_EN.ISO8859-1',
+    'english_uk':                           'en_GB.ISO8859-1',
+    'english_uk.8859':                      'en_GB.ISO8859-1',
+    'english_united-states':                'en_US.ISO8859-1',
+    'english_united-states.437':            'C',
+    'english_us':                           'en_US.ISO8859-1',
+    'english_us.8859':                      'en_US.ISO8859-1',
+    'english_us.ascii':                     'en_US.ISO8859-1',
+    'eo':                                   'eo_XX.ISO8859-3',
+    'eo_eo':                                'eo_EO.ISO8859-3',
+    'eo_eo.iso88593':                       'eo_EO.ISO8859-3',
+    'eo_xx':                                'eo_XX.ISO8859-3',
+    'eo_xx.iso88593':                       'eo_XX.ISO8859-3',
+    'es':                                   'es_ES.ISO8859-1',
+    'es_ar':                                'es_AR.ISO8859-1',
+    'es_ar.iso88591':                       'es_AR.ISO8859-1',
+    'es_ar.utf8':                           'es_AR.UTF-8',
+    'es_bo':                                'es_BO.ISO8859-1',
+    'es_bo.iso88591':                       'es_BO.ISO8859-1',
+    'es_bo.utf8':                           'es_BO.UTF-8',
+    'es_cl':                                'es_CL.ISO8859-1',
+    'es_cl.iso88591':                       'es_CL.ISO8859-1',
+    'es_cl.utf8':                           'es_CL.UTF-8',
+    'es_co':                                'es_CO.ISO8859-1',
+    'es_co.iso88591':                       'es_CO.ISO8859-1',
+    'es_co.utf8':                           'es_CO.UTF-8',
+    'es_cr':                                'es_CR.ISO8859-1',
+    'es_cr.iso88591':                       'es_CR.ISO8859-1',
+    'es_cr.utf8':                           'es_CR.UTF-8',
+    'es_do':                                'es_DO.ISO8859-1',
+    'es_do.iso88591':                       'es_DO.ISO8859-1',
+    'es_do.utf8':                           'es_DO.UTF-8',
+    'es_ec':                                'es_EC.ISO8859-1',
+    'es_ec.iso88591':                       'es_EC.ISO8859-1',
+    'es_ec.utf8':                           'es_EC.UTF-8',
+    'es_es':                                'es_ES.ISO8859-1',
+    'es_es.88591':                          'es_ES.ISO8859-1',
+    'es_es.iso88591':                       'es_ES.ISO8859-1',
+    'es_es.iso885915':                      'es_ES.ISO8859-15',
+    'es_es.utf8':                           'es_ES.UTF-8',
+    'es_es@euro':                           'es_ES.ISO8859-15',
+    'es_gt':                                'es_GT.ISO8859-1',
+    'es_gt.iso88591':                       'es_GT.ISO8859-1',
+    'es_gt.utf8':                           'es_GT.UTF-8',
+    'es_hn':                                'es_HN.ISO8859-1',
+    'es_hn.iso88591':                       'es_HN.ISO8859-1',
+    'es_hn.utf8':                           'es_HN.UTF-8',
+    'es_mx':                                'es_MX.ISO8859-1',
+    'es_mx.iso88591':                       'es_MX.ISO8859-1',
+    'es_mx.utf8':                           'es_MX.UTF-8',
+    'es_ni':                                'es_NI.ISO8859-1',
+    'es_ni.iso88591':                       'es_NI.ISO8859-1',
+    'es_ni.utf8':                           'es_NI.UTF-8',
+    'es_pa':                                'es_PA.ISO8859-1',
+    'es_pa.iso88591':                       'es_PA.ISO8859-1',
+    'es_pa.iso885915':                      'es_PA.ISO8859-15',
+    'es_pa.utf8':                           'es_PA.UTF-8',
+    'es_pa@euro':                           'es_PA.ISO8859-15',
+    'es_pe':                                'es_PE.ISO8859-1',
+    'es_pe.iso88591':                       'es_PE.ISO8859-1',
+    'es_pe.iso885915':                      'es_PE.ISO8859-15',
+    'es_pe.utf8':                           'es_PE.UTF-8',
+    'es_pe@euro':                           'es_PE.ISO8859-15',
+    'es_pr':                                'es_PR.ISO8859-1',
+    'es_pr.iso88591':                       'es_PR.ISO8859-1',
+    'es_pr.utf8':                           'es_PR.UTF-8',
+    'es_py':                                'es_PY.ISO8859-1',
+    'es_py.iso88591':                       'es_PY.ISO8859-1',
+    'es_py.iso885915':                      'es_PY.ISO8859-15',
+    'es_py.utf8':                           'es_PY.UTF-8',
+    'es_py@euro':                           'es_PY.ISO8859-15',
+    'es_sv':                                'es_SV.ISO8859-1',
+    'es_sv.iso88591':                       'es_SV.ISO8859-1',
+    'es_sv.iso885915':                      'es_SV.ISO8859-15',
+    'es_sv.utf8':                           'es_SV.UTF-8',
+    'es_sv@euro':                           'es_SV.ISO8859-15',
+    'es_us':                                'es_US.ISO8859-1',
+    'es_uy':                                'es_UY.ISO8859-1',
+    'es_uy.iso88591':                       'es_UY.ISO8859-1',
+    'es_uy.iso885915':                      'es_UY.ISO8859-15',
+    'es_uy.utf8':                           'es_UY.UTF-8',
+    'es_uy@euro':                           'es_UY.ISO8859-15',
+    'es_ve':                                'es_VE.ISO8859-1',
+    'es_ve.iso88591':                       'es_VE.ISO8859-1',
+    'es_ve.iso885915':                      'es_VE.ISO8859-15',
+    'es_ve.utf8':                           'es_VE.UTF-8',
+    'es_ve@euro':                           'es_VE.ISO8859-15',
+    'estonian':                             'et_EE.ISO8859-1',
+    'et':                                   'et_EE.ISO8859-15',
+    'et_ee':                                'et_EE.ISO8859-15',
+    'et_ee.iso88591':                       'et_EE.ISO8859-1',
+    'et_ee.iso885913':                      'et_EE.ISO8859-13',
+    'et_ee.iso885915':                      'et_EE.ISO8859-15',
+    'et_ee.iso88594':                       'et_EE.ISO8859-4',
+    'et_ee.utf8':                           'et_EE.UTF-8',
+    'et_ee@euro':                           'et_EE.ISO8859-15',
+    'eu':                                   'eu_ES.ISO8859-1',
+    'eu_es':                                'eu_ES.ISO8859-1',
+    'eu_es.iso88591':                       'eu_ES.ISO8859-1',
+    'eu_es.iso885915':                      'eu_ES.ISO8859-15',
+    'eu_es@euro':                           'eu_ES.ISO8859-15',
+    'fa':                                   'fa_IR.UTF-8',
+    'fa_ir':                                'fa_IR.UTF-8',
+    'fa_ir.isiri3342':                      'fa_IR.ISIRI-3342',
+    'fa_ir.utf8':                           'fa_IR.UTF-8',
+    'fi':                                   'fi_FI.ISO8859-15',
+    'fi_fi':                                'fi_FI.ISO8859-15',
+    'fi_fi.88591':                          'fi_FI.ISO8859-1',
+    'fi_fi.iso88591':                       'fi_FI.ISO8859-1',
+    'fi_fi.iso885915':                      'fi_FI.ISO8859-15',
+    'fi_fi.utf8':                           'fi_FI.UTF-8',
+    'fi_fi.utf8@euro':                      'fi_FI.UTF-8',
+    'fi_fi@euro':                           'fi_FI.ISO8859-15',
+    'finnish':                              'fi_FI.ISO8859-1',
+    'finnish.iso88591':                     'fi_FI.ISO8859-1',
+    'fo':                                   'fo_FO.ISO8859-1',
+    'fo_fo':                                'fo_FO.ISO8859-1',
+    'fo_fo.iso88591':                       'fo_FO.ISO8859-1',
+    'fo_fo.iso885915':                      'fo_FO.ISO8859-15',
+    'fo_fo.utf8':                           'fo_FO.UTF-8',
+    'fo_fo@euro':                           'fo_FO.ISO8859-15',
+    'fr':                                   'fr_FR.ISO8859-1',
+    'fr_be':                                'fr_BE.ISO8859-1',
+    'fr_be.88591':                          'fr_BE.ISO8859-1',
+    'fr_be.iso88591':                       'fr_BE.ISO8859-1',
+    'fr_be.iso885915':                      'fr_BE.ISO8859-15',
+    'fr_be.utf8':                           'fr_BE.UTF-8',
+    'fr_be@euro':                           'fr_BE.ISO8859-15',
+    'fr_ca':                                'fr_CA.ISO8859-1',
+    'fr_ca.88591':                          'fr_CA.ISO8859-1',
+    'fr_ca.iso88591':                       'fr_CA.ISO8859-1',
+    'fr_ca.iso885915':                      'fr_CA.ISO8859-15',
+    'fr_ca.utf8':                           'fr_CA.UTF-8',
+    'fr_ca@euro':                           'fr_CA.ISO8859-15',
+    'fr_ch':                                'fr_CH.ISO8859-1',
+    'fr_ch.88591':                          'fr_CH.ISO8859-1',
+    'fr_ch.iso88591':                       'fr_CH.ISO8859-1',
+    'fr_ch.iso885915':                      'fr_CH.ISO8859-15',
+    'fr_ch.utf8':                           'fr_CH.UTF-8',
+    'fr_ch@euro':                           'fr_CH.ISO8859-15',
+    'fr_fr':                                'fr_FR.ISO8859-1',
+    'fr_fr.88591':                          'fr_FR.ISO8859-1',
+    'fr_fr.iso88591':                       'fr_FR.ISO8859-1',
+    'fr_fr.iso885915':                      'fr_FR.ISO8859-15',
+    'fr_fr.utf8':                           'fr_FR.UTF-8',
+    'fr_fr@euro':                           'fr_FR.ISO8859-15',
+    'fr_lu':                                'fr_LU.ISO8859-1',
+    'fr_lu.88591':                          'fr_LU.ISO8859-1',
+    'fr_lu.iso88591':                       'fr_LU.ISO8859-1',
+    'fr_lu.iso885915':                      'fr_LU.ISO8859-15',
+    'fr_lu.utf8':                           'fr_LU.UTF-8',
+    'fr_lu@euro':                           'fr_LU.ISO8859-15',
+    'fran\xe7ais':                          'fr_FR.ISO8859-1',
+    'fre_fr':                               'fr_FR.ISO8859-1',
+    'fre_fr.8859':                          'fr_FR.ISO8859-1',
+    'french':                               'fr_FR.ISO8859-1',
+    'french.iso88591':                      'fr_CH.ISO8859-1',
+    'french_france':                        'fr_FR.ISO8859-1',
+    'french_france.8859':                   'fr_FR.ISO8859-1',
+    'ga':                                   'ga_IE.ISO8859-1',
+    'ga_ie':                                'ga_IE.ISO8859-1',
+    'ga_ie.iso88591':                       'ga_IE.ISO8859-1',
+    'ga_ie.iso885914':                      'ga_IE.ISO8859-14',
+    'ga_ie.iso885915':                      'ga_IE.ISO8859-15',
+    'ga_ie.utf8':                           'ga_IE.UTF-8',
+    'ga_ie@euro':                           'ga_IE.ISO8859-15',
+    'galego':                               'gl_ES.ISO8859-1',
+    'galician':                             'gl_ES.ISO8859-1',
+    'gd':                                   'gd_GB.ISO8859-1',
+    'gd_gb':                                'gd_GB.ISO8859-1',
+    'gd_gb.iso88591':                       'gd_GB.ISO8859-1',
+    'gd_gb.iso885914':                      'gd_GB.ISO8859-14',
+    'gd_gb.iso885915':                      'gd_GB.ISO8859-15',
+    'gd_gb@euro':                           'gd_GB.ISO8859-15',
+    'ger_de':                               'de_DE.ISO8859-1',
+    'ger_de.8859':                          'de_DE.ISO8859-1',
+    'german':                               'de_DE.ISO8859-1',
+    'german.iso88591':                      'de_CH.ISO8859-1',
+    'german_germany':                       'de_DE.ISO8859-1',
+    'german_germany.8859':                  'de_DE.ISO8859-1',
+    'gl':                                   'gl_ES.ISO8859-1',
+    'gl_es':                                'gl_ES.ISO8859-1',
+    'gl_es.iso88591':                       'gl_ES.ISO8859-1',
+    'gl_es.iso885915':                      'gl_ES.ISO8859-15',
+    'gl_es.utf8':                           'gl_ES.UTF-8',
+    'gl_es@euro':                           'gl_ES.ISO8859-15',
+    'greek':                                'el_GR.ISO8859-7',
+    'greek.iso88597':                       'el_GR.ISO8859-7',
+    'gv':                                   'gv_GB.ISO8859-1',
+    'gv_gb':                                'gv_GB.ISO8859-1',
+    'gv_gb.iso88591':                       'gv_GB.ISO8859-1',
+    'gv_gb.iso885914':                      'gv_GB.ISO8859-14',
+    'gv_gb.iso885915':                      'gv_GB.ISO8859-15',
+    'gv_gb@euro':                           'gv_GB.ISO8859-15',
+    'he':                                   'he_IL.ISO8859-8',
+    'he_il':                                'he_IL.ISO8859-8',
+    'he_il.cp1255':                         'he_IL.CP1255',
+    'he_il.iso88598':                       'he_IL.ISO8859-8',
+    'he_il.microsoftcp1255':                'he_IL.CP1255',
+    'he_il.utf8':                           'he_IL.UTF-8',
+    'hebrew':                               'iw_IL.ISO8859-8',
+    'hebrew.iso88598':                      'iw_IL.ISO8859-8',
+    'hi':                                   'hi_IN.ISCII-DEV',
+    'hi_in':                                'hi_IN.ISCII-DEV',
+    'hi_in.isciidev':                       'hi_IN.ISCII-DEV',
+    'hi_in.utf8':                           'hi_IN.UTF-8',
+    'hr':                                   'hr_HR.ISO8859-2',
+    'hr_hr':                                'hr_HR.ISO8859-2',
+    'hr_hr.iso88592':                       'hr_HR.ISO8859-2',
+    'hr_hr.utf8':                           'hr_HR.UTF-8',
+    'hrvatski':                             'hr_HR.ISO8859-2',
+    'hu':                                   'hu_HU.ISO8859-2',
+    'hu_hu':                                'hu_HU.ISO8859-2',
+    'hu_hu.iso88592':                       'hu_HU.ISO8859-2',
+    'hungarian':                            'hu_HU.ISO8859-2',
+    'icelandic':                            'is_IS.ISO8859-1',
+    'icelandic.iso88591':                   'is_IS.ISO8859-1',
+    'id':                                   'id_ID.ISO8859-1',
+    'id_id':                                'id_ID.ISO8859-1',
+    'in':                                   'id_ID.ISO8859-1',
+    'in_id':                                'id_ID.ISO8859-1',
+    'is':                                   'is_IS.ISO8859-1',
+    'is_is':                                'is_IS.ISO8859-1',
+    'is_is.iso88591':                       'is_IS.ISO8859-1',
+    'is_is.iso885915':                      'is_IS.ISO8859-15',
+    'is_is.utf8':                           'is_IS.UTF-8',
+    'is_is@euro':                           'is_IS.ISO8859-15',
+    'iso-8859-1':                           'en_US.ISO8859-1',
+    'iso-8859-15':                          'en_US.ISO8859-15',
+    'iso8859-1':                            'en_US.ISO8859-1',
+    'iso8859-15':                           'en_US.ISO8859-15',
+    'iso_8859_1':                           'en_US.ISO8859-1',
+    'iso_8859_15':                          'en_US.ISO8859-15',
+    'it':                                   'it_IT.ISO8859-1',
+    'it_ch':                                'it_CH.ISO8859-1',
+    'it_ch.iso88591':                       'it_CH.ISO8859-1',
+    'it_ch.iso885915':                      'it_CH.ISO8859-15',
+    'it_ch.utf8':                           'it_CH.UTF-8',
+    'it_ch@euro':                           'it_CH.ISO8859-15',
+    'it_it':                                'it_IT.ISO8859-1',
+    'it_it.88591':                          'it_IT.ISO8859-1',
+    'it_it.iso88591':                       'it_IT.ISO8859-1',
+    'it_it.iso885915':                      'it_IT.ISO8859-15',
+    'it_it.utf8':                           'it_IT.UTF-8',
+    'it_it@euro':                           'it_IT.ISO8859-15',
+    'italian':                              'it_IT.ISO8859-1',
+    'italian.iso88591':                     'it_IT.ISO8859-1',
+    'iu':                                   'iu_CA.NUNACOM-8',
+    'iu_ca':                                'iu_CA.NUNACOM-8',
+    'iu_ca.nunacom8':                       'iu_CA.NUNACOM-8',
+    'iw':                                   'he_IL.ISO8859-8',
+    'iw_il':                                'he_IL.ISO8859-8',
+    'iw_il.iso88598':                       'he_IL.ISO8859-8',
+    'ja':                                   'ja_JP.eucJP',
+    'ja.jis':                               'ja_JP.JIS7',
+    'ja.sjis':                              'ja_JP.SJIS',
+    'ja_jp':                                'ja_JP.eucJP',
+    'ja_jp.ajec':                           'ja_JP.eucJP',
+    'ja_jp.euc':                            'ja_JP.eucJP',
+    'ja_jp.eucjp':                          'ja_JP.eucJP',
+    'ja_jp.iso-2022-jp':                    'ja_JP.JIS7',
+    'ja_jp.iso2022jp':                      'ja_JP.JIS7',
+    'ja_jp.jis':                            'ja_JP.JIS7',
+    'ja_jp.jis7':                           'ja_JP.JIS7',
+    'ja_jp.mscode':                         'ja_JP.SJIS',
+    'ja_jp.sjis':                           'ja_JP.SJIS',
+    'ja_jp.ujis':                           'ja_JP.eucJP',
+    'ja_jp.utf8':                           'ja_JP.UTF-8',
+    'japan':                                'ja_JP.eucJP',
+    'japanese':                             'ja_JP.eucJP',
+    'japanese-euc':                         'ja_JP.eucJP',
+    'japanese.euc':                         'ja_JP.eucJP',
+    'japanese.sjis':                        'ja_JP.SJIS',
+    'jp_jp':                                'ja_JP.eucJP',
+    'ka':                                   'ka_GE.GEORGIAN-ACADEMY',
+    'ka_ge':                                'ka_GE.GEORGIAN-ACADEMY',
+    'ka_ge.georgianacademy':                'ka_GE.GEORGIAN-ACADEMY',
+    'ka_ge.georgianps':                     'ka_GE.GEORGIAN-PS',
+    'ka_ge.georgianrs':                     'ka_GE.GEORGIAN-ACADEMY',
+    'kl':                                   'kl_GL.ISO8859-1',
+    'kl_gl':                                'kl_GL.ISO8859-1',
+    'kl_gl.iso88591':                       'kl_GL.ISO8859-1',
+    'kl_gl.iso885915':                      'kl_GL.ISO8859-15',
+    'kl_gl.utf8':                           'kl_GL.UTF-8',
+    'kl_gl@euro':                           'kl_GL.ISO8859-15',
+    'ko':                                   'ko_KR.eucKR',
+    'ko_kr':                                'ko_KR.eucKR',
+    'ko_kr.euc':                            'ko_KR.eucKR',
+    'ko_kr.euckr':                          'ko_KR.eucKR',
+    'ko_kr.utf8':                           'ko_KR.UTF-8',
+    'korean':                               'ko_KR.eucKR',
+    'korean.euc':                           'ko_KR.eucKR',
+    'kw':                                   'kw_GB.ISO8859-1',
+    'kw_gb':                                'kw_GB.ISO8859-1',
+    'kw_gb.iso88591':                       'kw_GB.ISO8859-1',
+    'kw_gb.iso885914':                      'kw_GB.ISO8859-14',
+    'kw_gb.iso885915':                      'kw_GB.ISO8859-15',
+    'kw_gb@euro':                           'kw_GB.ISO8859-15',
+    'lithuanian':                           'lt_LT.ISO8859-13',
+    'lo':                                   'lo_LA.MULELAO-1',
+    'lo_la':                                'lo_LA.MULELAO-1',
+    'lo_la.cp1133':                         'lo_LA.IBM-CP1133',
+    'lo_la.ibmcp1133':                      'lo_LA.IBM-CP1133',
+    'lo_la.mulelao1':                       'lo_LA.MULELAO-1',
+    'lt':                                   'lt_LT.ISO8859-13',
+    'lt_lt':                                'lt_LT.ISO8859-13',
+    'lt_lt.iso885913':                      'lt_LT.ISO8859-13',
+    'lt_lt.iso88594':                       'lt_LT.ISO8859-4',
+    'lt_lt.utf8':                           'lt_LT.UTF-8',
+    'lv':                                   'lv_LV.ISO8859-13',
+    'lv_lv':                                'lv_LV.ISO8859-13',
+    'lv_lv.iso885913':                      'lv_LV.ISO8859-13',
+    'lv_lv.iso88594':                       'lv_LV.ISO8859-4',
+    'lv_lv.utf8':                           'lv_LV.UTF-8',
+    'mi':                                   'mi_NZ.ISO8859-1',
+    'mi_nz':                                'mi_NZ.ISO8859-1',
+    'mi_nz.iso88591':                       'mi_NZ.ISO8859-1',
+    'mk':                                   'mk_MK.ISO8859-5',
+    'mk_mk':                                'mk_MK.ISO8859-5',
+    'mk_mk.cp1251':                         'mk_MK.CP1251',
+    'mk_mk.iso88595':                       'mk_MK.ISO8859-5',
+    'mk_mk.microsoftcp1251':                'mk_MK.CP1251',
+    'mk_mk.utf8':                           'mk_MK.UTF-8',
+    'mr_in.utf8':                           'mr_IN.UTF-8',
+    'ms':                                   'ms_MY.ISO8859-1',
+    'ms_my':                                'ms_MY.ISO8859-1',
+    'ms_my.iso88591':                       'ms_MY.ISO8859-1',
+    'mt':                                   'mt_MT.ISO8859-3',
+    'mt_mt':                                'mt_MT.ISO8859-3',
+    'mt_mt.iso88593':                       'mt_MT.ISO8859-3',
+    'nb':                                   'nb_NO.ISO8859-1',
+    'nb_no':                                'nb_NO.ISO8859-1',
+    'nb_no.88591':                          'nb_NO.ISO8859-1',
+    'nb_no.iso88591':                       'nb_NO.ISO8859-1',
+    'nb_no.iso885915':                      'nb_NO.ISO8859-15',
+    'nb_no@euro':                           'nb_NO.ISO8859-15',
+    'nl':                                   'nl_NL.ISO8859-1',
+    'nl_be':                                'nl_BE.ISO8859-1',
+    'nl_be.88591':                          'nl_BE.ISO8859-1',
+    'nl_be.iso88591':                       'nl_BE.ISO8859-1',
+    'nl_be.iso885915':                      'nl_BE.ISO8859-15',
+    'nl_be.utf8':                           'nl_BE.UTF-8',
+    'nl_be@euro':                           'nl_BE.ISO8859-15',
+    'nl_nl':                                'nl_NL.ISO8859-1',
+    'nl_nl.88591':                          'nl_NL.ISO8859-1',
+    'nl_nl.iso88591':                       'nl_NL.ISO8859-1',
+    'nl_nl.iso885915':                      'nl_NL.ISO8859-15',
+    'nl_nl.utf8':                           'nl_NL.UTF-8',
+    'nl_nl@euro':                           'nl_NL.ISO8859-15',
+    'nn':                                   'nn_NO.ISO8859-1',
+    'nn_no':                                'nn_NO.ISO8859-1',
+    'nn_no.88591':                          'nn_NO.ISO8859-1',
+    'nn_no.iso88591':                       'nn_NO.ISO8859-1',
+    'nn_no.iso885915':                      'nn_NO.ISO8859-15',
+    'nn_no@euro':                           'nn_NO.ISO8859-15',
+    'no':                                   'no_NO.ISO8859-1',
+    'no@nynorsk':                           'ny_NO.ISO8859-1',
+    'no_no':                                'no_NO.ISO8859-1',
+    'no_no.88591':                          'no_NO.ISO8859-1',
+    'no_no.iso88591':                       'no_NO.ISO8859-1',
+    'no_no.iso885915':                      'no_NO.ISO8859-15',
+    'no_no.utf8':                           'no_NO.UTF-8',
+    'no_no@euro':                           'no_NO.ISO8859-15',
+    'norwegian':                            'no_NO.ISO8859-1',
+    'norwegian.iso88591':                   'no_NO.ISO8859-1',
+    'ny':                                   'ny_NO.ISO8859-1',
+    'ny_no':                                'ny_NO.ISO8859-1',
+    'ny_no.88591':                          'ny_NO.ISO8859-1',
+    'ny_no.iso88591':                       'ny_NO.ISO8859-1',
+    'ny_no.iso885915':                      'ny_NO.ISO8859-15',
+    'ny_no@euro':                           'ny_NO.ISO8859-15',
+    'nynorsk':                              'nn_NO.ISO8859-1',
+    'oc':                                   'oc_FR.ISO8859-1',
+    'oc_fr':                                'oc_FR.ISO8859-1',
+    'oc_fr.iso88591':                       'oc_FR.ISO8859-1',
+    'oc_fr.iso885915':                      'oc_FR.ISO8859-15',
+    'oc_fr@euro':                           'oc_FR.ISO8859-15',
+    'pd':                                   'pd_US.ISO8859-1',
+    'pd_de':                                'pd_DE.ISO8859-1',
+    'pd_de.iso88591':                       'pd_DE.ISO8859-1',
+    'pd_de.iso885915':                      'pd_DE.ISO8859-15',
+    'pd_de@euro':                           'pd_DE.ISO8859-15',
+    'pd_us':                                'pd_US.ISO8859-1',
+    'pd_us.iso88591':                       'pd_US.ISO8859-1',
+    'pd_us.iso885915':                      'pd_US.ISO8859-15',
+    'pd_us@euro':                           'pd_US.ISO8859-15',
+    'ph':                                   'ph_PH.ISO8859-1',
+    'ph_ph':                                'ph_PH.ISO8859-1',
+    'ph_ph.iso88591':                       'ph_PH.ISO8859-1',
+    'pl':                                   'pl_PL.ISO8859-2',
+    'pl_pl':                                'pl_PL.ISO8859-2',
+    'pl_pl.iso88592':                       'pl_PL.ISO8859-2',
+    'pl_pl.utf8':                           'pl_PL.UTF-8',
+    'polish':                               'pl_PL.ISO8859-2',
+    'portuguese':                           'pt_PT.ISO8859-1',
+    'portuguese.iso88591':                  'pt_PT.ISO8859-1',
+    'portuguese_brazil':                    'pt_BR.ISO8859-1',
+    'portuguese_brazil.8859':               'pt_BR.ISO8859-1',
+    'posix':                                'C',
+    'posix-utf2':                           'C',
+    'pp':                                   'pp_AN.ISO8859-1',
+    'pp_an':                                'pp_AN.ISO8859-1',
+    'pp_an.iso88591':                       'pp_AN.ISO8859-1',
+    'pt':                                   'pt_PT.ISO8859-1',
+    'pt_br':                                'pt_BR.ISO8859-1',
+    'pt_br.88591':                          'pt_BR.ISO8859-1',
+    'pt_br.iso88591':                       'pt_BR.ISO8859-1',
+    'pt_br.iso885915':                      'pt_BR.ISO8859-15',
+    'pt_br.utf8':                           'pt_BR.UTF-8',
+    'pt_br@euro':                           'pt_BR.ISO8859-15',
+    'pt_pt':                                'pt_PT.ISO8859-1',
+    'pt_pt.88591':                          'pt_PT.ISO8859-1',
+    'pt_pt.iso88591':                       'pt_PT.ISO8859-1',
+    'pt_pt.iso885915':                      'pt_PT.ISO8859-15',
+    'pt_pt.utf8':                           'pt_PT.UTF-8',
+    'pt_pt.utf8@euro':                      'pt_PT.UTF-8',
+    'pt_pt@euro':                           'pt_PT.ISO8859-15',
+    'ro':                                   'ro_RO.ISO8859-2',
+    'ro_ro':                                'ro_RO.ISO8859-2',
+    'ro_ro.iso88592':                       'ro_RO.ISO8859-2',
+    'ro_ro.utf8':                           'ro_RO.UTF-8',
+    'romanian':                             'ro_RO.ISO8859-2',
+    'ru':                                   'ru_RU.ISO8859-5',
+    'ru_ru':                                'ru_RU.ISO8859-5',
+    'ru_ru.cp1251':                         'ru_RU.CP1251',
+    'ru_ru.iso88595':                       'ru_RU.ISO8859-5',
+    'ru_ru.koi8r':                          'ru_RU.KOI8-R',
+    'ru_ru.microsoftcp1251':                'ru_RU.CP1251',
+    'ru_ua':                                'ru_UA.KOI8-U',
+    'ru_ua.cp1251':                         'ru_UA.CP1251',
+    'ru_ua.koi8u':                          'ru_UA.KOI8-U',
+    'ru_ua.microsoftcp1251':                'ru_UA.CP1251',
+    'rumanian':                             'ro_RO.ISO8859-2',
+    'russian':                              'ru_RU.ISO8859-5',
+    'se_no':                                'se_NO.UTF-8',
+    'se_no.utf8':                           'se_NO.UTF-8',
+    'serbocroatian':                        'sh_YU.ISO8859-2',
+    'sh':                                   'sh_YU.ISO8859-2',
+    'sh_hr':                                'sh_HR.ISO8859-2',
+    'sh_hr.iso88592':                       'sh_HR.ISO8859-2',
+    'sh_sp':                                'sh_YU.ISO8859-2',
+    'sh_yu':                                'sh_YU.ISO8859-2',
+    'sh_yu.utf8':                           'sh_YU.UTF-8',
+    'sk':                                   'sk_SK.ISO8859-2',
+    'sk_sk':                                'sk_SK.ISO8859-2',
+    'sk_sk.iso88592':                       'sk_SK.ISO8859-2',
+    'sk_sk.utf8':                           'sk_SK.UTF-8',
+    'sl':                                   'sl_SI.ISO8859-2',
+    'sl_cs':                                'sl_CS.ISO8859-2',
+    'sl_si':                                'sl_SI.ISO8859-2',
+    'sl_si.iso88592':                       'sl_SI.ISO8859-2',
+    'sl_si.utf8':                           'sl_SI.UTF-8',
+    'slovak':                               'sk_SK.ISO8859-2',
+    'slovene':                              'sl_SI.ISO8859-2',
+    'slovenian':                            'sl_SI.ISO8859-2',
+    'sp':                                   'sp_YU.ISO8859-5',
+    'sp_yu':                                'sp_YU.ISO8859-5',
+    'spanish':                              'es_ES.ISO8859-1',
+    'spanish.iso88591':                     'es_ES.ISO8859-1',
+    'spanish_spain':                        'es_ES.ISO8859-1',
+    'spanish_spain.8859':                   'es_ES.ISO8859-1',
+    'sq':                                   'sq_AL.ISO8859-2',
+    'sq_al':                                'sq_AL.ISO8859-2',
+    'sq_al.iso88592':                       'sq_AL.ISO8859-2',
+    'sq_al.utf8':                           'sq_AL.UTF-8',
+    'sr':                                   'sr_YU.ISO8859-5',
+    'sr@cyrillic':                          'sr_YU.ISO8859-5',
+    'sr_sp':                                'sr_SP.ISO8859-2',
+    'sr_yu':                                'sr_YU.ISO8859-5',
+    'sr_yu.cp1251@cyrillic':                'sr_YU.CP1251',
+    'sr_yu.iso88592':                       'sr_YU.ISO8859-2',
+    'sr_yu.iso88595':                       'sr_YU.ISO8859-5',
+    'sr_yu.iso88595@cyrillic':              'sr_YU.ISO8859-5',
+    'sr_yu.microsoftcp1251@cyrillic':       'sr_YU.CP1251',
+    'sr_yu.utf8':                           'sr_YU.UTF-8',
+    'sr_yu.utf8@cyrillic':                  'sr_YU.UTF-8',
+    'sr_yu@cyrillic':                       'sr_YU.ISO8859-5',
+    'sv':                                   'sv_SE.ISO8859-1',
+    'sv_fi':                                'sv_FI.ISO8859-1',
+    'sv_fi.iso88591':                       'sv_FI.ISO8859-1',
+    'sv_fi.iso885915':                      'sv_FI.ISO8859-15',
+    'sv_fi.utf8':                           'sv_FI.UTF-8',
+    'sv_fi@euro':                           'sv_FI.ISO8859-15',
+    'sv_se':                                'sv_SE.ISO8859-1',
+    'sv_se.88591':                          'sv_SE.ISO8859-1',
+    'sv_se.iso88591':                       'sv_SE.ISO8859-1',
+    'sv_se.iso885915':                      'sv_SE.ISO8859-15',
+    'sv_se.utf8':                           'sv_SE.UTF-8',
+    'sv_se@euro':                           'sv_SE.ISO8859-15',
+    'swedish':                              'sv_SE.ISO8859-1',
+    'swedish.iso88591':                     'sv_SE.ISO8859-1',
+    'ta':                                   'ta_IN.TSCII-0',
+    'ta_in':                                'ta_IN.TSCII-0',
+    'ta_in.tscii':                          'ta_IN.TSCII-0',
+    'ta_in.tscii0':                         'ta_IN.TSCII-0',
+    'tg':                                   'tg_TJ.KOI8-C',
+    'tg_tj':                                'tg_TJ.KOI8-C',
+    'tg_tj.koi8c':                          'tg_TJ.KOI8-C',
+    'th':                                   'th_TH.ISO8859-11',
+    'th_th':                                'th_TH.ISO8859-11',
+    'th_th.iso885911':                      'th_TH.ISO8859-11',
+    'th_th.tactis':                         'th_TH.TIS620',
+    'th_th.tis620':                         'th_TH.TIS620',
+    'th_th.utf8':                           'th_TH.UTF-8',
+    'thai':                                 'th_TH.ISO8859-11',
+    'tl':                                   'tl_PH.ISO8859-1',
+    'tl_ph':                                'tl_PH.ISO8859-1',
+    'tl_ph.iso88591':                       'tl_PH.ISO8859-1',
+    'tr':                                   'tr_TR.ISO8859-9',
+    'tr_tr':                                'tr_TR.ISO8859-9',
+    'tr_tr.iso88599':                       'tr_TR.ISO8859-9',
+    'tr_tr.utf8':                           'tr_TR.UTF-8',
+    'tt':                                   'tt_RU.TATAR-CYR',
+    'tt_ru':                                'tt_RU.TATAR-CYR',
+    'tt_ru.koi8c':                          'tt_RU.KOI8-C',
+    'tt_ru.tatarcyr':                       'tt_RU.TATAR-CYR',
+    'turkish':                              'tr_TR.ISO8859-9',
+    'turkish.iso88599':                     'tr_TR.ISO8859-9',
+    'uk':                                   'uk_UA.KOI8-U',
+    'uk_ua':                                'uk_UA.KOI8-U',
+    'uk_ua.cp1251':                         'uk_UA.CP1251',
+    'uk_ua.iso88595':                       'uk_UA.ISO8859-5',
+    'uk_ua.koi8u':                          'uk_UA.KOI8-U',
+    'uk_ua.microsoftcp1251':                'uk_UA.CP1251',
+    'uk_ua.utf8':                           'uk_UA.UTF-8',
+    'univ':                                 'en_US.utf',
+    'univ.utf8':                            'en_US.UTF-8',
+    'universal':                            'en_US.utf',
+    'universal.utf8@ucs4':                  'en_US.UTF-8',
+    'ur':                                   'ur_PK.CP1256',
+    'ur_pk':                                'ur_PK.CP1256',
+    'ur_pk.cp1256':                         'ur_PK.CP1256',
+    'ur_pk.microsoftcp1256':                'ur_PK.CP1256',
+    'uz':                                   'uz_UZ.UTF-8',
+    'uz_uz':                                'uz_UZ.UTF-8',
+    'vi':                                   'vi_VN.TCVN',
+    'vi_vn':                                'vi_VN.TCVN',
+    'vi_vn.tcvn':                           'vi_VN.TCVN',
+    'vi_vn.tcvn5712':                       'vi_VN.TCVN',
+    'vi_vn.utf8':                           'vi_VN.UTF-8',
+    'vi_vn.viscii':                         'vi_VN.VISCII',
+    'vi_vn.viscii111':                      'vi_VN.VISCII',
+    'wa':                                   'wa_BE.ISO8859-1',
+    'wa_be':                                'wa_BE.ISO8859-1',
+    'wa_be.iso88591':                       'wa_BE.ISO8859-1',
+    'wa_be.iso885915':                      'wa_BE.ISO8859-15',
+    'wa_be@euro':                           'wa_BE.ISO8859-15',
+    'yi':                                   'yi_US.CP1255',
+    'yi_us':                                'yi_US.CP1255',
+    'yi_us.cp1255':                         'yi_US.CP1255',
+    'yi_us.microsoftcp1255':                'yi_US.CP1255',
+    'zh':                                   'zh_CN.eucCN',
+    'zh_cn':                                'zh_CN.gb2312',
+    'zh_cn.big5':                           'zh_TW.big5',
+    'zh_cn.euc':                            'zh_CN.eucCN',
+    'zh_cn.gb18030':                        'zh_CN.gb18030',
+    'zh_cn.gb2312':                         'zh_CN.gb2312',
+    'zh_cn.gbk':                            'zh_CN.gbk',
+    'zh_cn.utf8':                           'zh_CN.UTF-8',
+    'zh_hk':                                'zh_HK.big5hkscs',
+    'zh_hk.big5':                           'zh_HK.big5',
+    'zh_hk.big5hkscs':                      'zh_HK.big5hkscs',
+    'zh_hk.utf8':                           'zh_HK.UTF-8',
+    'zh_sg.utf8':                           'zh_SG.UTF-8',
+    'zh_tw':                                'zh_TW.big5',
+    'zh_tw.big5':                           'zh_TW.big5',
+    'zh_tw.euc':                            'zh_TW.eucTW',
+    'zh_tw.utf8':                           'zh_TW.UTF-8',
index 8a710ffcb7f2ccb15e12ed7a4ef24c7b343254ac..6bf7ab4dbcda4b3417f3caa6ad3f0b3d80a79524 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -28,6 +28,11 @@ Extension Modules
+- locale.py now uses an updated locale alias table (built using
+  Tools/i18n/makelocalealias.py, a tool to parse the X11 locale
+  alias file); the encoding lookup was enhanced to use Python's
+  encoding alias table.
 - moved deprecated modules to Lib/lib-old:  whrandom, tzparse, statcache.
 - the pickle module no longer accepts the deprecated None option in the
diff --git a/Tools/i18n/makelocalealias.py b/Tools/i18n/makelocalealias.py
new file mode 100644 (file)
index 0000000..a52bc28
--- /dev/null
@@ -0,0 +1,70 @@
+#!/usr/bin/env python
+"""
+    Convert the X11 locale.alias file into a mapping dictionary suitable
+    for locale.py.
+    Written by Marc-Andre Lemburg <mal@egenix.com>, 2004-12-10.
+"""
+import locale
+# Location of the alias file.  This is the file handed to parse() when
+# the script is run directly.
+# NOTE(review): the path is hard-coded; X11 installations may keep
+# locale.alias elsewhere -- confirm for the system the script runs on.
+LOCALE_ALIAS = '/usr/lib/X11/locale/locale.alias'
+def parse(filename):
+    """ Parse the locale alias file filename and return a dictionary
+        mapping normalized locale names to the alias given in the file.
+        Blank lines and lines starting with '#' are ignored, as are
+        lines which do not consist of exactly two whitespace separated
+        fields.  The locale name (first field) has a trailing ':'
+        removed and is lower-cased; '-' and '_' are removed from its
+        encoding part, if there is one.  One letter locale names other
+        than 'c' are dropped.  The alias (second field) is stored
+        unchanged.
+        Raises IOError if the file cannot be opened or read.
+    """
+    # Make sure the file is closed again, even if reading fails
+    f = open(filename)
+    try:
+        lines = f.read().splitlines()
+    finally:
+        f.close()
+    data = {}
+    for line in lines:
+        line = line.strip()
+        if not line or line[:1] == '#':
+            continue
+        fields = line.split()
+        if len(fields) != 2:
+            # Malformed entry: skip it instead of aborting the whole
+            # run with an unpacking error
+            continue
+        # The local is called 'name' rather than 'locale' so that it
+        # does not shadow the imported locale module
+        name, alias = fields
+        # Strip ':'
+        if name[-1] == ':':
+            name = name[:-1]
+        # Lower-case locale
+        name = name.lower()
+        # Ignore one letter locale mappings (except for 'c')
+        if len(name) == 1 and name != 'c':
+            continue
+        # Normalize encoding, if given
+        if '.' in name:
+            lang, encoding = name.split('.')[:2]
+            encoding = encoding.replace('-', '')
+            encoding = encoding.replace('_', '')
+            name = lang + '.' + encoding
+        data[name] = alias
+    return data
+def pprint(data):
+    """ Print the entries of the dictionary data, sorted by key, one
+        per line in the form of indented dictionary literal entries
+        (repr of the key followed by ':', padded to 40 characters,
+        then repr of the value and a trailing comma).
+    """
+    keys = data.keys()
+    keys.sort()
+    for key in keys:
+        print '    %-40s%r,' % ('%r:' % key, data[key])
+def print_differences(data, olddata):
+    """ Print a '#' prefixed report of how data differs from olddata.
+        The keys of olddata are visited in sorted order; a key missing
+        from data is reported as removed, a key whose value differs is
+        reported as updated together with the old and the new value.
+        Keys which only exist in data (additions) are not mentioned.
+    """
+    keys = olddata.keys()
+    keys.sort()
+    for key in keys:
+        if key not in data:
+            print '#    removed %r' % key
+            continue
+        old = olddata[key]
+        new = data[key]
+        if old != new:
+            print '#    updated %r -> %r to %r' % \
+                  (key, old, new)
+if __name__ == '__main__':
+    # Start from the alias table currently in locale.py and let the
+    # entries parsed from the alias file override it
+    current = locale.locale_alias
+    data = current.copy()
+    parsed = parse(LOCALE_ALIAS)
+    data.update(parsed)
+    # Report removed/updated entries as comments, followed by an empty
+    # line and the new table as a dictionary literal
+    print_differences(data, current)
+    print
+    print 'locale_alias = {'
+    pprint(data)
+    print '}'