From: Fredrik Roubert Date: Wed, 15 Feb 2017 23:49:47 +0000 (+0000) Subject: ICU-12953 Include GSM 03.38 mapping in ICU by default X-Git-Tag: release-59-rc~146 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=4eaff9ce2c22dc9b9aaa68acd833f07aa85f399e;p=icu ICU-12953 Include GSM 03.38 mapping in ICU by default X-SVN-Rev: 39677 --- diff --git a/.gitattributes b/.gitattributes index f2b629ee974..cb3fd4f3530 100644 --- a/.gitattributes +++ b/.gitattributes @@ -67,6 +67,7 @@ icu4c/source/data/in/uts46.nrm -text icu4c/source/data/lang/pool.res -text icu4c/source/data/locales/pool.res -text icu4c/source/data/makedata.vcxproj -text +icu4c/source/data/mappings/gsm-03.38-2009.ucm -text icu4c/source/data/region/pool.res -text icu4c/source/data/unit/pool.res -text icu4c/source/data/zone/pool.res -text diff --git a/icu4c/source/data/mappings/convrtrs.txt b/icu4c/source/data/mappings/convrtrs.txt index b32ceeb9808..233bdc908f1 100644 --- a/icu4c/source/data/mappings/convrtrs.txt +++ b/icu4c/source/data/mappings/convrtrs.txt @@ -938,6 +938,8 @@ ibm-1129_P100-1997 { UTR22* } ibm-1129 { IBM* } ibm-1131_P100-1997 { UTR22* } ibm-1131 { IBM* } cp1131 # Cyrillic Belarus PC ibm-1133_P100-1997 { UTR22* } ibm-1133 { IBM* } # ISO Lao +# GSM 03.38 +gsm-03.38-2009 { UTR22* } GSM0338 # GSM0338 alias is from Perl # Partially algorithmic converters @@ -1150,7 +1152,6 @@ ebcdic-xml-us # These are not installed by default. They are rarely used. # Many of them can be added through the online ICU Data Library Customization tool -gsm-03.38-2000 { UTR22* } GSM0338 # GSM0338 alias is from Perl ibm-1004_P100-1995 { UTR22* } ibm-1004 { IBM* } ibm-1008_P100-1995 { UTR22* } ibm-1008 { IBM* } # cp1008, 8-bit Arabic (w/o euro update) ibm-1009_P100-1995 { UTR22* } ibm-1009 { IBM* } diff --git a/icu4c/source/data/mappings/gsm-03.38-2009.ucm b/icu4c/source/data/mappings/gsm-03.38-2009.ucm new file mode 100644 index 00000000000..80bc7565f36 --- /dev/null +++ b/icu4c/source/data/mappings/gsm-03.38-2009.ucm @@ -0,0 +1,218 @@ +# Copyright (C) 2017 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html +# +# Name: GSM 03.38 to Unicode +# Unicode version: 3.0 +# Table version: 2.0 +# Date: 2009 Nov 10 +# Authors: Ken Whistler +# Kent Karlsson +# Markus Kuhn +# +# Source: http://www.unicode.org/Public/MAPPINGS/ETSI/GSM0338.TXT +# See there for the license and for a description of the charset. +# Formatted into ICU .ucm format by Markus Scherer on 2006-nov-02. +# Updated to table version 2.0 by Fredrik Roubert on 2017-feb-08. +# Commented-out mappings are turned into fallbacks (|1), all others are turned +# into round-trips (|0). +# Multi-byte mappings are preserved as multi-single-byte character mappings, +# using ICU's m:n conversion capability. +# +# The substitution character is not documented in the Unicode file. +# \x3F is chosen here because \x1A is a graphic character. +# +# Other deviations from the Unicode file: +# a) +# The GSM standard specifies that one or two ESC bytes (\x1B), if not followed +# by a recognized final byte, be mapped to spaces (that is, reverse fallbacks +# to U+0020). +# The Unicode file round-trips a single \x1B to U+00A0 (NBSP) and has no mapping +# for \x1B\x1B. +# (Reverse fallbacks to U+00A0 would result in Unicode text that cannot be +# converted back to GSM 03.38. A roundtrip for U+00A0 adds a character that is +# not mappable in the standard.) +# +# See the ietf-charsets list email "Re: GSM 03.38 substitution character?" +# at http://mail.apps.ietf.org/ietf/charsets/msg01696.html +# +# b) +# The GSM standard maps U+00C7 capital C-cedilla to \x09 but the Unicode file +# contains and documents a "fix" to map U+00E7 small c-cedilla instead, based on +# an interpretation of the intent of the standard. Prevailing implementations +# in mobile phones follow the standard. +# +# This file follows the GSM standard. +# +# See the GSM standard at +# http://www.3gpp.org/ftp/Specs/archive/03_series/03.38/0338-720.zip +# +# For problems with the table format please submit a bug +# at http://www.icu-project.org/ . +# For issues with the mappings please contact Unicode +# at http://www.unicode.org/reporting.html + + "gsm-03.38-2009" + "AXXXX" + 1 + 1 + "SBCS" + 0-7f + \x3F + "ASCII" + +CHARMAP + \x00 |1 + \x0A |0 + \x1B\x0A |0 + \x0D |0 + \x20 |0 + \x1B |3 + \x1B\x1B |3 + \x21 |0 + \x22 |0 + \x23 |0 + \x02 |0 + \x25 |0 + \x26 |0 + \x27 |0 + \x28 |0 + \x29 |0 + \x2A |0 + \x2B |0 + \x2C |0 + \x2D |0 + \x2E |0 + \x2F |0 + \x30 |0 + \x31 |0 + \x32 |0 + \x33 |0 + \x34 |0 + \x35 |0 + \x36 |0 + \x37 |0 + \x38 |0 + \x39 |0 + \x3A |0 + \x3B |0 + \x3C |0 + \x3D |0 + \x3E |0 + \x3F |0 + \x00 |0 + \x41 |0 + \x42 |0 + \x43 |0 + \x44 |0 + \x45 |0 + \x46 |0 + \x47 |0 + \x48 |0 + \x49 |0 + \x4A |0 + \x4B |0 + \x4C |0 + \x4D |0 + \x4E |0 + \x4F |0 + \x50 |0 + \x51 |0 + \x52 |0 + \x53 |0 + \x54 |0 + \x55 |0 + \x56 |0 + \x57 |0 + \x58 |0 + \x59 |0 + \x5A |0 + \x1B\x3C |0 + \x1B\x2F |0 + \x1B\x3E |0 + \x1B\x14 |0 + \x11 |0 + \x61 |0 + \x62 |0 + \x63 |0 + \x64 |0 + \x65 |0 + \x66 |0 + \x67 |0 + \x68 |0 + \x69 |0 + \x6A |0 + \x6B |0 + \x6C |0 + \x6D |0 + \x6E |0 + \x6F |0 + \x70 |0 + \x71 |0 + \x72 |0 + \x73 |0 + \x74 |0 + \x75 |0 + \x76 |0 + \x77 |0 + \x78 |0 + \x79 |0 + \x7A |0 + \x1B\x28 |0 + \x1B\x40 |0 + \x1B\x29 |0 + \x1B\x3D |0 + \x40 |0 + \x01 |0 + \x24 |0 + \x03 |0 + \x5F |0 + \x60 |0 + \x5B |0 + \x0E |0 + \x1C |0 + \x09 |0 + \x1F |0 + \x5D |0 + \x5C |0 + \x0B |0 + \x5E |0 + \x1E |0 + \x7F |0 + \x7B |0 + \x0F |0 + \x1D |0 + \x04 |0 + \x05 |0 + \x07 |0 + \x7D |0 + \x08 |0 + \x7C |0 + \x0C |0 + \x06 |0 + \x7E |0 + \x41 |1 + \x42 |1 + \x13 |0 + \x10 |0 + \x45 |1 + \x5A |1 + \x48 |1 + \x19 |0 + \x49 |1 + \x4B |1 + \x14 |0 + \x4D |1 + \x4E |1 + \x1A |0 + \x4F |1 + \x16 |0 + \x50 |1 + \x18 |0 + \x54 |1 + \x59 |1 + \x12 |0 + \x58 |1 + \x17 |0 + \x15 |0 + \x1B\x65 |0 +END CHARMAP diff --git a/icu4c/source/data/mappings/ucmfiles.mk b/icu4c/source/data/mappings/ucmfiles.mk index 124f50d1159..ab277900f20 100644 --- a/icu4c/source/data/mappings/ucmfiles.mk +++ b/icu4c/source/data/mappings/ucmfiles.mk @@ -120,5 +120,5 @@ icu-internal-compound-d1.ucm icu-internal-compound-d2.ucm icu-internal-compound- icu-internal-compound-d5.ucm icu-internal-compound-d6.ucm icu-internal-compound-d7.ucm \ icu-internal-compound-s1.ucm icu-internal-compound-s2.ucm icu-internal-compound-s3.ucm icu-internal-compound-t.ucm \ euc-jp-2007.ucm\ -euc-tw-2014.ucm - +euc-tw-2014.ucm\ +gsm-03.38-2009.ucm