]> granicus.if.org Git - icu/commitdiff
ICU-8951 Legacy/BCP 47 keyword conversion APIs merged into ICU4J trunk.
authorYoshito Umaoka <y.umaoka@gmail.com>
Thu, 28 Aug 2014 01:27:49 +0000 (01:27 +0000)
committerYoshito Umaoka <y.umaoka@gmail.com>
Thu, 28 Aug 2014 01:27:49 +0000 (01:27 +0000)
X-SVN-Rev: 36261

.gitattributes
icu4j/main/classes/core/src/com/ibm/icu/impl/locale/KeyTypeData.java [new file with mode: 0644]
icu4j/main/classes/core/src/com/ibm/icu/impl/locale/UnicodeLocaleExtension.java
icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java
icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java

index e6f7aa7e4ca9da72bdb724190c585983115c0d52..ead94d5644a7dc0fdb405a90d82f60eb3e63ef84 100644 (file)
@@ -268,6 +268,7 @@ icu4j/main/classes/core/.settings/org.eclipse.core.resources.prefs -text
 icu4j/main/classes/core/.settings/org.eclipse.jdt.core.prefs -text
 icu4j/main/classes/core/manifest.stub -text
 icu4j/main/classes/core/src/com/ibm/icu/impl/TZDBTimeZoneNames.java -text
+icu4j/main/classes/core/src/com/ibm/icu/impl/locale/KeyTypeData.java -text
 icu4j/main/classes/currdata/.externalToolBuilders/copy-data-currdata.launch -text
 icu4j/main/classes/currdata/.settings/org.eclipse.core.resources.prefs -text
 icu4j/main/classes/currdata/.settings/org.eclipse.jdt.core.prefs -text
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/KeyTypeData.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/KeyTypeData.java
new file mode 100644 (file)
index 0000000..ce4ceb4
--- /dev/null
@@ -0,0 +1,542 @@
+/*\r
+ *******************************************************************************\r
+ * Copyright (C) 2014, International Business Machines Corporation and\r
+ * others. All Rights Reserved.\r
+ *******************************************************************************\r
+ */\r
+package com.ibm.icu.impl.locale;\r
+\r
+import java.util.EnumSet;\r
+import java.util.HashMap;\r
+import java.util.HashSet;\r
+import java.util.Map;\r
+import java.util.MissingResourceException;\r
+import java.util.Set;\r
+import java.util.regex.Pattern;\r
+\r
+import com.ibm.icu.impl.ICUResourceBundle;\r
+import com.ibm.icu.util.Output;\r
+import com.ibm.icu.util.UResourceBundle;\r
+import com.ibm.icu.util.UResourceBundleIterator;\r
+\r
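+/**\r
+ * Mapping data between legacy locale keywords/types and their BCP 47 Unicode\r
+ * locale extension key/type equivalents.\r
+ */\r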
+public class KeyTypeData {\r
+\r
+    private static abstract class SpecialTypeHandler {\r
+        abstract boolean isValid(String value);\r
+        String canonicalize(String value) {\r
+            return AsciiUtil.toLowerString(value);\r
+        }\r
+    }\r
+\r
+    private static class CodepointsTypeHandler extends SpecialTypeHandler {\r
+        private static final Pattern pat = Pattern.compile("[0-9a-fA-F]{4,6}(-[0-9a-fA-F]{4,6})*");\r
+        boolean isValid(String value) {\r
+            return pat.matcher(value).matches();\r
+        }\r
+    }\r
+\r
+    private static class ReorderCodeTypeHandler extends SpecialTypeHandler {\r
+        private static final Pattern pat = Pattern.compile("[a-zA-Z]{3,8}(-[a-zA-Z]{3,8})*");\r
+        boolean isValid(String value) {\r
+            return pat.matcher(value).matches();\r
+        }\r
+    }\r
+\r
+    private enum SpecialType {\r
+        CODEPOINTS(new CodepointsTypeHandler()),\r
+        REORDER_CODE(new ReorderCodeTypeHandler());\r
+\r
+        SpecialTypeHandler handler;\r
+        SpecialType(SpecialTypeHandler handler) {\r
+            this.handler = handler;\r
+        }\r
+    };\r
+\r
+    private static class KeyData {\r
+        String legacyId;\r
+        String bcpId;\r
+        Map<String, Type> typeMap;\r
+        EnumSet<SpecialType> specialTypes;\r
+\r
+        KeyData(String legacyId, String bcpId, Map<String, Type> typeMap,\r
+                EnumSet<SpecialType> specialTypes) {\r
+            this.legacyId = legacyId;\r
+            this.bcpId = bcpId;\r
+            this.typeMap = typeMap;\r
+            this.specialTypes = specialTypes;\r
+        }\r
+    }\r
+\r
+    private static class Type {\r
+        String legacyId;\r
+        String bcpId;\r
+\r
+        Type(String legacyId, String bcpId) {\r
+            this.legacyId = legacyId;\r
+            this.bcpId = bcpId;\r
+        }\r
+    }\r
+\r
+    public static String toBcpKey(String key) {\r
+        key = AsciiUtil.toLowerString(key);\r
+        KeyData keyData = KEYMAP.get(key);\r
+        if (keyData != null) {\r
+            return keyData.bcpId;\r
+        }\r
+        return null;\r
+    }\r
+\r
+    public static String toLegacyKey(String key) {\r
+        key = AsciiUtil.toLowerString(key);\r
+        KeyData keyData = KEYMAP.get(key);\r
+        if (keyData != null) {\r
+            return keyData.legacyId;\r
+        }\r
+        return null;\r
+    }\r
+\r
+    public static String toBcpType(String key, String type,\r
+            Output<Boolean> isKnownKey, Output<Boolean> isSpecialType) {\r
+\r
+        if (isKnownKey != null) {\r
+            isKnownKey.value = false;\r
+        }\r
+        if (isSpecialType != null) {\r
+            isSpecialType.value = false;\r
+        }\r
+\r
+        key = AsciiUtil.toLowerString(key);\r
+        type = AsciiUtil.toLowerString(type);\r
+\r
+        KeyData keyData = KEYMAP.get(key);\r
+        if (keyData != null) {\r
+            if (isKnownKey != null) {\r
+                isKnownKey.value = Boolean.TRUE;\r
+            }\r
+            Type t = keyData.typeMap.get(type);\r
+            if (t != null) {\r
+                return t.bcpId;\r
+            }\r
+            if (keyData.specialTypes != null) {\r
+                for (SpecialType st : keyData.specialTypes) {\r
+                    if (st.handler.isValid(type)) {\r
+                        if (isSpecialType != null) {\r
+                            isSpecialType.value = true;\r
+                        }\r
+                        return st.handler.canonicalize(type);\r
+                    }\r
+                }\r
+            }\r
+        }\r
+        return null;\r
+    }\r
+\r
+\r
+    public static String toLegacyType(String key, String type,\r
+            Output<Boolean> isKnownKey, Output<Boolean> isSpecialType) {\r
+\r
+        if (isKnownKey != null) {\r
+            isKnownKey.value = false;\r
+        }\r
+        if (isSpecialType != null) {\r
+            isSpecialType.value = false;\r
+        }\r
+\r
+        key = AsciiUtil.toLowerString(key);\r
+        type = AsciiUtil.toLowerString(type);\r
+\r
+        KeyData keyData = KEYMAP.get(key);\r
+        if (keyData != null) {\r
+            if (isKnownKey != null) {\r
+                isKnownKey.value = Boolean.TRUE;\r
+            }\r
+            Type t = keyData.typeMap.get(type);\r
+            if (t != null) {\r
+                return t.legacyId;\r
+            }\r
+            if (keyData.specialTypes != null) {\r
+                for (SpecialType st : keyData.specialTypes) {\r
+                    if (st.handler.isValid(type)) {\r
+                        if (isSpecialType != null) {\r
+                            isSpecialType.value = true;\r
+                        }\r
+                        return st.handler.canonicalize(type);\r
+                    }\r
+                }\r
+            }\r
+        }\r
+        return null;\r
+    }\r
+\r
+\r
+    /**\r
+     * Populates KEYMAP from the "keyTypeData" resource bundle.\r
+     * <p>\r
+     * Reads the "keyMap" and "typeMap" tables and the optional "typeAlias" and\r
+     * "bcpTypeAlias" tables. Each key is registered under its lower-cased legacy\r
+     * id and, when different, its lower-cased BCP 47 id; each type and each of\r
+     * its aliases is registered the same way in that key's type map.\r
+     */\r
+    private static void initFromResourceBundle() {\r
+        UResourceBundle keyTypeDataRes = UResourceBundle.getBundleInstance(\r
+                ICUResourceBundle.ICU_BASE_NAME,\r
+                "keyTypeData",\r
+                ICUResourceBundle.ICU_DATA_CLASS_LOADER);\r
+        UResourceBundle keyMapRes = keyTypeDataRes.get("keyMap");\r
+        UResourceBundle typeMapRes = keyTypeDataRes.get("typeMap");\r
+\r
+        // alias data is optional\r
+        UResourceBundle typeAliasRes = null;\r
+        UResourceBundle bcpTypeAliasRes = null;\r
+\r
+        try {\r
+            typeAliasRes = keyTypeDataRes.get("typeAlias");\r
+        } catch (MissingResourceException e) {\r
+            // fall through\r
+        }\r
+\r
+        try {\r
+            bcpTypeAliasRes = keyTypeDataRes.get("bcpTypeAlias");\r
+        } catch (MissingResourceException e) {\r
+            // fall through\r
+        }\r
+\r
+        // iterate through keyMap resource\r
+        UResourceBundleIterator keyMapItr = keyMapRes.getIterator();\r
+        while (keyMapItr.hasNext()) {\r
+            UResourceBundle keyMapEntry = keyMapItr.next();\r
+            String legacyKeyId = keyMapEntry.getKey();\r
+            String bcpKeyId = keyMapEntry.getString();\r
+\r
+            boolean hasSameKey = false;\r
+            if (bcpKeyId.length() == 0) {\r
+                // Empty value indicates that BCP key is same with the legacy key.\r
+                bcpKeyId = legacyKeyId;\r
+                hasSameKey = true;\r
+            }\r
+\r
+            boolean isTZ = legacyKeyId.equals("timezone");\r
+\r
+            // reverse type alias map\r
+            // (canonical legacy type -> set of aliases that resolve to it)\r
+            Map<String, Set<String>> typeAliasMap = null;\r
+            if (typeAliasRes != null) {\r
+                UResourceBundle typeAliasResByKey = null;\r
+                try {\r
+                    typeAliasResByKey = typeAliasRes.get(legacyKeyId);\r
+                } catch (MissingResourceException e) {\r
+                    // fall through\r
+                }\r
+                if (typeAliasResByKey != null) {\r
+                    typeAliasMap = new HashMap<String, Set<String>>();\r
+                    UResourceBundleIterator typeAliasResItr = typeAliasResByKey.getIterator();\r
+                    while (typeAliasResItr.hasNext()) {\r
+                        UResourceBundle typeAliasDataEntry = typeAliasResItr.next();\r
+                        String from = typeAliasDataEntry.getKey();\r
+                        String to = typeAliasDataEntry.getString();\r
+                        if (isTZ) {\r
+                            // timezone aliases get the same ':' to '/' replacement as timezone types below\r
+                            from = from.replace(':', '/');\r
+                        }\r
+                        Set<String> aliasSet = typeAliasMap.get(to);\r
+                        if (aliasSet == null) {\r
+                            aliasSet = new HashSet<String>();\r
+                            typeAliasMap.put(to, aliasSet);\r
+                        }\r
+                        aliasSet.add(from);\r
+                    }\r
+                }\r
+            }\r
+\r
+            // reverse bcp type alias map\r
+            // (bcp type the alias maps to -> set of bcp aliases that resolve to it)\r
+            Map<String, Set<String>> bcpTypeAliasMap = null;\r
+            if (bcpTypeAliasRes != null) {\r
+                UResourceBundle bcpTypeAliasResByKey = null;\r
+                try {\r
+                    bcpTypeAliasResByKey = bcpTypeAliasRes.get(bcpKeyId);\r
+                } catch (MissingResourceException e) {\r
+                    // fall through\r
+                }\r
+                if (bcpTypeAliasResByKey != null) {\r
+                    bcpTypeAliasMap = new HashMap<String, Set<String>>();\r
+                    UResourceBundleIterator bcpTypeAliasResItr = bcpTypeAliasResByKey.getIterator();\r
+                    while (bcpTypeAliasResItr.hasNext()) {\r
+                        UResourceBundle bcpTypeAliasDataEntry = bcpTypeAliasResItr.next();\r
+                        String from = bcpTypeAliasDataEntry.getKey();\r
+                        String to = bcpTypeAliasDataEntry.getString();\r
+                        Set<String> aliasSet = bcpTypeAliasMap.get(to);\r
+                        if (aliasSet == null) {\r
+                            aliasSet = new HashSet<String>();\r
+                            bcpTypeAliasMap.put(to, aliasSet);\r
+                        }\r
+                        aliasSet.add(from);\r
+                    }\r
+                }\r
+            }\r
+\r
+            Map<String, Type> typeDataMap = new HashMap<String, Type>();\r
+            Set<SpecialType> specialTypeSet = null;\r
+\r
+            // look up type map for the key, and walk through the mapping data\r
+            UResourceBundle typeMapResByKey = null;\r
+            try {\r
+                typeMapResByKey = typeMapRes.get(legacyKeyId);\r
+            } catch (MissingResourceException e) {\r
+                // type map for each key must exist\r
+                // (with assertions disabled the key is still registered, with an empty type map)\r
+                assert false;\r
+            }\r
+            if (typeMapResByKey != null) {\r
+                UResourceBundleIterator typeMapResByKeyItr = typeMapResByKey.getIterator();\r
+                while (typeMapResByKeyItr.hasNext()) {\r
+                    UResourceBundle typeMapEntry = typeMapResByKeyItr.next();\r
+                    String legacyTypeId = typeMapEntry.getKey();\r
+\r
+                    // special types\r
+                    // (an entry whose key equals a SpecialType constant name marks the key as\r
+                    // accepting that special type; it is not added to the type map itself)\r
+                    boolean isSpecialType = false;\r
+                    for (SpecialType st : SpecialType.values()) {\r
+                        if (legacyTypeId.equals(st.toString())) {\r
+                            isSpecialType = true;\r
+                            if (specialTypeSet == null) {\r
+                                specialTypeSet = new HashSet<SpecialType>();\r
+                            }\r
+                            specialTypeSet.add(st);\r
+                            break;\r
+                        }\r
+                    }\r
+                    if (isSpecialType) {\r
+                        continue;\r
+                    }\r
+\r
+                    if (isTZ) {\r
+                        // a timezone key uses a colon instead of a slash in the resource.\r
+                        // e.g. America:Los_Angeles\r
+                        legacyTypeId = legacyTypeId.replace(':', '/');\r
+                    }\r
+\r
+                    String bcpTypeId = typeMapEntry.getString();\r
+\r
+                    boolean hasSameType = false;\r
+                    if (bcpTypeId.length() == 0) {\r
+                        // Empty value indicates that BCP type is same with the legacy type.\r
+                        bcpTypeId = legacyTypeId;\r
+                        hasSameType = true;\r
+                    }\r
+\r
+                    // Note: legacy type value should never be\r
+                    // equivalent to bcp type value of a different\r
+                    // type under the same key. So we use a single\r
+                    // map for lookup.\r
+                    Type t = new Type(legacyTypeId, bcpTypeId);\r
+                    typeDataMap.put(AsciiUtil.toLowerString(legacyTypeId), t);\r
+                    if (!hasSameType) {\r
+                        typeDataMap.put(AsciiUtil.toLowerString(bcpTypeId), t);\r
+                    }\r
+\r
+                    // Also put aliases in the map\r
+                    if (typeAliasMap != null) {\r
+                        Set<String> typeAliasSet = typeAliasMap.get(legacyTypeId);\r
+                        if (typeAliasSet != null) {\r
+                            for (String alias : typeAliasSet) {\r
+                                typeDataMap.put(AsciiUtil.toLowerString(alias), t);\r
+                            }\r
+                        }\r
+                    }\r
+                    if (bcpTypeAliasMap != null) {\r
+                        Set<String> bcpTypeAliasSet = bcpTypeAliasMap.get(bcpTypeId);\r
+                        if (bcpTypeAliasSet != null) {\r
+                            for (String alias : bcpTypeAliasSet) {\r
+                                typeDataMap.put(AsciiUtil.toLowerString(alias), t);\r
+                            }\r
+                        }\r
+                    }\r
+                }\r
+            }\r
+\r
+            // specialTypes stays null when the key accepts no special type\r
+            EnumSet<SpecialType> specialTypes = null;\r
+            if (specialTypeSet != null) {\r
+                specialTypes = EnumSet.copyOf(specialTypeSet);\r
+            }\r
+\r
+            KeyData keyData = new KeyData(legacyKeyId, bcpKeyId, typeDataMap, specialTypes);\r
+\r
+            // register the key under both of its ids so either form can be looked up\r
+            KEYMAP.put(AsciiUtil.toLowerString(legacyKeyId), keyData);\r
+            if (!hasSameKey) {\r
+                KEYMAP.put(AsciiUtil.toLowerString(bcpKeyId), keyData);\r
+            }\r
+        }\r
+    }\r
+\r
+    //\r
+    // Note:    The key-type data is currently read from ICU resource bundle keyTypeData.res.\r
+    //          In future, we may import the data into code like below directly from CLDR to\r
+    //          avoid cyclic dependency between ULocale and UResourceBundle. For now, the code\r
+    //          below is just for proof of concept, and commented out.\r
+    //\r
+\r
+//    private static final String[][] TYPE_DATA_CA = {\r
+//     // {<legacy type>, <bcp type - if different>},\r
+//        {"buddhist", null},\r
+//        {"chinese", null},\r
+//        {"coptic", null},\r
+//        {"dangi", null},\r
+//        {"ethiopic", null},\r
+//        {"ethiopic-amete-alem", "ethioaa"},\r
+//        {"gregorian", "gregory"},\r
+//        {"hebrew", null},\r
+//        {"indian", null},\r
+//        {"islamic", null},\r
+//        {"islamic-civil", null},\r
+//        {"islamic-rgsa", null},\r
+//        {"islamic-tbla", null},\r
+//        {"islamic-umalqura", null},\r
+//        {"iso8601", null},\r
+//        {"japanese", null},\r
+//        {"persian", null},\r
+//        {"roc", null},\r
+//    };\r
+//\r
+//    private static final String[][] TYPE_DATA_KS = {\r
+//     // {<legacy type>, <bcp type - if different>},\r
+//        {"identical", "identic"},\r
+//        {"primary", "level1"},\r
+//        {"quaternary", "level4"},\r
+//        {"secondary", "level2"},\r
+//        {"tertiary", "level3"},\r
+//    };\r
+//\r
+//    private static final String[][] TYPE_ALIAS_KS = {\r
+//     // {<legacy alias>, <legacy canonical>},\r
+//        {"quarternary", "quaternary"},\r
+//    };\r
+//\r
+//    private static final String[][] BCP_TYPE_ALIAS_CA = {\r
+//     // {<bcp deprecated>, <bcp preferred>\r
+//        {"islamicc", "islamic-civil"},\r
+//    };\r
+//\r
+//    private static final Object[][] KEY_DATA = {\r
+//     // {<legacy key>, <bcp key - if different>, <type map>, <type alias>, <bcp type alias>},\r
+//        {"calendar", "ca", TYPE_DATA_CA, null, BCP_TYPE_ALIAS_CA},\r
+//        {"colstrength", "ks", TYPE_DATA_KS, TYPE_ALIAS_KS, null},\r
+//    };\r
+\r
+    private static final Object[][] KEY_DATA = {};\r
+\r
+    @SuppressWarnings("unused")\r
+    private static void initFromTables() {\r
+        for (Object[] keyDataEntry : KEY_DATA) {\r
+            String legacyKeyId = (String)keyDataEntry[0];\r
+            String bcpKeyId = (String)keyDataEntry[1];\r
+            String[][] typeData = (String[][])keyDataEntry[2];\r
+            String[][] typeAliasData = (String[][])keyDataEntry[3];\r
+            String[][] bcpTypeAliasData = (String[][])keyDataEntry[4];\r
+\r
+            boolean hasSameKey = false;\r
+            if (bcpKeyId == null) {\r
+                bcpKeyId = legacyKeyId;\r
+                hasSameKey = true;\r
+            }\r
+\r
+            // reverse type alias map\r
+            Map<String, Set<String>> typeAliasMap = null;\r
+            if (typeAliasData != null) {\r
+                typeAliasMap = new HashMap<String, Set<String>>();\r
+                for (String[] typeAliasDataEntry : typeAliasData) {\r
+                    String from = typeAliasDataEntry[0];\r
+                    String to = typeAliasDataEntry[1];\r
+                    Set<String> aliasSet = typeAliasMap.get(to);\r
+                    if (aliasSet == null) {\r
+                        aliasSet = new HashSet<String>();\r
+                        typeAliasMap.put(to, aliasSet);\r
+                    }\r
+                    aliasSet.add(from);\r
+                }\r
+            }\r
+\r
+            // BCP type alias map data\r
+            Map<String, Set<String>> bcpTypeAliasMap = null;\r
+            if (bcpTypeAliasData != null) {\r
+                bcpTypeAliasMap = new HashMap<String, Set<String>>();\r
+                for (String[] bcpTypeAliasDataEntry : bcpTypeAliasData) {\r
+                    String from = bcpTypeAliasDataEntry[0];\r
+                    String to = bcpTypeAliasDataEntry[1];\r
+                    Set<String> aliasSet = bcpTypeAliasMap.get(to);\r
+                    if (aliasSet == null) {\r
+                        aliasSet = new HashSet<String>();\r
+                        bcpTypeAliasMap.put(to, aliasSet);\r
+                    }\r
+                    aliasSet.add(from);\r
+                }\r
+            }\r
+\r
+            // Type map data\r
+            assert typeData != null;\r
+            Map<String, Type> typeDataMap = new HashMap<String, Type>();\r
+            Set<SpecialType> specialTypeSet = null;\r
+\r
+            for (String[] typeDataEntry : typeData) {\r
+                String legacyTypeId = typeDataEntry[0];\r
+                String bcpTypeId = typeDataEntry[1];\r
+\r
+                // special types\r
+                boolean isSpecialType = false;\r
+                for (SpecialType st : SpecialType.values()) {\r
+                    if (legacyTypeId.equals(st.toString())) {\r
+                        isSpecialType = true;\r
+                        if (specialTypeSet == null) {\r
+                            specialTypeSet = new HashSet<SpecialType>();\r
+                        }\r
+                        specialTypeSet.add(st);\r
+                        break;\r
+                    }\r
+                }\r
+                if (isSpecialType) {\r
+                    continue;\r
+                }\r
+\r
+                boolean hasSameType = false;\r
+                if (bcpTypeId == null) {\r
+                    bcpTypeId = legacyTypeId;\r
+                    hasSameType = true;\r
+                }\r
+\r
+                // Note: legacy type value should never be\r
+                // equivalent to bcp type value of a different\r
+                // type under the same key. So we use a single\r
+                // map for lookup.\r
+                Type t = new Type(legacyTypeId, bcpTypeId);\r
+                typeDataMap.put(AsciiUtil.toLowerString(legacyTypeId), t);\r
+                if (!hasSameType) {\r
+                    typeDataMap.put(AsciiUtil.toLowerString(bcpTypeId), t);\r
+                }\r
+\r
+                // Also put aliases in the index\r
+                Set<String> typeAliasSet = typeAliasMap.get(legacyTypeId);\r
+                if (typeAliasSet != null) {\r
+                    for (String alias : typeAliasSet) {\r
+                        typeDataMap.put(AsciiUtil.toLowerString(alias), t);\r
+                    }\r
+                }\r
+                Set<String> bcpTypeAliasSet = bcpTypeAliasMap.get(bcpTypeId);\r
+                if (bcpTypeAliasSet != null) {\r
+                    for (String alias : bcpTypeAliasSet) {\r
+                        typeDataMap.put(AsciiUtil.toLowerString(alias), t);\r
+                    }\r
+                }\r
+            }\r
+\r
+            EnumSet<SpecialType> specialTypes = null;\r
+            if (specialTypeSet != null) {\r
+                specialTypes = EnumSet.copyOf(specialTypeSet);\r
+            }\r
+\r
+            KeyData keyData = new KeyData(legacyKeyId, bcpKeyId, typeDataMap, specialTypes);\r
+\r
+            KEYMAP.put(AsciiUtil.toLowerString(legacyKeyId), keyData);\r
+            if (!hasSameKey) {\r
+                KEYMAP.put(AsciiUtil.toLowerString(bcpKeyId), keyData);\r
+            }\r
+        }\r
+    }\r
+\r
+    private static final Map<String, KeyData> KEYMAP;\r
+\r
+    static {\r
+        KEYMAP = new HashMap<String, KeyData>();\r
+//        initFromTables();\r
+        initFromResourceBundle();\r
+    }\r
+\r
+}\r
index 5a3e3f354996ceb771d30a5a18408b837e00c4cd..96b50b74ddc5923d332904718cf917ced6308c52 100644 (file)
@@ -1,6 +1,6 @@
 /*
  *******************************************************************************
- * Copyright (C) 2009-2010, International Business Machines Corporation and    *
+ * Copyright (C) 2009-2014, International Business Machines Corporation and    *
  * others. All Rights Reserved.                                                *
  *******************************************************************************
  */
@@ -99,4 +99,23 @@ public class UnicodeLocaleExtension extends Extension {
         // 3*8alphanum
         return (s.length() >= 3) && (s.length() <= 8) && AsciiUtil.isAlphaNumericString(s);
     }
+
+    public static boolean isType(String s) {
+        // sequence of type subtags delimited by '-'
+        int startIdx = 0;
+        boolean sawSubtag = false;
+        while (true) {
+            int idx = s.indexOf(LanguageTag.SEP, startIdx);
+            String subtag = idx < 0 ? s.substring(startIdx) : s.substring(startIdx, idx);
+            if (!isTypeSubtag(subtag)) {
+                return false;
+            }
+            sawSubtag = true;
+            if (idx < 0) {
+                break;
+            }
+            startIdx = idx + 1;
+        }
+        return sawSubtag && startIdx < s.length();
+    }
 }
index b9c5c01538f32e499052ea416b8f9706b94c89fb..bc57266c4fd68d683484544c9f2e613870eb8897 100644 (file)
@@ -40,6 +40,7 @@ import com.ibm.icu.impl.locale.LocaleExtensions;
 import com.ibm.icu.impl.locale.LocaleSyntaxException;
 import com.ibm.icu.impl.locale.ParseStatus;
 import com.ibm.icu.impl.locale.UnicodeLocaleExtension;
+import com.ibm.icu.impl.locale.KeyTypeData;
 import com.ibm.icu.text.LocaleDisplayNames;
 import com.ibm.icu.text.LocaleDisplayNames.DialectHandling;
 
@@ -3218,6 +3219,149 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
         return getInstance(bldr.getBaseLocale(), bldr.getLocaleExtensions());
     }
 
+    /**
+     * Converts the specified keyword (legacy key, or BCP 47 Unicode locale
+     * extension key) to the equivalent BCP 47 Unicode locale extension key.
+     * For example, BCP 47 Unicode locale extension key "co" is returned for
+     * the input keyword "collation".
+     * <p>
+     * When the specified keyword is unknown, but satisfies the BCP syntax,
+     * then the lower-case version of the input keyword will be returned.
+     * For example,
+     * <code>toUnicodeLocaleKey("ZZ")</code> returns "zz".
+     * 
+     * @param keyword       the input locale keyword (either legacy key
+     *                      such as "collation" or BCP 47 Unicode locale extension
+     *                      key such as "co").
+     * @return              the well-formed BCP 47 Unicode locale extension key,
+     *                      or null if the specified locale keyword cannot be mapped
+     *                      to a well-formed BCP 47 Unicode locale extension key. 
+     * @see #toLegacyKey(String)
+     * @draft ICU 54
+     * @provisional This API might change or be removed in a future release.
+     */
+    public static String toUnicodeLocaleKey(String keyword) {
+        String uniLocKey = KeyTypeData.toBcpKey(keyword);
+        if (uniLocKey == null && UnicodeLocaleExtension.isKey(keyword)) {
+            // unknown keyword, but syntax is fine..
+            uniLocKey = AsciiUtil.toLowerString(keyword);
+        }
+        return uniLocKey;
+    }
+
+    /**
+     * Converts the specified keyword value (legacy type, or BCP 47
+     * Unicode locale extension type) to the well-formed BCP 47 Unicode locale
+     * extension type for the specified keyword (category). For example, BCP 47
+     * Unicode locale extension type "phonebk" is returned for the input
+     * keyword value "phonebook", with the keyword "collation" (or "co").
+     * <p>
+     * When the specified keyword is not recognized, but the specified value
+     * satisfies the syntax of the BCP 47 Unicode locale extension type,
+     * or when the specified keyword allows 'variable' type and the specified
+     * value satisfies the syntax, the lower-case version of the input value
+     * will be returned. For example,
+     * <code>toUnicodeLocaleType("Foo", "Bar")</code> returns "bar",
+     * <code>toUnicodeLocaleType("variableTop", "00A4")</code> returns "00a4".
+     * 
+     * @param keyword       the locale keyword (either legacy key such as
+     *                      "collation" or BCP 47 Unicode locale extension
+     *                      key such as "co").
+     * @param value         the locale keyword value (either legacy type
+     *                      such as "phonebook" or BCP 47 Unicode locale extension
+     *                      type such as "phonebk").
+     * @return              the well-formed BCP47 Unicode locale extension type,
+     *                      or null if the locale keyword value cannot be mapped to
+     *                      a well-formed BCP 47 Unicode locale extension type.
+     * @see #toLegacyType(String, String)
+     * @draft ICU 54
+     * @provisional This API might change or be removed in a future release.
+     */
+    public static String toUnicodeLocaleType(String keyword, String value) {
+        String bcpType = KeyTypeData.toBcpType(keyword, value, null, null);
+        if (bcpType == null && UnicodeLocaleExtension.isType(value)) {
+            // unknown keyword, but syntax is fine..
+            bcpType = AsciiUtil.toLowerString(value);
+        }
+        return bcpType;
+    }
+
+    /**
+     * Converts the specified keyword (BCP 47 Unicode locale extension key, or
+     * legacy key) to the legacy key. For example, legacy key "collation" is
+     * returned for the input BCP 47 Unicode locale extension key "co".
+     * 
+     * @param keyword       the input locale keyword (either BCP 47 Unicode locale
+     *                      extension key or legacy key).
+     * @return              the well-formed legacy key, or null if the specified
+     *                      keyword cannot be mapped to a well-formed legacy key.
+     * @see #toUnicodeLocaleKey(String)
+     * @draft ICU 54
+     * @provisional This API might change or be removed in a future release.
+     */
+    public static String toLegacyKey(String keyword) {
+        String legacyKey = KeyTypeData.toLegacyKey(keyword);
+        if (legacyKey == null) {
+            // Checks if the specified locale key is well-formed with the legacy locale syntax.
+            //
+            // Note:
+            //  Neither ICU nor LDML/CLDR provides the definition of keyword syntax.
+            //  However, a key should not contain '=' obviously. For now, all existing
+            //  keys are using ASCII alphabetic letters only. We won't add any new key
+            //  that is not compatible with the BCP 47 syntax. Therefore, we assume
+            //  a valid key consist from [0-9a-zA-Z], no symbols.
+            if (keyword.matches("[0-9a-zA-Z]*")) {
+                legacyKey = AsciiUtil.toLowerString(keyword);
+            }
+        }
+        return legacyKey;
+    }
+
+    /**
+     * Converts the specified keyword value (BCP 47 Unicode locale extension type,
+     * or legacy type or type alias) to the canonical legacy type. For example,
+     * the legacy type "phonebook" is returned for the input BCP 47 Unicode
+     * locale extension type "phonebk" with the keyword "collation" (or "co").
+     * <p>
+     * When the specified keyword is not recognized, but the specified value
+     * satisfies the syntax of legacy key, or when the specified keyword
+     * allows 'variable' type and the specified value satisfies the syntax,
+     * the lower-case version of the input value will be returned.
+     * For example,
+     * <code>toLegacyType("Foo", "Bar")</code> returns "bar",
+     * <code>toLegacyType("vt", "00A4")</code> returns "00a4".
+     *
+     * @param keyword       the locale keyword (either legacy keyword such as
+     *                      "collation" or BCP 47 Unicode locale extension
+     *                      key such as "co").
+     * @param value         the locale keyword value (either BCP 47 Unicode locale
+     *                      extension type such as "phonebk" or legacy keyword value
+     *                      such as "phonebook").
+     * @return              the well-formed legacy type, or null if the specified
+     *                      keyword value cannot be mapped to a well-formed legacy
+     *                      type.
+     * @see #toUnicodeLocaleType(String, String)
+     * @draft ICU 54
+     * @provisional This API might change or be removed in a future release.
+     */
+    public static String toLegacyType(String keyword, String value) {
+        String legacyType = KeyTypeData.toLegacyType(keyword, value, null, null);
+        if (legacyType == null) {
+            // Checks if the specified locale type is well-formed with the legacy locale syntax.
+            //
+            // Note:
+            //  Neither ICU nor LDML/CLDR provides the definition of keyword syntax.
+            //  However, a type should not contain '=' obviously. For now, all existing
+            //  types are using ASCII alphabetic letters with a few symbol letters. We won't
+            //  add any new type that is not compatible with the BCP 47 syntax except timezone
+            //  IDs. For now, we assume a valid type start with [0-9a-zA-Z], but may contain
+            //  '-' '_' '/' in the middle.
+            if (value.matches("[0-9a-zA-Z]+([_/\\-][0-9a-zA-Z]+)*")) {
+                legacyType = AsciiUtil.toLowerString(value);
+            }
+        }
+        return legacyType;
+    }
 
     /**
      * <code>Builder</code> is used to build instances of <code>ULocale</code>
@@ -3591,8 +3735,8 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
                     for (String bcpKey : ukeys) {
                         String bcpType = uext.getUnicodeLocaleType(bcpKey);
                         // convert to legacy key/type
-                        String lkey = bcp47ToLDMLKey(bcpKey);
-                        String ltype = bcp47ToLDMLType(lkey, ((bcpType.length() == 0) ? "yes" : bcpType)); // use "yes" as the value of typeless keywords
+                        String lkey = toLegacyKey(bcpKey);
+                        String ltype = toLegacyType(bcpKey, ((bcpType.length() == 0) ? "yes" : bcpType)); // use "yes" as the value of typeless keywords
                         // special handling for u-va-posix, since this is a variant, not a keyword
                         if (lkey.equals("va") && ltype.equals("posix") && base.getVariant().length() == 0) {
                             id = id + "_POSIX";
@@ -3675,8 +3819,8 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
                             }
                         }
                     } else if (key.length() >= 2) {
-                        String bcpKey = ldmlKeyToBCP47(key);
-                        String bcpType = ldmlTypeToBCP47(key, getKeywordValue(key));
+                        String bcpKey = toUnicodeLocaleKey(key);
+                        String bcpType = toUnicodeLocaleType(key, getKeywordValue(key));
                         if (bcpKey != null && bcpType != null) {
                             try {
                                 intbld.setUnicodeLocaleKeyword(bcpKey, bcpType);
@@ -3699,161 +3843,6 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
         return extensions;
     }
 
-    //
-    // LDML legacy/BCP47 key and type mapping functions
-    //
-    private static String ldmlKeyToBCP47(String key) {
-        UResourceBundle keyTypeData = UResourceBundle.getBundleInstance(
-                                            ICUResourceBundle.ICU_BASE_NAME,
-                                            "keyTypeData",
-                                            ICUResourceBundle.ICU_DATA_CLASS_LOADER);
-        UResourceBundle keyMap = keyTypeData.get("keyMap");
-
-        // normalize key to lowercase
-        key = AsciiUtil.toLowerString(key);
-        String bcpKey = null;
-        try {
-            bcpKey = keyMap.getString(key);
-            if (bcpKey.length() == 0) {
-                // empty value indicates the BCP47 key is same with the legacy key
-                bcpKey = key;
-            }
-        } catch (MissingResourceException mre) {
-            // fall through
-        }
-
-        if (bcpKey == null) {
-            if (key.length() == 2 && LanguageTag.isExtensionSubtag(key)) {
-                return key;
-            }
-            return null;
-        }
-        return bcpKey;
-    }
-
-    private static String bcp47ToLDMLKey(String bcpKey) {
-        UResourceBundle keyTypeData = UResourceBundle.getBundleInstance(
-                                            ICUResourceBundle.ICU_BASE_NAME,
-                                            "keyTypeData",
-                                            ICUResourceBundle.ICU_DATA_CLASS_LOADER);
-        UResourceBundle keyMap = keyTypeData.get("keyMap");
-
-        // normalize bcp key to lowercase
-        bcpKey = AsciiUtil.toLowerString(bcpKey);
-        String key = null;
-        for (int i = 0; i < keyMap.getSize(); i++) {
-            UResourceBundle mapData = keyMap.get(i);
-            String tmpBcpKey = mapData.getString();
-            if (tmpBcpKey.length() == 0) {
-                // empty value indicates the BCP47 key is same with the legacy key
-                tmpBcpKey = mapData.getKey();
-            }
-            if (bcpKey.equals(tmpBcpKey)) {
-                key = mapData.getKey();
-                break;
-            }
-        }
-        if (key == null) {
-            return bcpKey;
-        }
-        return key;
-    }
-
-    private static String ldmlTypeToBCP47(String key, String type) {
-        UResourceBundle keyTypeData = UResourceBundle.getBundleInstance(
-                                            ICUResourceBundle.ICU_BASE_NAME,
-                                            "keyTypeData",
-                                            ICUResourceBundle.ICU_DATA_CLASS_LOADER);
-        UResourceBundle typeMap = keyTypeData.get("typeMap");
-
-        // keys are case-insensitive, while types are case-sensitive
-        // TODO: make types case insensitive
-        key = AsciiUtil.toLowerString(key);
-        UResourceBundle typeMapForKey = null;
-        String bcpType = null;
-        String typeResKey = key.equals("timezone") ? type.replace('/', ':') : type;
-        try {
-            typeMapForKey = typeMap.get(key);
-            bcpType = typeMapForKey.getString(typeResKey);
-            if (bcpType.length() == 0) {
-                // empty value indicates the BCP47 type is same with the legacy type
-                bcpType = type;
-            }
-        } catch (MissingResourceException mre) {
-            // fall through
-        }
-
-        if (bcpType == null && typeMapForKey != null) {
-            // is this type alias?
-            UResourceBundle typeAlias = keyTypeData.get("typeAlias");
-            try {
-                UResourceBundle typeAliasForKey = typeAlias.get(key);
-                typeResKey = typeAliasForKey.getString(typeResKey);
-                bcpType = typeMapForKey.getString(typeResKey.replace('/', ':'));
-                if (bcpType.length() == 0) {
-                    // empty value indicates the BCP47 type is same with the legacy type
-                    bcpType = typeResKey;
-                }
-            } catch (MissingResourceException mre) {
-                // fall through
-            }
-        }
-
-        if (bcpType == null) {
-            int typeLen = type.length();
-            if (typeLen >= 3 && typeLen <= 8 && LanguageTag.isExtensionSubtag(type)) {
-                return type;
-            }
-            return null;
-        }
-        return bcpType;
-    }
-
-    private static String bcp47ToLDMLType(String key, String bcpType) {
-        UResourceBundle keyTypeData = UResourceBundle.getBundleInstance(
-                                            ICUResourceBundle.ICU_BASE_NAME,
-                                            "keyTypeData",
-                                            ICUResourceBundle.ICU_DATA_CLASS_LOADER);
-        UResourceBundle typeMap = keyTypeData.get("typeMap");
-
-        // normalize key/bcpType to lowercase
-        key = AsciiUtil.toLowerString(key);
-        bcpType = AsciiUtil.toLowerString(bcpType);
-
-        String type = null;
-        try {
-            UResourceBundle typeMapForKey = typeMap.get(key);
-
-            // Note:    Linear search for time zone ID might be too slow.
-            //          ICU services do not use timezone keywords for now.
-            //          In future, we may need to build the optimized inverse
-            //          lookup table.
-
-            for (int i = 0; i < typeMapForKey.getSize(); i++) {
-                UResourceBundle mapData = typeMapForKey.get(i);
-                String tmpBcpType = mapData.getString();
-                if (tmpBcpType.length() == 0) {
-                    // empty value indicates the BCP47 type is same with the legacy type
-                    tmpBcpType = mapData.getKey();
-                }
-                if (bcpType.equals(tmpBcpType)) {
-                    type = mapData.getKey();
-                    if (key.equals("timezone")) {
-                        type = type.replace(':', '/');
-                    }
-                    break;
-                }
-            }
-        } catch (MissingResourceException mre) {
-            // fall through
-        }
-
-        if (type == null) {
-            return bcpType;
-        }
-        return type;
-    }
-
     /*
      * JDK Locale Helper
      */
@@ -4073,9 +4062,9 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
 
                     if (kwKey.length() != 1) {
                         // Unicode locale key
-                        kwKey = bcp47ToLDMLKey(kwKey);
+                        kwKey = toLegacyKey(kwKey);
                         // use "yes" as the value of typeless keywords
-                        kwVal = bcp47ToLDMLType(kwKey, ((kwVal.length() == 0) ? "yes" : kwVal));
+                        kwVal = toLegacyType(kwKey, ((kwVal.length() == 0) ? "yes" : kwVal));
                     }
 
                     if (addSep) {
index 027980554cc96baeba249e7a31a090bab7a83b4a..93c3b3a09916770f98d408c3195edd28fac8998a 100644 (file)
@@ -3874,7 +3874,7 @@ public class ULocaleTest extends TestFmwk {
                 {"en@timezone=America/New_York;calendar=japanese",    "en-u-ca-japanese-tz-usnyc"},
                 {"en@timezone=US/Eastern",    "en-u-tz-usnyc"},
                 {"en@x=x-y-z;a=a-b-c",  "en-x-x-y-z"},
-                {"it@collation=badcollationtype;colStrength=identical;cu=usd-eur", "it-u-ks-identic"},
+                {"it@collation=badcollationtype;colStrength=identical;cu=usd-eur", "it-u-cu-usd-eur-ks-identic"},
                 {"en_US_POSIX", "en-US-u-va-posix"},
                 {"en_US_POSIX@calendar=japanese;currency=EUR","en-US-u-ca-japanese-cu-eur-va-posix"},
                 {"@x=elmer",    "x-elmer"},
@@ -4449,4 +4449,108 @@ public class ULocaleTest extends TestFmwk {
             }
         }
     }
+
+    public void TestToUnicodeLocaleKey() {
+        String[][] DATA = { // each row: {input keyword, expected ULocale.toUnicodeLocaleKey result}; null = null expected
+                {"calendar",    "ca"},
+                {"CALEndar",    "ca"},  // different casing
+                {"ca",          "ca"},  // bcp key itself
+                {"kv",          "kv"},  // no difference between legacy and bcp
+                {"foo",         null},  // unknown, bcp ill-formed
+                {"ZZ",          "zz"},  // unknown, bcp well-formed
+        };
+
+        for (String[] d : DATA) {
+            String keyword = d[0];
+            String expected = d[1];
+
+            String bcpKey = ULocale.toUnicodeLocaleKey(keyword);
+            assertEquals("keyword=" + keyword, expected, bcpKey);
+        }
+    }
+
+    public void TestToLegacyKey() {
+        String[][] DATA = { // each row: {input keyword, expected ULocale.toLegacyKey result}; null = null expected
+                {"kb",          "colbackwards"},
+                {"kB",          "colbackwards"},    // different casing
+                {"Collation",   "collation"},   // keyword itself with different casing
+                {"kv",          "kv"},  // no difference between legacy and bcp
+                {"foo",         "foo"}, // unknown, bcp ill-formed
+                {"ZZ",          "zz"},  // unknown, bcp well-formed
+                {"e=mc2",       null},  // unknown, bcp/legacy ill-formed
+        };
+
+        for (String[] d : DATA) {
+            String keyword = d[0];
+            String expected = d[1];
+
+            String legacyKey = ULocale.toLegacyKey(keyword);
+            assertEquals("bcpKey=" + keyword, expected, legacyKey); // label says bcpKey, but some inputs above are legacy keywords
+        }
+    }
+
+    public void TestToUnicodeLocaleType() {
+        String[][] DATA = { // each row: {keyword, value, expected ULocale.toUnicodeLocaleType result}; null = null expected
+                {"tz",              "Asia/Kolkata",     "inccu"},
+                {"calendar",        "gregorian",        "gregory"},
+                {"ca",              "gregorian",        "gregory"},
+                {"ca",              "Gregorian",        "gregory"},
+                {"ca",              "buddhist",         "buddhist"},
+                {"Calendar",        "Japanese",         "japanese"},
+                {"calendar",        "Islamic-Civil",    "islamic-civil"},
+                {"calendar",        "islamicc",         "islamic-civil"},   // bcp type alias
+                {"colalternate",    "NON-IGNORABLE",    "noignore"},
+                {"colcaselevel",    "yes",              "true"},
+                {"tz",              "america/new_york", "usnyc"},
+                {"tz",              "Asia/Kolkata",     "inccu"},   // NOTE(review): exact duplicate of the first row
+                {"timezone",        "navajo",           "usden"},
+                {"ca",              "aaaa",             "aaaa"},    // unknown type, well-formed type
+                {"ca",              "gregory-japanese-islamic", "gregory-japanese-islamic"},    // unknown type, well-formed type
+                {"zz",              "gregorian",        null},      // unknown key, ill-formed type
+                {"co",              "foo-",             null},      // unknown type, ill-formed type
+        };
+
+        for (String[] d : DATA) {
+            String keyword = d[0];
+            String value = d[1];
+            String expected = d[2];
+
+            String bcpType = ULocale.toUnicodeLocaleType(keyword, value);
+            assertEquals("keyword=" + keyword + ", value=" + value, expected, bcpType);
+        }
+
+    }
+
+    public void TestToLegacyType() {
+        String[][] DATA = { // each row: {keyword, value, expected ULocale.toLegacyType result}; null = null expected
+                {"calendar",        "gregory",          "gregorian"},
+                {"ca",              "gregory",          "gregorian"},
+                {"ca",              "Gregory",          "gregorian"},
+                {"ca",              "buddhist",         "buddhist"},
+                {"Calendar",        "Japanese",         "japanese"},
+                {"calendar",        "Islamic-Civil",    "islamic-civil"},
+                {"calendar",        "islamicc",         "islamic-civil"},   // bcp type alias
+                {"colalternate",    "noignore",         "non-ignorable"},
+                {"colcaselevel",    "true",             "yes"},
+                {"tz",              "usnyc",            "America/New_York"},
+                {"tz",              "inccu",            "Asia/Calcutta"},
+                {"timezone",        "usden",            "America/Denver"},
+                {"timezone",        "usnavajo",         "America/Denver"},  // bcp type alias
+                {"colstrength",     "quarternary",      "quaternary"},  // type alias
+                {"ca",              "aaaa",             "aaaa"},    // unknown type
+                {"calendar",        "gregory-japanese-islamic", "gregory-japanese-islamic"},    // unknown type, well-formed type
+                {"zz",              "gregorian",        "gregorian"},   // unknown key, bcp ill-formed type
+                {"ca",              "gregorian-calendar",   "gregorian-calendar"},  // known key, bcp ill-formed type
+                {"co",              "e=mc2",            null},  // known key, ill-formed bcp/legacy type
+        };
+
+        for (String[] d : DATA) {
+            String keyword = d[0];
+            String value = d[1];
+            String expected = d[2];
+
+            String legacyType = ULocale.toLegacyType(keyword, value);
+            assertEquals("keyword=" + keyword + ", value="  + value, expected, legacyType);
+        }
+    }
 }