]> granicus.if.org Git - icu/commitdiff
ICU-8951 BCP 47 key/type converter APIs merged from the work branch.
author Yoshito Umaoka <y.umaoka@gmail.com>
Wed, 3 Sep 2014 04:42:21 +0000 (04:42 +0000)
committer Yoshito Umaoka <y.umaoka@gmail.com>
Wed, 3 Sep 2014 04:42:21 +0000 (04:42 +0000)
X-SVN-Rev: 36320

12 files changed:
.gitattributes
icu4c/source/common/Makefile.in
icu4c/source/common/common.vcxproj
icu4c/source/common/common.vcxproj.filters
icu4c/source/common/ucln_cmn.h
icu4c/source/common/uloc.cpp
icu4c/source/common/uloc_keytype.cpp [new file with mode: 0644]
icu4c/source/common/uloc_tag.c
icu4c/source/common/ulocimp.h
icu4c/source/common/unicode/uloc.h
icu4c/source/test/cintltst/cloctst.c
icu4c/source/test/cintltst/cloctst.h

index 2aa831b9bdcd2a352ea78d843096136f569b1f20..ca69ef34e5e3bb384c9c193106c6690f607a35a5 100644 (file)
@@ -53,6 +53,7 @@ icu4c/source/aclocal.m4 -text
 icu4c/source/allinone/icucheck.bat -text
 icu4c/source/common/common.vcxproj -text
 icu4c/source/common/common.vcxproj.filters -text
+icu4c/source/common/uloc_keytype.cpp -text
 icu4c/source/common/unifiedcache.cpp -text
 icu4c/source/common/unifiedcache.h -text
 icu4c/source/data/coll/dsb.txt -text
index 2d8b12c0ab00cb16044ecadac45f2d4013ebe319..635b3f2630f5f6bcb96d2adf07671a6a63cfb3d0 100644 (file)
@@ -105,7 +105,7 @@ serv.o servnotf.o servls.o servlk.o servlkf.o servrbf.o servslkf.o \
 uidna.o usprep.o uts46.o punycode.o \
 util.o util_props.o parsepos.o locbased.o cwchar.o wintz.o dtintrv.o ucnvsel.o propsvec.o \
 ulist.o uloc_tag.o icudataver.o icuplug.o listformatter.o \
-sharedobject.o simplepatternformatter.o unifiedcache.o
+sharedobject.o simplepatternformatter.o unifiedcache.o uloc_keytype.o
 
 ## Header files to install
 HEADERS = $(srcdir)/unicode/*.h
index 12f993feb413a131f07872bb958aab8df52f4b28..5bf22962f6f0134ebd8e0b8b68305a56a3630d88 100644 (file)
@@ -1,4 +1,4 @@
-<?xml version="1.0" encoding="utf-8"?>\r
+<?xml version="1.0" encoding="utf-8"?>\r
 <Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">\r
   <ItemGroup Label="ProjectConfigurations">\r
     <ProjectConfiguration Include="Debug|Win32">\r
     <ClCompile Include="ubidi_props.c" />\r
     <ClCompile Include="ubidiln.c" />\r
     <ClCompile Include="ubidiwrt.c" />\r
+    <ClCompile Include="uloc_keytype.cpp" />\r
     <ClCompile Include="ushape.cpp" />\r
     <ClCompile Include="brkeng.cpp">\r
     </ClCompile>\r
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />\r
   <ImportGroup Label="ExtensionTargets">\r
   </ImportGroup>\r
-</Project>\r
+</Project>
\ No newline at end of file
index a4571cf92e8b43f11156fa105b9f2f16f17471cd..9befe71777f48c7e635776019d1086f7911f050a 100644 (file)
     <ClCompile Include="usetiter.cpp">\r
       <Filter>properties &amp; sets</Filter>\r
     </ClCompile>\r
-    <ClCompile Include="icuplug.c">\r
-      <Filter>registration</Filter>\r
-    </ClCompile>\r
     <ClCompile Include="serv.cpp">\r
       <Filter>registration</Filter>\r
     </ClCompile>\r
     <ClCompile Include="stringtriebuilder.cpp">\r
       <Filter>collections</Filter>\r
     </ClCompile>\r
+    <ClCompile Include="icuplug.cpp" />\r
+    <ClCompile Include="uloc_keytype.cpp">\r
+      <Filter>locales &amp; resources</Filter>\r
+    </ClCompile>\r
   </ItemGroup>\r
   <ItemGroup>\r
     <ClInclude Include="ubidi_props.h">\r
       <Filter>collections</Filter>\r
     </CustomBuild>\r
   </ItemGroup>\r
-</Project>\r
+</Project>
\ No newline at end of file
index 0e2abc6a520e38af34d2142608a347ea6d0bb04d..2290de868ba7a59252cc3ff318f02826635dea75 100644 (file)
@@ -37,6 +37,7 @@ typedef enum ECleanupCommonType {
     UCLN_COMMON_BREAKITERATOR,
     UCLN_COMMON_BREAKITERATOR_DICT,
     UCLN_COMMON_SERVICE,
+    UCLN_COMMON_LOCALE_KEY_TYPE,
     UCLN_COMMON_LOCALE,
     UCLN_COMMON_LOCALE_AVAILABLE,
     UCLN_COMMON_ULOC,
index d8d8ad28cdf6cbfbef09d2d74a92f62d7457b162..13083b0c8e3dbb5d81c26aa614a27bb9d6e21531 100644 (file)
@@ -2524,4 +2524,103 @@ uloc_acceptLanguage(char *result, int32_t resultAvailable,
     return -1;
 }
 
+// Maps a locale keyword to its BCP 47 Unicode locale extension key.
+// Returns the mapping found by ulocimp_toBcpKey() when there is one. With no
+// mapping, the input pointer itself is returned if ultag_isUnicodeLocaleKey()
+// accepts it; otherwise the result is NULL.
+U_CAPI const char* U_EXPORT2
+uloc_toUnicodeLocaleKey(const char* keyword)
+{
+    const char* mapped = ulocimp_toBcpKey(keyword);
+    if (mapped != NULL) {
+        return mapped;
+    }
+    // no mapping data for this keyword, but it may still be a well-formed key
+    return ultag_isUnicodeLocaleKey(keyword, -1) ? keyword : NULL;
+}
+
+// Maps a locale keyword value to its BCP 47 Unicode locale extension type.
+// Returns the mapping found by ulocimp_toBcpType() when there is one. With no
+// mapping, the input value pointer itself is returned if
+// ultag_isUnicodeLocaleType() accepts it; otherwise the result is NULL.
+U_CAPI const char* U_EXPORT2
+uloc_toUnicodeLocaleType(const char* keyword, const char* value)
+{
+    const char* mapped = ulocimp_toBcpType(keyword, value, NULL, NULL);
+    if (mapped != NULL) {
+        return mapped;
+    }
+    // no mapping for this keyword/value pair, but the value may still be well-formed
+    return ultag_isUnicodeLocaleType(value, -1) ? value : NULL;
+}
+
+// Non-zero when c is an ASCII digit or ASCII letter. The argument is expanded
+// several times, so pass only expressions without side effects.
+#define ISALPHANUM(c) ( ((c) >= '0' && (c) <= '9') || ((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') )
+
+// Returns TRUE when every character of legacyKey is an ASCII letter or digit.
+// An empty string contains no offending character and therefore yields TRUE.
+static UBool
+isWellFormedLegacyKey(const char* legacyKey)
+{
+    const char* cur;
+    for (cur = legacyKey; *cur != 0; cur++) {
+        if (!ISALPHANUM(*cur)) {
+            return FALSE;
+        }
+    }
+    return TRUE;
+}
+
+// Returns TRUE when legacyType is made of non-empty runs of ASCII letters and
+// digits separated by single '_', '/' or '-' characters. A leading, trailing
+// or doubled separator, any other character, or an empty string yields FALSE.
+static UBool
+isWellFormedLegacyType(const char* legacyType)
+{
+    int32_t runLen = 0;     // length of the alphanumeric run currently being scanned
+    const char* cur;
+    for (cur = legacyType; *cur != 0; cur++) {
+        const char c = *cur;
+        if (ISALPHANUM(c)) {
+            runLen++;
+            continue;
+        }
+        if (c != '_' && c != '/' && c != '-') {
+            return FALSE;
+        }
+        // a separator must be preceded by at least one alphanumeric character
+        if (runLen == 0) {
+            return FALSE;
+        }
+        runLen = 0;
+    }
+    return (runLen != 0);
+}
+
+// Maps a locale keyword to its legacy key.
+// Returns the mapping found by ulocimp_toLegacyKey() when there is one. With
+// no mapping, the input pointer itself is returned if it passes
+// isWellFormedLegacyKey(); otherwise the result is NULL.
+U_CAPI const char* U_EXPORT2
+uloc_toLegacyKey(const char* keyword)
+{
+    const char* mapped = ulocimp_toLegacyKey(keyword);
+    if (mapped != NULL) {
+        return mapped;
+    }
+
+    // Fall back to a syntax check against the legacy locale keyword syntax.
+    //
+    // Note:
+    //  Neither ICU nor LDML/CLDR defines the keyword syntax. A key obviously
+    //  must not contain '='. All existing keys use ASCII alphabetic letters
+    //  only, and no key incompatible with the BCP 47 syntax will be added.
+    //  Therefore a valid key is assumed to consist of [0-9a-zA-Z] only, with
+    //  no symbols.
+    return isWellFormedLegacyKey(keyword) ? keyword : NULL;
+}
+
+// Maps a locale keyword value to its legacy type.
+// Returns the mapping found by ulocimp_toLegacyType() when there is one. With
+// no mapping, the input value pointer itself is returned if it passes
+// isWellFormedLegacyType(); otherwise the result is NULL.
+U_CAPI const char* U_EXPORT2
+uloc_toLegacyType(const char* keyword, const char* value)
+{
+    const char* mapped = ulocimp_toLegacyType(keyword, value, NULL, NULL);
+    if (mapped != NULL) {
+        return mapped;
+    }
+
+    // Fall back to a syntax check against the legacy locale type syntax.
+    //
+    // Note:
+    //  Neither ICU nor LDML/CLDR defines the keyword syntax. A type obviously
+    //  must not contain '='. All existing types use ASCII alphabetic letters
+    //  plus a few symbol characters, and no type incompatible with the BCP 47
+    //  syntax will be added except for timezone IDs. For now a valid type is
+    //  assumed to start with [0-9a-zA-Z] and may contain '-', '_' or '/' in
+    //  the middle.
+    return isWellFormedLegacyType(value) ? value : NULL;
+}
+
 /*eof*/
diff --git a/icu4c/source/common/uloc_keytype.cpp b/icu4c/source/common/uloc_keytype.cpp
new file mode 100644 (file)
index 0000000..896ea9e
--- /dev/null
@@ -0,0 +1,577 @@
+/*\r
+**********************************************************************\r
+*   Copyright (C) 2014, International Business Machines\r
+*   Corporation and others.  All Rights Reserved.\r
+**********************************************************************\r
+*/\r
+#include "unicode/utypes.h"\r
+\r
+#include "cstring.h"\r
+#include "uassert.h"\r
+#include "ucln_cmn.h"\r
+#include "uhash.h"\r
+#include "umutex.h"\r
+#include "uresimp.h"\r
+#include "uvector.h"\r
+\r
+// Maps key ids to LocExtKeyData. initFromResourceBundle() registers each entry\r
+// under its legacy id and, when it differs, under its BCP 47 id as well.\r
+static UHashtable* gLocExtKeyMap = NULL;\r
+// Guards the one-time run of initFromResourceBundle(); reset by uloc_key_type_cleanup().\r
+static icu::UInitOnce gLocExtKeyMapInitOnce = U_INITONCE_INITIALIZER;\r
+// Holds the strings allocated with uprv_malloc() while the maps are built.\r
+static icu::UVector* gKeyTypeStringPool = NULL;\r
+// Holds every LocExtKeyData allocated while the maps are built.\r
+static icu::UVector* gLocExtKeyDataEntries = NULL;\r
+// Holds every LocExtType allocated while the maps are built.\r
+static icu::UVector* gLocExtTypeEntries = NULL;\r
+\r
+// bit flags for special types\r
+// (set when a key's type map contains the "CODEPOINTS" / "REORDER_CODE" entries)\r
+typedef enum {\r
+    SPECIALTYPE_NONE = 0,\r
+    SPECIALTYPE_CODEPOINTS = 1,\r
+    SPECIALTYPE_REORDER_CODE = 2\r
+} SpecialType;\r
+\r
+// Per-key data stored in gLocExtKeyMap.\r
+typedef struct LocExtKeyData {\r
+    const char*     legacyId;       // legacy key id (resource key of the keyMap entry)\r
+    const char*     bcpId;          // BCP 47 key id; same pointer as legacyId when the resource value is empty\r
+    UHashtable*     typeMap;        // type id -> LocExtType for this key\r
+    uint32_t        specialTypes;   // bitwise OR of SpecialType flags\r
+} LocExtKeyData;\r
+\r
+// Per-type data stored in LocExtKeyData::typeMap.\r
+typedef struct LocExtType {\r
+    const char*     legacyId;       // legacy type id\r
+    const char*     bcpId;          // BCP 47 type id; same pointer as legacyId when the resource value is empty\r
+} LocExtType;\r
+\r
+U_CDECL_BEGIN\r
+\r
+// Cleanup callback registered with ucln_common_registerCleanup() by\r
+// initFromResourceBundle(). Releases the key map and the three vectors, resets\r
+// the init-once guard so that the data can be built again, and returns TRUE.\r
+static UBool U_CALLCONV\r
+uloc_key_type_cleanup(void) {\r
+    if (gLocExtKeyMap != NULL) {\r
+        uhash_close(gLocExtKeyMap);\r
+        gLocExtKeyMap = NULL;\r
+    }\r
+\r
+    // deleting the vector runs uloc_deleteKeyDataEntry() on each element,\r
+    // which also closes the per-key type maps\r
+    delete gLocExtKeyDataEntries;\r
+    gLocExtKeyDataEntries = NULL;\r
+\r
+    delete gLocExtTypeEntries;\r
+    gLocExtTypeEntries = NULL;\r
+\r
+    delete gKeyTypeStringPool;\r
+    gKeyTypeStringPool = NULL;\r
+\r
+    gLocExtKeyMapInitOnce.reset();\r
+    return TRUE;\r
+}\r
+\r
+// Passed to the UVector constructor for gKeyTypeStringPool (presumably as its\r
+// element deleter); frees one pooled string with uprv_free().\r
+static void U_CALLCONV\r
+uloc_deleteKeyTypeStringPoolEntry(void* obj) {\r
+    uprv_free(obj);\r
+}\r
+\r
+// Passed to the UVector constructor for gLocExtKeyDataEntries. Releases one\r
+// LocExtKeyData: closes its type map, if it has one, then frees the struct.\r
+static void U_CALLCONV\r
+uloc_deleteKeyDataEntry(void* obj) {\r
+    LocExtKeyData* entry = (LocExtKeyData*)obj;\r
+    UHashtable* types = entry->typeMap;\r
+    if (types != NULL) {\r
+        uhash_close(types);\r
+    }\r
+    uprv_free(entry);\r
+}\r
+\r
+// Passed to the UVector constructor for gLocExtTypeEntries (presumably as its\r
+// element deleter); frees one LocExtType with uprv_free().\r
+static void U_CALLCONV\r
+uloc_deleteTypeEntry(void* obj) {\r
+    uprv_free(obj);\r
+}\r
+\r
+U_CDECL_END\r
+\r
+\r
+// One-time initializer, run through umtx_initOnce() from init().\r
+//\r
+// Reads the "keyTypeData" resource bundle and fills gLocExtKeyMap. Every entry\r
+// of its "keyMap" table produces one LocExtKeyData, registered under the\r
+// legacy key id and, when different, under the BCP 47 key id. For each key,\r
+// the matching "typeMap" table (plus the optional "typeAlias" and\r
+// "bcpTypeAlias" tables) is walked to build that key's type map.\r
+//\r
+// Strings and structs allocated here are added to gKeyTypeStringPool,\r
+// gLocExtKeyDataEntries and gLocExtTypeEntries, which uloc_key_type_cleanup()\r
+// deletes. The per-key type map is a local until it is stored in a\r
+// LocExtKeyData, so error exits taken before that point close it explicitly.\r
+//\r
+// @param sts  in/out error code; set to a failure code when a required\r
+//             resource cannot be read or an allocation fails.\r
+static void U_CALLCONV\r
+initFromResourceBundle(UErrorCode& sts) {\r
+    ucln_common_registerCleanup(UCLN_COMMON_LOCALE_KEY_TYPE, uloc_key_type_cleanup);\r
+\r
+    gLocExtKeyMap = uhash_open(uhash_hashIChars, uhash_compareIChars, NULL, &sts);\r
+    if (U_FAILURE(sts)) {\r
+        return;\r
+    }\r
+\r
+    UResourceBundle *keyTypeDataRes = NULL;\r
+    UResourceBundle *keyMapRes = NULL;\r
+    UResourceBundle *typeMapRes = NULL;\r
+    UResourceBundle *typeAliasRes = NULL;\r
+    UResourceBundle *bcpTypeAliasRes = NULL;\r
+\r
+    keyTypeDataRes = ures_openDirect(NULL, "keyTypeData", &sts);\r
+    keyMapRes = ures_getByKey(keyTypeDataRes, "keyMap", NULL, &sts);\r
+    typeMapRes = ures_getByKey(keyTypeDataRes, "typeMap", NULL, &sts);\r
+\r
+    // the alias tables are optional, so their lookup errors are not propagated\r
+    UErrorCode tmpSts = U_ZERO_ERROR;\r
+    typeAliasRes = ures_getByKey(keyTypeDataRes, "typeAlias", NULL, &tmpSts);\r
+    if (U_FAILURE(tmpSts)) {\r
+        typeAliasRes = NULL;\r
+        tmpSts = U_ZERO_ERROR;\r
+    }\r
+    bcpTypeAliasRes = ures_getByKey(keyTypeDataRes, "bcpTypeAlias", NULL, &tmpSts);\r
+    if (U_FAILURE(tmpSts)) {\r
+        bcpTypeAliasRes = NULL;\r
+        tmpSts = U_ZERO_ERROR;\r
+    }\r
+\r
+    // initialize vectors storing dynamically allocated objects\r
+    // NOTE(review): when an addElement() call below fails, whether the vector or\r
+    // the caller still owns the passed pointer cannot be told from this file -\r
+    // confirm against UVector before adding frees on those paths.\r
+    gKeyTypeStringPool = new UVector(uloc_deleteKeyTypeStringPoolEntry, NULL, sts);\r
+    if (gKeyTypeStringPool == NULL || U_FAILURE(sts)) {\r
+        goto close_bundles;\r
+    }\r
+    gLocExtKeyDataEntries = new UVector(uloc_deleteKeyDataEntry, NULL, sts);\r
+    if (gLocExtKeyDataEntries == NULL || U_FAILURE(sts)) {\r
+        goto close_bundles;\r
+    }\r
+    gLocExtTypeEntries = new UVector(uloc_deleteTypeEntry, NULL, sts);\r
+    if (gLocExtTypeEntries == NULL || U_FAILURE(sts)) {\r
+        goto close_bundles;\r
+    }\r
+\r
+    // iterate through keyMap resource\r
+    UResourceBundle keyMapEntry;\r
+    ures_initStackObject(&keyMapEntry);\r
+\r
+    while (ures_hasNext(keyMapRes)) {\r
+        ures_getNextResource(keyMapRes, &keyMapEntry, &sts);\r
+        if (U_FAILURE(sts)) {\r
+            break;\r
+        }\r
+        const char* legacyKeyId = ures_getKey(&keyMapEntry);\r
+        int32_t bcpKeyIdLen = 0;\r
+        const UChar* uBcpKeyId = ures_getString(&keyMapEntry, &bcpKeyIdLen, &sts);\r
+        if (U_FAILURE(sts)) {\r
+            break;\r
+        }\r
+\r
+        // empty value indicates that BCP key is same with the legacy key.\r
+        const char* bcpKeyId = legacyKeyId;\r
+        if (bcpKeyIdLen > 0) {\r
+            char* bcpKeyIdBuf = (char*)uprv_malloc(bcpKeyIdLen + 1);\r
+            if (bcpKeyIdBuf == NULL) {\r
+                sts = U_MEMORY_ALLOCATION_ERROR;\r
+                break;\r
+            }\r
+            u_UCharsToChars(uBcpKeyId, bcpKeyIdBuf, bcpKeyIdLen);\r
+            bcpKeyIdBuf[bcpKeyIdLen] = 0;\r
+            gKeyTypeStringPool->addElement(bcpKeyIdBuf, sts);\r
+            if (U_FAILURE(sts)) {\r
+                break;\r
+            }\r
+            bcpKeyId = bcpKeyIdBuf;\r
+        }\r
+\r
+        UBool isTZ = uprv_strcmp(legacyKeyId, "timezone") == 0;\r
+\r
+        // Per-key type map. It is a local until stored in keyData->typeMap below,\r
+        // so error exits taken in between must close it themselves.\r
+        UHashtable* typeDataMap = uhash_open(uhash_hashIChars, uhash_compareIChars, NULL, &sts);\r
+        if (U_FAILURE(sts)) {\r
+            break;\r
+        }\r
+        uint32_t specialTypes = SPECIALTYPE_NONE;\r
+\r
+        UResourceBundle* typeAliasResByKey = NULL;\r
+        UResourceBundle* bcpTypeAliasResByKey = NULL;\r
+\r
+        if (typeAliasRes != NULL) {\r
+            typeAliasResByKey = ures_getByKey(typeAliasRes, legacyKeyId, NULL, &tmpSts);\r
+            if (U_FAILURE(tmpSts)) {\r
+                // only a few keys have type alias mapping\r
+                typeAliasResByKey = NULL;\r
+                tmpSts = U_ZERO_ERROR;\r
+            }\r
+        }\r
+        if (bcpTypeAliasRes != NULL) {\r
+            bcpTypeAliasResByKey = ures_getByKey(bcpTypeAliasRes, bcpKeyId, NULL, &tmpSts);\r
+            if (U_FAILURE(tmpSts)) {\r
+                // only a few keys have BCP type alias mapping\r
+                bcpTypeAliasResByKey = NULL;\r
+                tmpSts = U_ZERO_ERROR;\r
+            }\r
+        }\r
+\r
+        // look up type map for the key, and walk through the mapping data\r
+        UResourceBundle* typeMapResByKey = ures_getByKey(typeMapRes, legacyKeyId, NULL, &tmpSts);\r
+        if (U_FAILURE(tmpSts)) {\r
+            // type map for each key must exist\r
+            U_ASSERT(FALSE);\r
+            tmpSts = U_ZERO_ERROR;\r
+        } else {\r
+            UResourceBundle typeMapEntry;\r
+            ures_initStackObject(&typeMapEntry);\r
+\r
+            while (ures_hasNext(typeMapResByKey)) {\r
+                ures_getNextResource(typeMapResByKey, &typeMapEntry, &sts);\r
+                if (U_FAILURE(sts)) {\r
+                    break;\r
+                }\r
+                const char* legacyTypeId = ures_getKey(&typeMapEntry);\r
+\r
+                // special types\r
+                if (uprv_strcmp(legacyTypeId, "CODEPOINTS") == 0) {\r
+                    specialTypes |= SPECIALTYPE_CODEPOINTS;\r
+                    continue;\r
+                }\r
+                if (uprv_strcmp(legacyTypeId, "REORDER_CODE") == 0) {\r
+                    specialTypes |= SPECIALTYPE_REORDER_CODE;\r
+                    continue;\r
+                }\r
+\r
+                if (isTZ) {\r
+                    // a timezone key uses a colon instead of a slash in the resource.\r
+                    // e.g. America:Los_Angeles\r
+                    if (uprv_strchr(legacyTypeId, ':') != NULL) {\r
+                        int32_t legacyTypeIdLen = uprv_strlen(legacyTypeId);\r
+                        char* legacyTypeIdBuf = (char*)uprv_malloc(legacyTypeIdLen + 1);\r
+                        if (legacyTypeIdBuf == NULL) {\r
+                            sts = U_MEMORY_ALLOCATION_ERROR;\r
+                            break;\r
+                        }\r
+                        const char* p = legacyTypeId;\r
+                        char* q = legacyTypeIdBuf;\r
+                        while (*p) {\r
+                            if (*p == ':') {\r
+                                *q++ = '/';\r
+                            } else {\r
+                                *q++ = *p;\r
+                            }\r
+                            p++;\r
+                        }\r
+                        *q = 0;\r
+\r
+                        gKeyTypeStringPool->addElement(legacyTypeIdBuf, sts);\r
+                        if (U_FAILURE(sts)) {\r
+                            break;\r
+                        }\r
+                        legacyTypeId = legacyTypeIdBuf;\r
+                    }\r
+                }\r
+\r
+                int32_t bcpTypeIdLen = 0;\r
+                const UChar* uBcpTypeId = ures_getString(&typeMapEntry, &bcpTypeIdLen, &sts);\r
+                if (U_FAILURE(sts)) {\r
+                    break;\r
+                }\r
+\r
+                // empty value indicates that BCP type is same with the legacy type.\r
+                const char* bcpTypeId = legacyTypeId;\r
+                if (bcpTypeIdLen > 0) {\r
+                    char* bcpTypeIdBuf = (char*)uprv_malloc(bcpTypeIdLen + 1);\r
+                    if (bcpTypeIdBuf == NULL) {\r
+                        sts = U_MEMORY_ALLOCATION_ERROR;\r
+                        break;\r
+                    }\r
+                    u_UCharsToChars(uBcpTypeId, bcpTypeIdBuf, bcpTypeIdLen);\r
+                    bcpTypeIdBuf[bcpTypeIdLen] = 0;\r
+                    gKeyTypeStringPool->addElement(bcpTypeIdBuf, sts);\r
+                    if (U_FAILURE(sts)) {\r
+                        break;\r
+                    }\r
+                    bcpTypeId = bcpTypeIdBuf;\r
+                }\r
+\r
+                // Note: legacy type value should never be\r
+                // equivalent to bcp type value of a different\r
+                // type under the same key. So we use a single\r
+                // map for lookup.\r
+                LocExtType* t = (LocExtType*)uprv_malloc(sizeof(LocExtType));\r
+                if (t == NULL) {\r
+                    sts = U_MEMORY_ALLOCATION_ERROR;\r
+                    break;\r
+                }\r
+                t->bcpId = bcpTypeId;\r
+                t->legacyId = legacyTypeId;\r
+                gLocExtTypeEntries->addElement((void*)t, sts);\r
+                if (U_FAILURE(sts)) {\r
+                    break;\r
+                }\r
+\r
+                uhash_put(typeDataMap, (void*)legacyTypeId, t, &sts);\r
+                if (bcpTypeId != legacyTypeId) {\r
+                    // different type value\r
+                    uhash_put(typeDataMap, (void*)bcpTypeId, t, &sts);\r
+                }\r
+                if (U_FAILURE(sts)) {\r
+                    break;\r
+                }\r
+\r
+                // also put aliases in the map\r
+                if (typeAliasResByKey != NULL) {\r
+                    UResourceBundle typeAliasDataEntry;\r
+                    ures_initStackObject(&typeAliasDataEntry);\r
+\r
+                    ures_resetIterator(typeAliasResByKey);\r
+                    while (ures_hasNext(typeAliasResByKey) && U_SUCCESS(sts)) {\r
+                        int32_t toLen;\r
+                        ures_getNextResource(typeAliasResByKey, &typeAliasDataEntry, &sts);\r
+                        const UChar* to = ures_getString(&typeAliasDataEntry, &toLen, &sts);\r
+                        if (U_FAILURE(sts)) {\r
+                            break;\r
+                        }\r
+                        // check if this is an alias of canonical legacy type\r
+                        if (uprv_compareInvAscii(NULL, legacyTypeId, -1, to, toLen) == 0) {\r
+                            const char* from = ures_getKey(&typeAliasDataEntry);\r
+                            if (isTZ) {\r
+                                // replace colon with slash if necessary\r
+                                if (uprv_strchr(from, ':') != NULL) {\r
+                                    int32_t fromLen = uprv_strlen(from);\r
+                                    char* fromBuf = (char*)uprv_malloc(fromLen + 1);\r
+                                    if (fromBuf == NULL) {\r
+                                        sts = U_MEMORY_ALLOCATION_ERROR;\r
+                                        break;\r
+                                    }\r
+                                    const char* p = from;\r
+                                    char* q = fromBuf;\r
+                                    while (*p) {\r
+                                        if (*p == ':') {\r
+                                            *q++ = '/';\r
+                                        } else {\r
+                                            *q++ = *p;\r
+                                        }\r
+                                        p++;\r
+                                    }\r
+                                    *q = 0;\r
+\r
+                                    gKeyTypeStringPool->addElement(fromBuf, sts);\r
+                                    if (U_FAILURE(sts)) {\r
+                                        break;\r
+                                    }\r
+                                    from = fromBuf;\r
+                                }\r
+                            }\r
+                            uhash_put(typeDataMap, (void*)from, t, &sts);\r
+                        }\r
+                    }\r
+                    ures_close(&typeAliasDataEntry);\r
+                    if (U_FAILURE(sts)) {\r
+                        break;\r
+                    }\r
+                }\r
+\r
+                if (bcpTypeAliasResByKey != NULL) {\r
+                    UResourceBundle bcpTypeAliasDataEntry;\r
+                    ures_initStackObject(&bcpTypeAliasDataEntry);\r
+\r
+                    ures_resetIterator(bcpTypeAliasResByKey);\r
+                    while (ures_hasNext(bcpTypeAliasResByKey) && U_SUCCESS(sts)) {\r
+                        int32_t toLen;\r
+                        ures_getNextResource(bcpTypeAliasResByKey, &bcpTypeAliasDataEntry, &sts);\r
+                        const UChar* to = ures_getString(&bcpTypeAliasDataEntry, &toLen, &sts);\r
+                        if (U_FAILURE(sts)) {\r
+                            break;\r
+                        }\r
+                        // check if this is an alias of bcp type\r
+                        if (uprv_compareInvAscii(NULL, bcpTypeId, -1, to, toLen) == 0) {\r
+                            const char* from = ures_getKey(&bcpTypeAliasDataEntry);\r
+                            uhash_put(typeDataMap, (void*)from, t, &sts);\r
+                        }\r
+                    }\r
+                    ures_close(&bcpTypeAliasDataEntry);\r
+                    if (U_FAILURE(sts)) {\r
+                        break;\r
+                    }\r
+                }\r
+            }\r
+            ures_close(&typeMapEntry);\r
+        }\r
+        ures_close(typeMapResByKey);\r
+        ures_close(typeAliasResByKey);\r
+        ures_close(bcpTypeAliasResByKey);\r
+        if (U_FAILURE(sts)) {\r
+            // the type map is not referenced by any LocExtKeyData yet; close it\r
+            // here so it does not leak\r
+            uhash_close(typeDataMap);\r
+            break;\r
+        }\r
+\r
+        LocExtKeyData* keyData = (LocExtKeyData*)uprv_malloc(sizeof(LocExtKeyData));\r
+        if (keyData == NULL) {\r
+            // same as above: nothing owns the type map yet\r
+            uhash_close(typeDataMap);\r
+            sts = U_MEMORY_ALLOCATION_ERROR;\r
+            break;\r
+        }\r
+        keyData->bcpId = bcpKeyId;\r
+        keyData->legacyId = legacyKeyId;\r
+        keyData->specialTypes = specialTypes;\r
+        keyData->typeMap = typeDataMap;\r
+\r
+        // NOTE(review): if addElement() fails without taking the pointer, keyData\r
+        // and its type map are not released here - confirm UVector's behavior.\r
+        gLocExtKeyDataEntries->addElement((void*)keyData, sts);\r
+        if (U_FAILURE(sts)) {\r
+            break;\r
+        }\r
+\r
+        uhash_put(gLocExtKeyMap, (void*)legacyKeyId, keyData, &sts);\r
+        if (legacyKeyId != bcpKeyId) {\r
+            // different key value\r
+            uhash_put(gLocExtKeyMap, (void*)bcpKeyId, keyData, &sts);\r
+        }\r
+        if (U_FAILURE(sts)) {\r
+            break;\r
+        }\r
+    }\r
+\r
+    ures_close(&keyMapEntry);\r
+\r
+close_bundles:\r
+    ures_close(bcpTypeAliasRes);\r
+    ures_close(typeAliasRes);\r
+    ures_close(typeMapRes);\r
+    ures_close(keyMapRes);\r
+    ures_close(keyTypeDataRes);\r
+}\r
+\r
+// Runs initFromResourceBundle() once through umtx_initOnce() and reports\r
+// whether the resulting status is a success code.\r
+static UBool\r
+init() {\r
+    UErrorCode status = U_ZERO_ERROR;\r
+    umtx_initOnce(gLocExtKeyMapInitOnce, &initFromResourceBundle, status);\r
+    return U_SUCCESS(status) ? TRUE : FALSE;\r
+}\r
+\r
+// Returns TRUE when val is one or more '-'-separated subtags, each made of\r
+// 4 to 6 hexadecimal digits (0-9, A-F, a-f). Anything else yields FALSE.\r
+static UBool\r
+isSpecialTypeCodepoints(const char* val) {\r
+    int32_t digits = 0;     // hex digits seen in the current subtag\r
+    for (const char* cur = val; *cur != 0; cur++) {\r
+        const char c = *cur;\r
+        if (c == '-') {\r
+            // end of a subtag: its length must be within 4..6\r
+            if (digits < 4 || digits > 6) {\r
+                return FALSE;\r
+            }\r
+            digits = 0;\r
+            continue;\r
+        }\r
+        if (!(('0' <= c && c <= '9') ||\r
+                ('A' <= c && c <= 'F') || ('a' <= c && c <= 'f'))) {\r
+            return FALSE;\r
+        }\r
+        digits++;\r
+    }\r
+    // the last subtag has no trailing '-', so check it here\r
+    return (digits >= 4 && digits <= 6);\r
+}\r
+\r
+// Returns TRUE when val is one or more '-'-separated subtags, each made of\r
+// 3 to 8 ASCII letters. Anything else yields FALSE.\r
+static UBool\r
+isSpecialTypeReorderCode(const char* val) {\r
+    int32_t letters = 0;    // letters seen in the current subtag\r
+    for (const char* cur = val; *cur != 0; cur++) {\r
+        const char c = *cur;\r
+        if (c == '-') {\r
+            // end of a subtag: its length must be within 3..8\r
+            if (letters < 3 || letters > 8) {\r
+                return FALSE;\r
+            }\r
+            letters = 0;\r
+            continue;\r
+        }\r
+        if (!(('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z'))) {\r
+            return FALSE;\r
+        }\r
+        letters++;\r
+    }\r
+    // the last subtag has no trailing '-', so check it here\r
+    return (letters >= 3 && letters <= 8);\r
+}\r
+\r
+// Looks key up in gLocExtKeyMap and returns the BCP 47 key id of the entry,\r
+// or NULL when initialization failed or the key is not in the map.\r
+U_CFUNC const char*\r
+ulocimp_toBcpKey(const char* key) {\r
+    if (!init()) {\r
+        return NULL;\r
+    }\r
+\r
+    const LocExtKeyData* entry = (const LocExtKeyData*)uhash_get(gLocExtKeyMap, key);\r
+    return (entry == NULL) ? NULL : entry->bcpId;\r
+}\r
+\r
+// Looks key up in gLocExtKeyMap and returns the legacy key id of the entry,\r
+// or NULL when initialization failed or the key is not in the map.\r
+U_CFUNC const char*\r
+ulocimp_toLegacyKey(const char* key) {\r
+    if (!init()) {\r
+        return NULL;\r
+    }\r
+\r
+    const LocExtKeyData* entry = (const LocExtKeyData*)uhash_get(gLocExtKeyMap, key);\r
+    return (entry == NULL) ? NULL : entry->legacyId;\r
+}\r
+\r
+// Converts a type of the given key to its BCP 47 type id.\r
+//\r
+// @param key            key id looked up in gLocExtKeyMap\r
+// @param type           type id looked up in that key's type map\r
+// @param isKnownKey     optional output; set to TRUE when key is in the map, FALSE otherwise\r
+// @param isSpecialType  optional output; set to TRUE when type was accepted by one of the\r
+//                       key's special-type checks, FALSE otherwise\r
+// @return the mapped BCP 47 type id; the input type pointer itself when it matched a special\r
+//         type; NULL when initialization failed, the key is unknown, or nothing matched\r
+U_CFUNC const char*\r
+ulocimp_toBcpType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType) {\r
+    if (isKnownKey != NULL) {\r
+        *isKnownKey = FALSE;\r
+    }\r
+    if (isSpecialType != NULL) {\r
+        *isSpecialType = FALSE;\r
+    }\r
+\r
+    if (!init()) {\r
+        return NULL;\r
+    }\r
+\r
+    LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key);\r
+    if (keyData == NULL) {\r
+        return NULL;\r
+    }\r
+    if (isKnownKey != NULL) {\r
+        *isKnownKey = TRUE;\r
+    }\r
+\r
+    LocExtType* entry = (LocExtType*)uhash_get(keyData->typeMap, type);\r
+    if (entry != NULL) {\r
+        return entry->bcpId;\r
+    }\r
+\r
+    // not a listed type: try the special types enabled for this key,\r
+    // code points first, reorder codes only if that did not match\r
+    const uint32_t flags = keyData->specialTypes;\r
+    const UBool matched =\r
+        ((flags & SPECIALTYPE_CODEPOINTS) != 0 && isSpecialTypeCodepoints(type)) ||\r
+        ((flags & SPECIALTYPE_REORDER_CODE) != 0 && isSpecialTypeReorderCode(type));\r
+    if (!matched) {\r
+        return NULL;\r
+    }\r
+    if (isSpecialType != NULL) {\r
+        *isSpecialType = TRUE;\r
+    }\r
+    return type;\r
+}\r
+\r
+\r
+// Converts a type of the given key to its legacy type id.\r
+//\r
+// @param key            key id looked up in gLocExtKeyMap\r
+// @param type           type id looked up in that key's type map\r
+// @param isKnownKey     optional output; set to TRUE when key is in the map, FALSE otherwise\r
+// @param isSpecialType  optional output; set to TRUE when type was accepted by one of the\r
+//                       key's special-type checks, FALSE otherwise\r
+// @return the mapped legacy type id; the input type pointer itself when it matched a special\r
+//         type; NULL when initialization failed, the key is unknown, or nothing matched\r
+U_CFUNC const char*\r
+ulocimp_toLegacyType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType) {\r
+    if (isKnownKey != NULL) {\r
+        *isKnownKey = FALSE;\r
+    }\r
+    if (isSpecialType != NULL) {\r
+        *isSpecialType = FALSE;\r
+    }\r
+\r
+    if (!init()) {\r
+        return NULL;\r
+    }\r
+\r
+    LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key);\r
+    if (keyData == NULL) {\r
+        return NULL;\r
+    }\r
+    if (isKnownKey != NULL) {\r
+        *isKnownKey = TRUE;\r
+    }\r
+\r
+    LocExtType* entry = (LocExtType*)uhash_get(keyData->typeMap, type);\r
+    if (entry != NULL) {\r
+        return entry->legacyId;\r
+    }\r
+\r
+    // not a listed type: try the special types enabled for this key,\r
+    // code points first, reorder codes only if that did not match\r
+    const uint32_t flags = keyData->specialTypes;\r
+    const UBool matched =\r
+        ((flags & SPECIALTYPE_CODEPOINTS) != 0 && isSpecialTypeCodepoints(type)) ||\r
+        ((flags & SPECIALTYPE_REORDER_CODE) != 0 && isSpecialTypeReorderCode(type));\r
+    if (!matched) {\r
+        return NULL;\r
+    }\r
+    if (isSpecialType != NULL) {\r
+        *isSpecialType = TRUE;\r
+    }\r
+    return type;\r
+}\r
+\r
index 3725955b3612b9d07677156a2009318bd53507f8..c038026790d6a9bc55dc2501034f99568b9c9467 100644 (file)
@@ -408,8 +408,8 @@ _isPrivateuseValueSubtags(const char* s, int32_t len) {
     return _isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag));
 }
 
-static UBool
-_isLDMLKey(const char* s, int32_t len) {
+U_CFUNC UBool
+ultag_isUnicodeLocaleKey(const char* s, int32_t len) {
     if (len < 0) {
         len = (int32_t)uprv_strlen(s);
     }
@@ -419,17 +419,33 @@ _isLDMLKey(const char* s, int32_t len) {
     return FALSE;
 }
 
-static UBool
-_isLDMLType(const char* s, int32_t len) {
+U_CFUNC UBool
+ultag_isUnicodeLocaleType(const char*s, int32_t len) {
+    const char* p;
+    int32_t subtagLen = 0;
+
     if (len < 0) {
         len = (int32_t)uprv_strlen(s);
     }
-    if (len >= 3 && len <= 8 && _isAlphaNumericString(s, len)) {
-        return TRUE;
+
+    for (p = s; len > 0; p++, len--) {
+        if (*p == SEP) {
+            if (subtagLen < 3) {
+                return FALSE;
+            }
+            subtagLen = 0;
+        } else if (ISALPHA(*p) || ISNUMERIC(*p)) {
+            subtagLen++;
+            if (subtagLen > 8) {
+                return FALSE;
+            }
+        } else {
+            return FALSE;
+        }
     }
-    return FALSE;
-}
 
+    return (subtagLen >= 3);
+}
 /*
 * -------------------------------------------------
 *
@@ -608,417 +624,6 @@ _initializeULanguageTag(ULanguageTag* langtag) {
     langtag->privateuse = EMPTY;
 }
 
-#define KEYTYPEDATA     "keyTypeData"
-#define KEYMAP          "keyMap"
-#define TYPEMAP         "typeMap"
-#define TYPEALIAS       "typeAlias"
-#define MAX_BCP47_SUBTAG_LEN    9   /* including null terminator */
-#define MAX_LDML_KEY_LEN        22
-#define MAX_LDML_TYPE_LEN       32
-
-static int32_t
-_ldmlKeyToBCP47(const char* key, int32_t keyLen,
-                char* bcpKey, int32_t bcpKeyCapacity,
-                UErrorCode *status) {
-    UResourceBundle *rb;
-    char keyBuf[MAX_LDML_KEY_LEN];
-    char bcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
-    int32_t resultLen = 0;
-    int32_t i;
-    UErrorCode tmpStatus = U_ZERO_ERROR;
-    const UChar *uBcpKey;
-    int32_t bcpKeyLen;
-
-    if (keyLen < 0) {
-        keyLen = (int32_t)uprv_strlen(key);
-    }
-
-    if (keyLen >= sizeof(keyBuf)) {
-        /* no known valid LDML key exceeding 21 */
-        *status = U_ILLEGAL_ARGUMENT_ERROR;
-        return 0;
-    }
-
-    uprv_memcpy(keyBuf, key, keyLen);
-    keyBuf[keyLen] = 0;
-
-    /* to lower case */
-    for (i = 0; i < keyLen; i++) {
-        keyBuf[i] = uprv_tolower(keyBuf[i]);
-    }
-
-    rb = ures_openDirect(NULL, KEYTYPEDATA, status);
-    ures_getByKey(rb, KEYMAP, rb, status);
-
-    if (U_FAILURE(*status)) {
-        ures_close(rb);
-        return 0;
-    }
-
-    uBcpKey = ures_getStringByKey(rb, keyBuf, &bcpKeyLen, &tmpStatus);
-    if (U_SUCCESS(tmpStatus)) {
-        if (bcpKeyLen == 0) {
-            /* empty value indicates the BCP47 key is same with the legacy key */
-            uprv_memcpy(bcpKeyBuf, key, keyLen);
-            bcpKeyBuf[keyLen] = 0;
-            resultLen = keyLen;
-        } else {
-            u_UCharsToChars(uBcpKey, bcpKeyBuf, bcpKeyLen);
-            bcpKeyBuf[bcpKeyLen] = 0;
-            resultLen = bcpKeyLen;
-        }
-    } else {
-        if (_isLDMLKey(key, keyLen)) {
-            uprv_memcpy(bcpKeyBuf, key, keyLen);
-            bcpKeyBuf[keyLen] = 0;
-            resultLen = keyLen;
-        } else {
-            /* mapping not availabe */
-            *status = U_ILLEGAL_ARGUMENT_ERROR;
-        }
-    }
-    ures_close(rb);
-
-    if (U_FAILURE(*status)) {
-        return 0;
-    }
-
-    uprv_memcpy(bcpKey, bcpKeyBuf, uprv_min(resultLen, bcpKeyCapacity));
-    return u_terminateChars(bcpKey, bcpKeyCapacity, resultLen, status);
-}
-
-static int32_t
-_bcp47ToLDMLKey(const char* bcpKey, int32_t bcpKeyLen,
-                char* key, int32_t keyCapacity,
-                UErrorCode *status) {
-    UResourceBundle *rb;
-    char bcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
-    int32_t resultLen = 0;
-    int32_t i;
-    const char *resKey = NULL;
-    UResourceBundle *mapData;
-
-    if (bcpKeyLen < 0) {
-        bcpKeyLen = (int32_t)uprv_strlen(bcpKey);
-    }
-
-    if (bcpKeyLen >= sizeof(bcpKeyBuf)) {
-        *status = U_ILLEGAL_ARGUMENT_ERROR;
-        return 0;
-    }
-
-    uprv_memcpy(bcpKeyBuf, bcpKey, bcpKeyLen);
-    bcpKeyBuf[bcpKeyLen] = 0;
-
-    /* to lower case */
-    for (i = 0; i < bcpKeyLen; i++) {
-        bcpKeyBuf[i] = uprv_tolower(bcpKeyBuf[i]);
-    }
-
-    rb = ures_openDirect(NULL, KEYTYPEDATA, status);
-    ures_getByKey(rb, KEYMAP, rb, status);
-    if (U_FAILURE(*status)) {
-        ures_close(rb);
-        return 0;
-    }
-
-    mapData = ures_getNextResource(rb, NULL, status);
-    while (U_SUCCESS(*status)) {
-        const UChar *uBcpKey;
-        char tmpBcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
-        int32_t tmpBcpKeyLen;
-        const char *tmpBcpKey = tmpBcpKeyBuf;
-
-        uBcpKey = ures_getString(mapData, &tmpBcpKeyLen, status);
-        if (U_FAILURE(*status)) {
-            break;
-        }
-        if (tmpBcpKeyLen == 0) {
-            /* empty value indicates the BCP47 key is same with the legacy key */
-            tmpBcpKey = ures_getKey(mapData);
-        } else {
-            u_UCharsToChars(uBcpKey, tmpBcpKeyBuf, tmpBcpKeyLen);
-            tmpBcpKeyBuf[tmpBcpKeyLen] = 0;
-        }
-        if (uprv_compareInvCharsAsAscii(bcpKeyBuf, tmpBcpKey) == 0) {
-            /* found a matching BCP47 key */
-            resKey = ures_getKey(mapData);
-            resultLen = (int32_t)uprv_strlen(resKey);
-            break;
-        }
-        if (!ures_hasNext(rb)) {
-            break;
-        }
-        ures_getNextResource(rb, mapData, status);
-    }
-    ures_close(mapData);
-    ures_close(rb);
-
-    if (U_FAILURE(*status)) {
-        return 0;
-    }
-
-    if (resKey == NULL) {
-        resKey = bcpKeyBuf;
-        resultLen = bcpKeyLen;
-    }
-
-    uprv_memcpy(key, resKey, uprv_min(resultLen, keyCapacity));
-    return u_terminateChars(key, keyCapacity, resultLen, status);
-}
-
-static int32_t
-_ldmlTypeToBCP47(const char* key, int32_t keyLen,
-                 const char* type, int32_t typeLen,
-                 char* bcpType, int32_t bcpTypeCapacity,
-                 UErrorCode *status) {
-    UResourceBundle *rb, *keyTypeData, *typeMapForKey;
-    char keyBuf[MAX_LDML_KEY_LEN];
-    char typeBuf[MAX_LDML_TYPE_LEN];
-    char bcpTypeBuf[MAX_BCP47_SUBTAG_LEN];
-    int32_t resultLen = 0;
-    int32_t i;
-    UErrorCode tmpStatus = U_ZERO_ERROR;
-    const UChar *uBcpType, *uCanonicalType;
-    int32_t bcpTypeLen, canonicalTypeLen;
-    UBool isTimezone = FALSE;
-
-    if (keyLen < 0) {
-        keyLen = (int32_t)uprv_strlen(key);
-    }
-    if (keyLen >= sizeof(keyBuf)) {
-        /* no known valid LDML key exceeding 21 */
-        *status = U_ILLEGAL_ARGUMENT_ERROR;
-        return 0;
-    }
-    uprv_memcpy(keyBuf, key, keyLen);
-    keyBuf[keyLen] = 0;
-
-    /* to lower case */
-    for (i = 0; i < keyLen; i++) {
-        keyBuf[i] = uprv_tolower(keyBuf[i]);
-    }
-    if (uprv_compareInvCharsAsAscii(keyBuf, "timezone") == 0) {
-        isTimezone = TRUE;
-    }
-
-    if (typeLen < 0) {
-        typeLen = (int32_t)uprv_strlen(type);
-    }
-    if (typeLen >= sizeof(typeBuf)) {
-        *status = U_ILLEGAL_ARGUMENT_ERROR;
-        return 0;
-    }
-
-    if (isTimezone) {
-        /* replace '/' with ':' */
-        for (i = 0; i < typeLen; i++) {
-            if (*(type + i) == '/') {
-                typeBuf[i] = ':';
-            } else {
-                typeBuf[i] = *(type + i);
-            }
-        }
-        typeBuf[typeLen] = 0;
-        type = &typeBuf[0];
-    }
-
-    keyTypeData = ures_openDirect(NULL, KEYTYPEDATA, status);
-    rb = ures_getByKey(keyTypeData, TYPEMAP, NULL, status);
-    if (U_FAILURE(*status)) {
-        ures_close(rb);
-        ures_close(keyTypeData);
-        return 0;
-    }
-
-    typeMapForKey = ures_getByKey(rb, keyBuf, NULL, &tmpStatus);
-    uBcpType = ures_getStringByKey(typeMapForKey, type, &bcpTypeLen, &tmpStatus);
-    if (U_SUCCESS(tmpStatus)) {
-        if (bcpTypeLen == 0) {
-            /* empty value indicates the BCP47 type is same with the legacy type */
-            uprv_memcpy(bcpTypeBuf, type, typeLen);
-            resultLen = typeLen;
-        } else {
-            u_UCharsToChars(uBcpType, bcpTypeBuf, bcpTypeLen);
-            resultLen = bcpTypeLen;
-        }
-    } else if (tmpStatus == U_MISSING_RESOURCE_ERROR) {
-        /* is this type alias? */
-        tmpStatus = U_ZERO_ERROR;
-        ures_getByKey(keyTypeData, TYPEALIAS, rb, &tmpStatus);
-        ures_getByKey(rb, keyBuf, rb, &tmpStatus);
-        uCanonicalType = ures_getStringByKey(rb, type, &canonicalTypeLen, &tmpStatus);
-        if (U_SUCCESS(tmpStatus)) {
-            u_UCharsToChars(uCanonicalType, typeBuf, canonicalTypeLen);
-            if (isTimezone) {
-                /* replace '/' with ':' */
-                for (i = 0; i < canonicalTypeLen; i++) {
-                    if (typeBuf[i] == '/') {
-                        typeBuf[i] = ':';
-                    }
-                }
-            }
-            typeBuf[canonicalTypeLen] = 0;
-
-            /* look up the canonical type */
-            uBcpType = ures_getStringByKey(typeMapForKey, typeBuf, &bcpTypeLen, &tmpStatus);
-            if (U_SUCCESS(tmpStatus)) {
-                if (bcpTypeLen == 0) {
-                    /* empty value indicates the BCP47 type is same with the legacy type */
-                    uprv_memcpy(bcpTypeBuf, typeBuf, canonicalTypeLen);
-                    resultLen = canonicalTypeLen;
-                } else {
-                    u_UCharsToChars(uBcpType, bcpTypeBuf, bcpTypeLen);
-                    resultLen = bcpTypeLen;
-                }
-            }
-        }
-        if (tmpStatus == U_MISSING_RESOURCE_ERROR) {
-            if (_isLDMLType(type, typeLen)) {
-                uprv_memcpy(bcpTypeBuf, type, typeLen);
-                resultLen = typeLen;
-            } else {
-                /* mapping not availabe */
-                *status = U_ILLEGAL_ARGUMENT_ERROR;
-            }
-        }
-    } else {
-        *status = tmpStatus;
-    }
-    ures_close(rb);
-    ures_close(typeMapForKey);
-    ures_close(keyTypeData);
-
-    if (U_FAILURE(*status)) {
-        return 0;
-    }
-
-    uprv_memcpy(bcpType, bcpTypeBuf, uprv_min(resultLen, bcpTypeCapacity));
-    return u_terminateChars(bcpType, bcpTypeCapacity, resultLen, status);
-}
-
-static int32_t
-_bcp47ToLDMLType(const char* key, int32_t keyLen,
-                 const char* bcpType, int32_t bcpTypeLen,
-                 char* type, int32_t typeCapacity,
-                 UErrorCode *status) {
-    UResourceBundle *rb;
-    char keyBuf[MAX_LDML_KEY_LEN];
-    char bcpTypeBuf[ULOC_KEYWORDS_CAPACITY]; /* ensure buffter is large enough for multiple values (e.g. buddhist-greg) */
-    int32_t resultLen = 0;
-    int32_t i, typeSize;
-    const char *resType = NULL;
-    UResourceBundle *mapData;
-    UErrorCode tmpStatus = U_ZERO_ERROR;
-    int32_t copyLen;
-
-    if (keyLen < 0) {
-        keyLen = (int32_t)uprv_strlen(key);
-    }
-
-    if (keyLen >= sizeof(keyBuf)) {
-        /* no known valid LDML key exceeding 21 */
-        *status = U_ILLEGAL_ARGUMENT_ERROR;
-        return 0;
-    }
-    uprv_memcpy(keyBuf, key, keyLen);
-    keyBuf[keyLen] = 0;
-
-    /* to lower case */
-    for (i = 0; i < keyLen; i++) {
-        keyBuf[i] = uprv_tolower(keyBuf[i]);
-    }
-
-
-    if (bcpTypeLen < 0) {
-        bcpTypeLen = (int32_t)uprv_strlen(bcpType);
-    }
-
-    typeSize = 0;
-    for (i = 0; i < bcpTypeLen; i++) {
-        if (bcpType[i] == SEP) {
-            if (typeSize >= MAX_BCP47_SUBTAG_LEN) {
-                *status = U_ILLEGAL_ARGUMENT_ERROR;
-                return 0;
-            }
-            typeSize = 0;
-        } else {
-            typeSize++;
-        }
-    }
-
-    uprv_memcpy(bcpTypeBuf, bcpType, bcpTypeLen);
-    bcpTypeBuf[bcpTypeLen] = 0;
-
-    /* to lower case */
-    for (i = 0; i < bcpTypeLen; i++) {
-        bcpTypeBuf[i] = uprv_tolower(bcpTypeBuf[i]);
-    }
-
-    rb = ures_openDirect(NULL, KEYTYPEDATA, status);
-    ures_getByKey(rb, TYPEMAP, rb, status);
-    if (U_FAILURE(*status)) {
-        ures_close(rb);
-        return 0;
-    }
-
-    ures_getByKey(rb, keyBuf, rb, &tmpStatus);
-    mapData = ures_getNextResource(rb, NULL, &tmpStatus);
-    while (U_SUCCESS(tmpStatus)) {
-        const UChar *uBcpType;
-        char tmpBcpTypeBuf[MAX_BCP47_SUBTAG_LEN];
-        int32_t tmpBcpTypeLen;
-        const char *tmpBcpType = tmpBcpTypeBuf;
-
-        uBcpType = ures_getString(mapData, &tmpBcpTypeLen, &tmpStatus);
-        if (U_FAILURE(tmpStatus)) {
-            break;
-        }
-        if (tmpBcpTypeLen == 0) {
-            /* empty value indicates the BCP47 type is same with the legacy type */
-            tmpBcpType = ures_getKey(mapData);
-        } else {
-            u_UCharsToChars(uBcpType, tmpBcpTypeBuf, tmpBcpTypeLen);
-            tmpBcpTypeBuf[tmpBcpTypeLen] = 0;
-        }
-        if (uprv_compareInvCharsAsAscii(bcpTypeBuf, tmpBcpType) == 0) {
-            /* found a matching BCP47 type */
-            resType = ures_getKey(mapData);
-            resultLen = (int32_t)uprv_strlen(resType);
-            break;
-        }
-        if (!ures_hasNext(rb)) {
-            break;
-        }
-        ures_getNextResource(rb, mapData, &tmpStatus);
-    }
-    ures_close(mapData);
-    ures_close(rb);
-
-    if (U_FAILURE(tmpStatus) && tmpStatus != U_MISSING_RESOURCE_ERROR) {
-        *status = tmpStatus;
-        return 0;
-    }
-
-    if (resType == NULL) {
-        resType = bcpTypeBuf;
-        resultLen = bcpTypeLen;
-    }
-
-    copyLen = uprv_min(resultLen, typeCapacity);
-    uprv_memcpy(type, resType, copyLen);
-
-    if (uprv_compareInvCharsAsAscii(keyBuf, "timezone") == 0) {
-        for (i = 0; i < copyLen; i++) {
-            if (*(type + i) == ':') {
-                *(type + i) = '/';
-            }
-        }
-    }
-
-    return u_terminateChars(type, typeCapacity, resultLen, status);
-}
-
 static int32_t
 _appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
     char buf[ULOC_LANG_CAPACITY];
@@ -1311,7 +916,7 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
         const char *bcpKey, *bcpValue;
         UErrorCode tmpStatus = U_ZERO_ERROR;
         int32_t keylen;
-        UBool isLDMLKeyword;
+        UBool isBcpUExt;
 
         while (TRUE) {
             isAttribute = FALSE;
@@ -1320,7 +925,8 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
                 break;
             }
             len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStatus);
-            if (U_FAILURE(tmpStatus)) {
+            /* buf must be null-terminated */
+            if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
                 if (strict) {
                     *status = U_ILLEGAL_ARGUMENT_ERROR;
                     break;
@@ -1331,7 +937,7 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
             }
 
             keylen = (int32_t)uprv_strlen(key);
-            isLDMLKeyword = (keylen > 1);
+            isBcpUExt = (keylen > 1);
 
             /* special keyword used for representing Unicode locale attributes */
             if (uprv_strcmp(key, LOCALE_ATTRIBUTE_KEY) == 0) {
@@ -1379,36 +985,49 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
                         }
                     }
                 }
-            } else if (isLDMLKeyword) {
-                int32_t modKeyLen;
-
-                /* transform key and value to bcp47 style */
-                modKeyLen = _ldmlKeyToBCP47(key, keylen, pExtBuf, extBufCapacity, &tmpStatus);
-                if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
+            } else if (isBcpUExt) {
+                bcpKey = uloc_toUnicodeLocaleKey(key);
+                if (bcpKey == NULL) {
                     if (strict) {
                         *status = U_ILLEGAL_ARGUMENT_ERROR;
                         break;
                     }
-                    tmpStatus = U_ZERO_ERROR;
                     continue;
                 }
 
-                bcpKey = pExtBuf;
-                pExtBuf += (modKeyLen + 1);
-                extBufCapacity -= (modKeyLen + 1);
-
-                len = _ldmlTypeToBCP47(key, keylen, buf, len, pExtBuf, extBufCapacity, &tmpStatus);
-                if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
+                /* we've checked buf is null-terminated above */
+                bcpValue = uloc_toUnicodeLocaleType(key, buf);
+                if (bcpValue == NULL) {
                     if (strict) {
                         *status = U_ILLEGAL_ARGUMENT_ERROR;
                         break;
                     }
-                    tmpStatus = U_ZERO_ERROR;
                     continue;
                 }
-                bcpValue = pExtBuf;
-                pExtBuf += (len + 1);
-                extBufCapacity -= (len + 1);
+                if (bcpValue == buf) {
+                    /* 
+                    When uloc_toUnicodeLocaleType(key, buf) returns the
+                    input value as is, the value is well-formed, but has
+                    no known mapping. This implementation normalizes the
+                    value to lower case
+                    */
+                    int32_t bcpValueLen = uprv_strlen(bcpValue);
+                    if (bcpValueLen < extBufCapacity) {
+                        uprv_strcpy(pExtBuf, bcpValue);
+                        T_CString_toLowerCase(pExtBuf);
+
+                        bcpValue = pExtBuf;
+
+                        pExtBuf += (bcpValueLen + 1);
+                        extBufCapacity -= (bcpValueLen + 1);
+                    } else {
+                        if (strict) {
+                            *status = U_ILLEGAL_ARGUMENT_ERROR;
+                            break;
+                        }
+                        continue;
+                    }
+                }
             } else {
                 if (*key == PRIVATEUSE) {
                     if (!_isPrivateuseValueSubtags(buf, len)) {
@@ -1600,7 +1219,7 @@ _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendT
         /* locate next separator char */
         for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
 
-        if (_isLDMLKey(pTag, len)) {
+        if (ultag_isUnicodeLocaleKey(pTag, len)) {
             pKwds = pTag;
             break;
         }
@@ -1708,7 +1327,7 @@ _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendT
                 /* locate next separator char */
                 for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
 
-                if (_isLDMLKey(pTag, len)) {
+                if (ultag_isUnicodeLocaleKey(pTag, len)) {
                     if (pBcpKey) {
                         emitKeyword = TRUE;
                         pNextBcpKey = pTag;
@@ -1744,28 +1363,78 @@ _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendT
                 const char *pKey = NULL;    /* LDML key */
                 const char *pType = NULL;   /* LDML type */
 
+                char bcpKeyBuf[9];          /* BCP key length is always 2 for now */
+
                 U_ASSERT(pBcpKey != NULL);
 
+                if (bcpKeyLen >= sizeof(bcpKeyBuf)) {
+                    /* the BCP key is invalid */
+                    *status = U_ILLEGAL_ARGUMENT_ERROR;
+                    goto cleanup;
+                }
+
+                uprv_strncpy(bcpKeyBuf, pBcpKey, bcpKeyLen);
+                bcpKeyBuf[bcpKeyLen] = 0;
+
                 /* u extension key to LDML key */
-                len = _bcp47ToLDMLKey(pBcpKey, bcpKeyLen, buf + bufIdx, bufSize - bufIdx - 1, status);
-                if (U_FAILURE(*status)) {
+                pKey = uloc_toLegacyKey(bcpKeyBuf);
+                if (pKey == NULL) {
+                    *status = U_ILLEGAL_ARGUMENT_ERROR;
                     goto cleanup;
                 }
-                pKey = buf + bufIdx;
-                bufIdx += len;
-                *(buf + bufIdx) = 0;
-                bufIdx++;
+                if (pKey == bcpKeyBuf) {
+                    /*
+                    The key returned by toLegacyKey points to the input buffer.
+                    We normalize the result key to lower case.
+                    */
+                    T_CString_toLowerCase(bcpKeyBuf);
+                    if (bufSize - bufIdx - 1 >= bcpKeyLen) {
+                        uprv_memcpy(buf + bufIdx, bcpKeyBuf, bcpKeyLen);
+                        pKey = buf + bufIdx;
+                        bufIdx += bcpKeyLen;
+                        *(buf + bufIdx) = 0;
+                        bufIdx++;
+                    } else {
+                        *status = U_BUFFER_OVERFLOW_ERROR;
+                        goto cleanup;
+                    }
+                }
 
                 if (pBcpType) {
+                    char bcpTypeBuf[128];       /* practically long enough even considering multiple subtag type */
+                    if (bcpTypeLen >= sizeof(bcpTypeBuf)) {
+                        /* the BCP type is too long */
+                        *status = U_ILLEGAL_ARGUMENT_ERROR;
+                        goto cleanup;
+                    }
+
+                    uprv_strncpy(bcpTypeBuf, pBcpType, bcpTypeLen);
+                    bcpTypeBuf[bcpTypeLen] = 0;
+
                     /* BCP type to locale type */
-                    len = _bcp47ToLDMLType(pKey, -1, pBcpType, bcpTypeLen, buf + bufIdx, bufSize - bufIdx - 1, status);
-                    if (U_FAILURE(*status)) {
+                    pType = uloc_toLegacyType(pKey, bcpTypeBuf);
+                    if (pType == NULL) {
+                        *status = U_ILLEGAL_ARGUMENT_ERROR;
                         goto cleanup;
                     }
-                    pType = buf + bufIdx;
-                    bufIdx += len;
-                    *(buf + bufIdx) = 0;
-                    bufIdx++;
+                    if (pType == bcpTypeBuf) {
+                        /*
+                        The type returned by toLegacyType points to the input buffer.
+                        We normalize the result type to lower case.
+                        */
+                        /* normalize to lower case */
+                        T_CString_toLowerCase(bcpTypeBuf);
+                        if (bufSize - bufIdx - 1 >= bcpTypeLen) {
+                            uprv_memcpy(buf + bufIdx, bcpTypeBuf, bcpTypeLen);
+                            pType = buf + bufIdx;
+                            bufIdx += bcpTypeLen;
+                            *(buf + bufIdx) = 0;
+                            bufIdx++;
+                        } else {
+                            *status = U_BUFFER_OVERFLOW_ERROR;
+                            goto cleanup;
+                        }
+                    }
                 } else {
                     /* typeless - default type value is "yes" */
                     pType = LOCALE_TYPE_YES;
index ebc525ef9de706e2a098d7d1d4ba0cf940b725a9..164a730c0057d9b0bbfbba53095768bb182ad63d 100644 (file)
@@ -1,6 +1,6 @@
 /*
 **********************************************************************
-*   Copyright (C) 2004-2010, International Business Machines
+*   Copyright (C) 2004-2014, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 */
@@ -62,4 +62,23 @@ ulocimp_getCountry(const char *localeID,
 U_CAPI const char * U_EXPORT2
 locale_getKeywordsStart(const char *localeID);
 
+
+U_CFUNC UBool
+ultag_isUnicodeLocaleKey(const char* s, int32_t len);
+
+U_CFUNC UBool
+ultag_isUnicodeLocaleType(const char* s, int32_t len);
+
+U_CFUNC const char*
+ulocimp_toBcpKey(const char* key);
+
+U_CFUNC const char*
+ulocimp_toLegacyKey(const char* key);
+
+U_CFUNC const char*
+ulocimp_toBcpType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType);
+
+U_CFUNC const char*
+ulocimp_toLegacyType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType);
+
 #endif
index 5d9e1e7dc8881a1424d4bd6029a73b7f6531354a..f16380f0378a7a7ca012e6750fd86bb6d009279b 100644 (file)
@@ -1149,4 +1149,106 @@ uloc_toLanguageTag(const char* localeID,
                    UBool strict,
                    UErrorCode* err);
 
+#ifndef U_HIDE_DRAFT_API
+/**
+ * Converts the specified keyword (legacy key, or BCP 47 Unicode locale
+ * extension key) to the equivalent BCP 47 Unicode locale extension key.
+ * For example, BCP 47 Unicode locale extension key "co" is returned for
+ * the input keyword "collation".
+ * <p>
+ * When the specified keyword is unknown, but satisfies the BCP syntax,
+ * then the pointer to the input keyword itself will be returned.
+ * For example,
+ * <code>uloc_toUnicodeLocaleKey("ZZ")</code> returns "ZZ".
+ * 
+ * @param keyword       the input locale keyword (either legacy key
+ *                      such as "collation" or BCP 47 Unicode locale extension
+ *                      key such as "co").
+ * @return              the well-formed BCP 47 Unicode locale extension key,
+ *                      or NULL if the specified locale keyword cannot be
+ *                      mapped to a well-formed BCP 47 Unicode locale extension
+ *                      key. 
+ * @see uloc_toLegacyKey
+ * @draft ICU 54
+ */
+U_DRAFT const char* U_EXPORT2
+uloc_toUnicodeLocaleKey(const char* keyword);
+
+/**
+ * Converts the specified keyword value (legacy type, or BCP 47
+ * Unicode locale extension type) to the well-formed BCP 47 Unicode locale
+ * extension type for the specified keyword (category). For example, BCP 47
+ * Unicode locale extension type "phonebk" is returned for the input
+ * keyword value "phonebook", with the keyword "collation" (or "co").
+ * <p>
+ * When the specified keyword is not recognized, but the specified value
+ * satisfies the syntax of the BCP 47 Unicode locale extension type,
+ * or when the specified keyword allows 'variable' type and the specified
+ * value satisfies the syntax,  then the pointer to the input type value itself
+ * will be returned.
+ * For example,
+ * <code>uloc_toUnicodeLocaleType("Foo", "Bar")</code> returns "Bar",
+ * <code>uloc_toUnicodeLocaleType("variableTop", "00A4")</code> returns "00A4".
+ * 
+ * @param keyword       the locale keyword (either legacy key such as
+ *                      "collation" or BCP 47 Unicode locale extension
+ *                      key such as "co").
+ * @param value         the locale keyword value (either legacy type
+ *                      such as "phonebook" or BCP 47 Unicode locale extension
+ *                      type such as "phonebk").
+ * @return              the well-formed BCP47 Unicode locale extension type,
+ *                      or NULL if the locale keyword value cannot be mapped to
+ *                      a well-formed BCP 47 Unicode locale extension type.
+ * @see uloc_toLegacyType
+ * @draft ICU 54
+ */
+U_DRAFT const char* U_EXPORT2
+uloc_toUnicodeLocaleType(const char* keyword, const char* value);
+
+/**
+ * Converts the specified keyword (BCP 47 Unicode locale extension key, or
+ * legacy key) to the legacy key. For example, legacy key "collation" is
+ * returned for the input BCP 47 Unicode locale extension key "co".
+ * 
+ * @param keyword       the input locale keyword (either BCP 47 Unicode locale
+ *                      extension key or legacy key).
+ * @return              the well-formed legacy key, or NULL if the specified
+ *                      keyword cannot be mapped to a well-formed legacy key.
+ * @see uloc_toUnicodeLocaleKey
+ * @draft ICU 54
+ */
+U_DRAFT const char* U_EXPORT2
+uloc_toLegacyKey(const char* keyword);
+
+/**
+ * Converts the specified keyword value (BCP 47 Unicode locale extension type,
+ * or legacy type or type alias) to the canonical legacy type. For example,
+ * the legacy type "phonebook" is returned for the input BCP 47 Unicode
+ * locale extension type "phonebk" with the keyword "collation" (or "co").
+ * <p>
+ * When the specified keyword is not recognized, but the specified value
+ * satisfies the syntax of legacy type, or when the specified keyword
+ * allows 'variable' type and the specified value satisfies the syntax,
+ * then the pointer to the input type value itself will be returned.
+ * For example,
+ * <code>uloc_toLegacyType("Foo", "Bar")</code> returns "Bar",
+ * <code>uloc_toLegacyType("vt", "00A4")</code> returns "00A4".
+ *
+ * @param keyword       the locale keyword (either legacy keyword such as
+ *                      "collation" or BCP 47 Unicode locale extension
+ *                      key such as "co").
+ * @param value         the locale keyword value (either BCP 47 Unicode locale
+ *                      extension type such as "phonebk" or legacy keyword value
+ *                      such as "phonebook").
+ * @return              the well-formed legacy type, or NULL if the specified
+ *                      keyword value cannot be mapped to a well-formed legacy
+ *                      type.
+ * @see uloc_toUnicodeLocaleType
+ * @draft ICU 54
+ */
+U_DRAFT const char* U_EXPORT2
+uloc_toLegacyType(const char* keyword, const char* value);
+
+#endif  /* U_HIDE_DRAFT_API */
+
 #endif /*_ULOC*/
index 3c06c40e7d19b757248cd2b00148a2f512e97a54..f36918677165308079f55e304cc9db58ac135c82 100644 (file)
@@ -250,6 +250,10 @@ void addLocaleTest(TestNode** root)
     TESTCASE(TestEnglishExemplarCharacters);
     TESTCASE(TestDisplayNameBrackets);
     TESTCASE(TestIsRightToLeft);
+    TESTCASE(TestToUnicodeLocaleKey);
+    TESTCASE(TestToLegacyKey);
+    TESTCASE(TestToUnicodeLocaleType);
+    TESTCASE(TestToLegacyType);
 }
 
 
@@ -5673,7 +5677,6 @@ static void TestLikelySubtags()
 }
 
 const char* const locale_to_langtag[][3] = {
-    {"@x=elmer",    "x-elmer",      "x-elmer"},
     {"",            "und",          "und"},
     {"en",          "en",           "en"},
     {"en_US",       "en-US",        "en-US"},
@@ -5707,9 +5710,9 @@ const char* const locale_to_langtag[][3] = {
     {"en@timezone=America/New_York;calendar=japanese",    "en-u-ca-japanese-tz-usnyc",    "en-u-ca-japanese-tz-usnyc"},
     {"en@timezone=US/Eastern",  "en-u-tz-usnyc",    "en-u-tz-usnyc"},
     {"en@x=x-y-z;a=a-b-c",  "en-x-x-y-z",   NULL},
-    {"it@collation=badcollationtype;colStrength=identical;cu=usd-eur", "it-u-ks-identic",  NULL},
+    {"it@collation=badcollationtype;colStrength=identical;cu=usd-eur", "it-u-cu-usd-eur-ks-identic",  NULL},
     {"en_US_POSIX", "en-US-u-va-posix", "en-US-u-va-posix"},
-    {"en_US_POSIX@calendar=japanese;currency=EUR","en-US-u-ca-japanese-cu-EUR-va-posix", "en-US-u-ca-japanese-cu-EUR-va-posix"},
+    {"en_US_POSIX@calendar=japanese;currency=EUR","en-US-u-ca-japanese-cu-eur-va-posix", "en-US-u-ca-japanese-cu-eur-va-posix"},
     {"@x=elmer",    "x-elmer",      "x-elmer"},
     {"en@x=elmer",  "en-x-elmer",   "en-x-elmer"},
     {"@x=elmer;a=exta", "und-a-exta-x-elmer",   "und-a-exta-x-elmer"},
@@ -5779,6 +5782,7 @@ static const struct {
     const char  *locID;
     int32_t     len;
 } langtag_to_locale[] = {
+    {"ja-u-ijkl-efgh-abcd-ca-japanese-xx-yyy-zzz-kn",   "ja@attribute=abcd-efgh-ijkl;calendar=japanese;colnumeric=yes;xx=yyy-zzz",  FULL_LENGTH},
     {"en",                  "en",                   FULL_LENGTH},
     {"en-us",               "en_US",                FULL_LENGTH},
     {"und-US",              "_US",                  FULL_LENGTH},
@@ -5859,6 +5863,187 @@ static void TestForLanguageTag(void) {
     }
 }
 
+/*
+ * Table-driven check of uloc_toUnicodeLocaleKey().
+ * Each DATA row is {input keyword, expected result}; the expected result is one of:
+ *   NULL   - the call must return NULL
+ *   "$IN"  - the call must return the input pointer itself
+ *   other  - the returned string must compare equal (uprv_strcmp)
+ * Every mismatch is reported through log_err().
+ */
+static void TestToUnicodeLocaleKey(void)
+{
+    static const char* DATA[][2] = {
+        {"calendar",    "ca"},
+        {"CALEndar",    "ca"},  /* different casing */
+        {"ca",          "ca"},  /* bcp key itself */
+        {"kv",          "kv"},  /* no difference between legacy and bcp */
+        {"foo",         NULL},  /* unknown, bcp ill-formed */
+        {"ZZ",          "$IN"}, /* unknown, bcp well-formed */
+        {NULL,          NULL}   /* end-of-table marker */
+    };
+
+    int32_t row;
+    for (row = 0; DATA[row][0] != NULL; row++) {
+        const char* keyword = DATA[row][0];
+        const char* want = DATA[row][1];
+        const char* got = uloc_toUnicodeLocaleKey(keyword);
+
+        /* Row expects a rejection: anything but NULL is a failure. */
+        if (want == NULL) {
+            if (got != NULL) {
+                log_err("toUnicodeLocaleKey: keyword=%s => %s, expected=NULL\n", keyword, got);
+            }
+            continue;
+        }
+
+        /* Row expects a result but the call returned none. */
+        if (got == NULL) {
+            log_err("toUnicodeLocaleKey: keyword=%s => NULL, expected=%s\n", keyword, want);
+            continue;
+        }
+
+        /* "$IN" rows compare pointer identity, not string content. */
+        if (uprv_strcmp(want, "$IN") == 0) {
+            if (got != keyword) {
+                log_err("toUnicodeLocaleKey: keyword=%s => %s, expected=%s(input pointer)\n", keyword, got, keyword);
+            }
+            continue;
+        }
+
+        if (uprv_strcmp(got, want) != 0) {
+            log_err("toUnicodeLocaleKey: keyword=%s => %s, expected=%s\n", keyword, got, want);
+        }
+    }
+}
+
+/*
+ * Table-driven check of uloc_toLegacyKey().
+ * Each DATA row is {input keyword, expected result}; the expected result is one of:
+ *   NULL   - the call must return NULL
+ *   "$IN"  - the call must return the input pointer itself
+ *   other  - the returned string must compare equal (uprv_strcmp)
+ * Every mismatch is reported through log_err().
+ */
+static void TestToLegacyKey(void)
+{
+    static const char* DATA[][2] = {
+        {"kb",          "colbackwards"},
+        {"kB",          "colbackwards"},    /* different casing */
+        {"Collation",   "collation"},   /* keyword itself with different casing */
+        {"kv",          "kv"},  /* no difference between legacy and bcp */
+        {"foo",         "$IN"}, /* unknown, bcp ill-formed */
+        {"ZZ",          "$IN"}, /* unknown, bcp well-formed */
+        {"e=mc2",       NULL},  /* unknown, bcp/legacy ill-formed */
+        {NULL,          NULL}   /* end-of-table marker */
+    };
+
+    int32_t i;
+    for (i = 0; DATA[i][0] != NULL; i++) {
+        const char* keyword = DATA[i][0];
+        const char* expected = DATA[i][1];
+        const char* legacyKey = NULL;
+
+        legacyKey = uloc_toLegacyKey(keyword);
+        if (expected == NULL) {
+            /* Row expects a rejection: anything but NULL is a failure. */
+            if (legacyKey != NULL) {
+                log_err("toLegacyKey: keyword=%s => %s, expected=NULL\n", keyword, legacyKey);
+            }
+        } else if (legacyKey == NULL) {
+            log_err("toLegacyKey: keyword=%s => NULL, expected=%s\n", keyword, expected);
+        } else if (uprv_strcmp(expected, "$IN") == 0) {
+            /* "$IN" rows compare pointer identity, not string content. */
+            if (legacyKey != keyword) {
+                log_err("toLegacyKey: keyword=%s => %s, expected=%s(input pointer)\n", keyword, legacyKey, keyword);
+            }
+        } else if (uprv_strcmp(legacyKey, expected) != 0) {
+            /* Report under this test's own API name, in the same "=>" layout as the messages above. */
+            log_err("toLegacyKey: keyword=%s => %s, expected=%s\n", keyword, legacyKey, expected);
+        }
+    }
+}
+
+/*
+ * Table-driven check of uloc_toUnicodeLocaleType().
+ * Each DATA row is {keyword, input type value, expected result}; the expected result is one of:
+ *   NULL   - the call must return NULL
+ *   "$IN"  - the call must return the input value pointer itself
+ *   other  - the returned string must compare equal (uprv_strcmp)
+ * Every mismatch is reported through log_err().
+ */
+static void TestToUnicodeLocaleType(void)
+{
+    static const char* DATA[][3] = {
+        {"tz",              "Asia/Kolkata",     "inccu"},
+        {"calendar",        "gregorian",        "gregory"},
+        {"ca",              "gregorian",        "gregory"},
+        {"ca",              "Gregorian",        "gregory"},
+        {"ca",              "buddhist",         "buddhist"},
+        {"Calendar",        "Japanese",         "japanese"},
+        {"calendar",        "Islamic-Civil",    "islamic-civil"},
+        {"calendar",        "islamicc",         "islamic-civil"},   /* bcp type alias */
+        {"colalternate",    "NON-IGNORABLE",    "noignore"},
+        {"colcaselevel",    "yes",              "true"},
+        {"tz",              "america/new_york", "usnyc"},
+        {"tz",              "Asia/Kolkata",     "inccu"},   /* same as the first row */
+        {"timezone",        "navajo",           "usden"},
+        {"ca",              "aaaa",             "$IN"},     /* unknown type, well-formed type */
+        {"ca",              "gregory-japanese-islamic", "$IN"}, /* unknown type, well-formed type */
+        {"zz",              "gregorian",        NULL},      /* unknown key, ill-formed type */
+        {"co",              "foo-",             NULL},      /* unknown type, ill-formed type */
+        {"variableTop",     "00A0",             "$IN"},     /* valid codepoints type */
+        {"variableTop",     "wxyz",             "$IN"},     /* invalid codepoints type - return as is for now */
+        {"kr",              "space-punct",      "space-punct"}, /* valid reordercode type */
+        {"kr",              "digit-spacepunct", NULL},      /* invalid (bcp ill-formed) reordercode type */
+        {NULL,              NULL,               NULL}       /* end-of-table marker */
+    };
+
+    int32_t row;
+    for (row = 0; DATA[row][0] != NULL; row++) {
+        const char* keyword = DATA[row][0];
+        const char* value = DATA[row][1];
+        const char* want = DATA[row][2];
+        const char* got = uloc_toUnicodeLocaleType(keyword, value);
+
+        /* Row expects a rejection: anything but NULL is a failure. */
+        if (want == NULL) {
+            if (got != NULL) {
+                log_err("toUnicodeLocaleType: keyword=%s, value=%s => %s, expected=NULL\n", keyword, value, got);
+            }
+            continue;
+        }
+
+        /* Row expects a result but the call returned none. */
+        if (got == NULL) {
+            log_err("toUnicodeLocaleType: keyword=%s, value=%s => NULL, expected=%s\n", keyword, value, want);
+            continue;
+        }
+
+        /* "$IN" rows compare pointer identity against the input value. */
+        if (uprv_strcmp(want, "$IN") == 0) {
+            if (got != value) {
+                log_err("toUnicodeLocaleType: keyword=%s, value=%s => %s, expected=%s(input pointer)\n", keyword, value, got, value);
+            }
+            continue;
+        }
+
+        if (uprv_strcmp(got, want) != 0) {
+            log_err("toUnicodeLocaleType: keyword=%s, value=%s => %s, expected=%s\n", keyword, value, got, want);
+        }
+    }
+}
+
+/*
+ * Table-driven check of uloc_toLegacyType().
+ * Each DATA row is {keyword, input type value, expected result}; the expected result is one of:
+ *   NULL   - the call must return NULL
+ *   "$IN"  - the call must return the input value pointer itself
+ *   other  - the returned string must compare equal (uprv_strcmp)
+ * Every mismatch is reported through log_err().
+ */
+static void TestToLegacyType(void)
+{
+    static const char* DATA[][3] = {
+        {"calendar",        "gregory",          "gregorian"},
+        {"ca",              "gregory",          "gregorian"},
+        {"ca",              "Gregory",          "gregorian"},
+        {"ca",              "buddhist",         "buddhist"},
+        {"Calendar",        "Japanese",         "japanese"},
+        {"calendar",        "Islamic-Civil",    "islamic-civil"},
+        {"calendar",        "islamicc",         "islamic-civil"},   /* bcp type alias */
+        {"colalternate",    "noignore",         "non-ignorable"},
+        {"colcaselevel",    "true",             "yes"},
+        {"tz",              "usnyc",            "America/New_York"},
+        {"tz",              "inccu",            "Asia/Calcutta"},
+        {"timezone",        "usden",            "America/Denver"},
+        {"timezone",        "usnavajo",         "America/Denver"},  /* bcp type alias */
+        {"colstrength",     "quarternary",      "quaternary"},  /* type alias */
+        {"ca",              "aaaa",             "$IN"}, /* unknown type */
+        {"calendar",        "gregory-japanese-islamic", "$IN"}, /* unknown type, well-formed type */
+        {"zz",              "gregorian",        "$IN"}, /* unknown key, bcp ill-formed type */
+        {"ca",              "gregorian-calendar",   "$IN"}, /* known key, bcp ill-formed type */
+        {"co",              "e=mc2",            NULL},  /* known key, ill-formed bcp/legacy type */
+        {"variableTop",     "00A0",             "$IN"},     /* valid codepoints type */
+        {"variableTop",     "wxyz",             "$IN"},    /* invalid codepoints type - return as is for now */
+        {"kr",              "space-punct",      "space-punct"}, /* valid reordercode type */
+        {"kr",              "digit-spacepunct", "digit-spacepunct"},    /* invalid reordercode type, bad ok for legacy syntax */
+        {NULL,              NULL,               NULL}   /* end-of-table marker */
+    };
+
+    int32_t row;
+    for (row = 0; DATA[row][0] != NULL; row++) {
+        const char* keyword = DATA[row][0];
+        const char* value = DATA[row][1];
+        const char* want = DATA[row][2];
+        const char* got = uloc_toLegacyType(keyword, value);
+
+        /* Row expects a rejection: anything but NULL is a failure. */
+        if (want == NULL) {
+            if (got != NULL) {
+                log_err("toLegacyType: keyword=%s, value=%s => %s, expected=NULL\n", keyword, value, got);
+            }
+            continue;
+        }
+
+        /* Row expects a result but the call returned none. */
+        if (got == NULL) {
+            log_err("toLegacyType: keyword=%s, value=%s => NULL, expected=%s\n", keyword, value, want);
+            continue;
+        }
+
+        /* "$IN" rows compare pointer identity against the input value. */
+        if (uprv_strcmp(want, "$IN") == 0) {
+            if (got != value) {
+                log_err("toLegacyType: keyword=%s, value=%s => %s, expected=%s(input pointer)\n", keyword, value, got, value);
+            }
+            continue;
+        }
+
+        if (uprv_strcmp(got, want) != 0) {
+            log_err("toLegacyType: keyword=%s, value=%s => %s, expected=%s\n", keyword, value, got, want);
+        }
+    }
+}
+
+
+
 static void test_unicode_define(const char *namech, char ch, const char *nameu, UChar uch)
 {
   UChar asUch[1];
index ac313b3eb9a74bdb48411af1e0144d855f4c440b..8ae243f9dba5b22199db40f9d797c616a1d96cc6 100644 (file)
@@ -1,6 +1,6 @@
 /********************************************************************
  * COPYRIGHT:
- * Copyright (c) 1997-2013, International Business Machines Corporation and
+ * Copyright (c) 1997-2014, International Business Machines Corporation and
  * others. All Rights Reserved.
  ********************************************************************/
 /********************************************************************************
@@ -123,6 +123,11 @@ static void TestLikelySubtags(void);
 static void TestForLanguageTag(void);
 static void TestToLanguageTag(void);
 
+/* Tests for the BCP 47 key/type converter APIs */
+static void TestToUnicodeLocaleKey(void);
+static void TestToLegacyKey(void);
+static void TestToUnicodeLocaleType(void);
+static void TestToLegacyType(void);
+
 /**
  * locale data
  */