icu4c/source/allinone/icucheck.bat -text
icu4c/source/common/common.vcxproj -text
icu4c/source/common/common.vcxproj.filters -text
+icu4c/source/common/uloc_keytype.cpp -text
icu4c/source/common/unifiedcache.cpp -text
icu4c/source/common/unifiedcache.h -text
icu4c/source/data/coll/dsb.txt -text
uidna.o usprep.o uts46.o punycode.o \
util.o util_props.o parsepos.o locbased.o cwchar.o wintz.o dtintrv.o ucnvsel.o propsvec.o \
ulist.o uloc_tag.o icudataver.o icuplug.o listformatter.o \
-sharedobject.o simplepatternformatter.o unifiedcache.o
+sharedobject.o simplepatternformatter.o unifiedcache.o uloc_keytype.o
## Header files to install
HEADERS = $(srcdir)/unicode/*.h
-<?xml version="1.0" encoding="utf-8"?>\r
+<?xml version="1.0" encoding="utf-8"?>\r
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">\r
<ItemGroup Label="ProjectConfigurations">\r
<ProjectConfiguration Include="Debug|Win32">\r
<ClCompile Include="ubidi_props.c" />\r
<ClCompile Include="ubidiln.c" />\r
<ClCompile Include="ubidiwrt.c" />\r
+ <ClCompile Include="uloc_keytype.cpp" />\r
<ClCompile Include="ushape.cpp" />\r
<ClCompile Include="brkeng.cpp">\r
</ClCompile>\r
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />\r
<ImportGroup Label="ExtensionTargets">\r
</ImportGroup>\r
-</Project>\r
+</Project>
\ No newline at end of file
<ClCompile Include="usetiter.cpp">\r
<Filter>properties & sets</Filter>\r
</ClCompile>\r
- <ClCompile Include="icuplug.c">\r
- <Filter>registration</Filter>\r
- </ClCompile>\r
<ClCompile Include="serv.cpp">\r
<Filter>registration</Filter>\r
</ClCompile>\r
<ClCompile Include="stringtriebuilder.cpp">\r
<Filter>collections</Filter>\r
</ClCompile>\r
+ <ClCompile Include="icuplug.cpp" />\r
+ <ClCompile Include="uloc_keytype.cpp">\r
+ <Filter>locales & resources</Filter>\r
+ </ClCompile>\r
</ItemGroup>\r
<ItemGroup>\r
<ClInclude Include="ubidi_props.h">\r
<Filter>collections</Filter>\r
</CustomBuild>\r
</ItemGroup>\r
-</Project>\r
+</Project>
\ No newline at end of file
UCLN_COMMON_BREAKITERATOR,
UCLN_COMMON_BREAKITERATOR_DICT,
UCLN_COMMON_SERVICE,
+ UCLN_COMMON_LOCALE_KEY_TYPE,
UCLN_COMMON_LOCALE,
UCLN_COMMON_LOCALE_AVAILABLE,
UCLN_COMMON_ULOC,
return -1;
}
+U_CAPI const char* U_EXPORT2
+uloc_toUnicodeLocaleKey(const char* keyword)
+{ /*
+   * Maps a locale keyword key to its BCP 47 Unicode locale extension key.
+   *
+   * The key is first looked up with ulocimp_toBcpKey(). When no mapping is
+   * found but ultag_isUnicodeLocaleKey() accepts the input, the input pointer
+   * itself is returned. Otherwise the lookup result is returned, which is
+   * NULL when the key is neither mapped nor accepted.
+   *
+   * @param keyword key to convert; it is passed with length -1 to the
+   *                syntax check, i.e. it is treated as NUL-terminated.
+   * @return the mapped key, the input pointer, or NULL.
+   */
+ const char* bcpKey = ulocimp_toBcpKey(keyword);
+ if (bcpKey == NULL && ultag_isUnicodeLocaleKey(keyword, -1)) {
+ // unknown keyword, but syntax is fine..
+ return keyword;
+ }
+ return bcpKey;
+}
+/*
+ * Maps a keyword value (type) to its BCP 47 Unicode locale extension type.
+ *
+ * The pair is first looked up with ulocimp_toBcpType(). When that yields
+ * NULL but ultag_isUnicodeLocaleType() accepts the value, the input value
+ * pointer itself is returned. Otherwise the lookup result is returned, which
+ * is NULL when the value is neither mapped nor accepted.
+ *
+ * @param keyword key the value belongs to.
+ * @param value   value to convert; it is passed with length -1 to the
+ *                syntax check, i.e. it is treated as NUL-terminated.
+ * @return the mapped type, the input value pointer, or NULL.
+ */
+U_CAPI const char* U_EXPORT2
+uloc_toUnicodeLocaleType(const char* keyword, const char* value)
+{
+ const char* bcpType = ulocimp_toBcpType(keyword, value, NULL, NULL);
+ if (bcpType == NULL && ultag_isUnicodeLocaleType(value, -1)) {
+ // no known mapping for this value, but its syntax is fine..
+ return value;
+ }
+ return bcpType;
+}
+/*
+ * Non-zero when c lies in one of the ranges '0'-'9', 'A'-'Z' or 'a'-'z'.
+ * The expression relies on && binding tighter than ||, and it evaluates its
+ * argument several times, so only side-effect-free arguments are safe.
+ * NOTE(review): some compilers warn about the unparenthesized && inside ||.
+ */
+#define ISALPHANUM(c) ( (c) >= '0' && (c) <= '9' || (c) >= 'A' && (c) <= 'Z' || (c) >= 'a' && (c) <= 'z' )
+/*
+ * Checks a legacy keyword key for well-formedness: every character must
+ * satisfy ISALPHANUM. The loop body never runs for an empty string, so an
+ * empty key is reported as well-formed.
+ *
+ * @param legacyKey NUL-terminated key; it is dereferenced without a NULL check.
+ * @return TRUE when no character fails ISALPHANUM, FALSE otherwise.
+ */
+static UBool
+isWellFormedLegacyKey(const char* legacyKey)
+{
+ const char* p = legacyKey;
+ while (*p) {
+ if (!ISALPHANUM(*p)) {
+ return FALSE;
+ }
+ p++;
+ }
+ return TRUE;
+}
+/*
+ * Checks a legacy keyword type for well-formedness. The value must be made
+ * of runs of ISALPHANUM characters separated by a single '_', '/' or '-'.
+ * A separator with no alphanumeric character before it (at the start or
+ * right after another separator) is rejected, and so is a value that ends
+ * with a separator or is empty.
+ *
+ * @param legacyType NUL-terminated type; it is dereferenced without a NULL check.
+ * @return TRUE when the value matches the rule above, FALSE otherwise.
+ */
+static UBool
+isWellFormedLegacyType(const char* legacyType)
+{
+ const char* p = legacyType;
+ int32_t alphaNumLen = 0;
+ while (*p) {
+ if (*p == '_' || *p == '/' || *p == '-') {
+ if (alphaNumLen == 0) {
+ return FALSE;
+ }
+ alphaNumLen = 0;
+ } else if (ISALPHANUM(*p)) {
+ alphaNumLen++;
+ } else {
+ return FALSE;
+ }
+ p++;
+ }
+ return (alphaNumLen != 0);
+}
+/*
+ * Maps a locale keyword key to its legacy key.
+ *
+ * The key is first looked up with ulocimp_toLegacyKey(). When no mapping is
+ * found but isWellFormedLegacyKey() accepts the input, the input pointer
+ * itself is returned; otherwise NULL is returned.
+ *
+ * @param keyword NUL-terminated key to convert.
+ * @return the mapped legacy key, the input pointer, or NULL.
+ */
+U_CAPI const char* U_EXPORT2
+uloc_toLegacyKey(const char* keyword)
+{
+ const char* legacyKey = ulocimp_toLegacyKey(keyword);
+ if (legacyKey == NULL) {
+ // Checks if the specified locale key is well-formed with the legacy locale syntax.
+ //
+ // Note:
+ // Neither ICU nor LDML/CLDR provides the definition of keyword syntax.
+ // However, a key should not contain '=' obviously. For now, all existing
+ // keys are using ASCII alphabetic letters only. We won't add any new key
+ // that is not compatible with the BCP 47 syntax. Therefore, we assume
+ // a valid key consists of [0-9a-zA-Z] only, with no symbols.
+ if (isWellFormedLegacyKey(keyword)) {
+ return keyword;
+ }
+ }
+ return legacyKey;
+}
+/*
+ * Maps a keyword value (type) to its legacy type.
+ *
+ * The pair is first looked up with ulocimp_toLegacyType(). When that yields
+ * NULL but isWellFormedLegacyType() accepts the value, the input value
+ * pointer itself is returned; otherwise NULL is returned.
+ *
+ * @param keyword key the value belongs to.
+ * @param value   NUL-terminated value to convert.
+ * @return the mapped legacy type, the input value pointer, or NULL.
+ */
+U_CAPI const char* U_EXPORT2
+uloc_toLegacyType(const char* keyword, const char* value)
+{
+ const char* legacyType = ulocimp_toLegacyType(keyword, value, NULL, NULL);
+ if (legacyType == NULL) {
+ // Checks if the specified locale type is well-formed with the legacy locale syntax.
+ //
+ // Note:
+ // Neither ICU nor LDML/CLDR provides the definition of keyword syntax.
+ // However, a type should not contain '=' obviously. For now, all existing
+ // types are using ASCII alphabetic letters with a few symbol letters. We won't
+ // add any new type that is not compatible with the BCP 47 syntax except timezone
+ // IDs. For now, we assume a valid type starts with [0-9a-zA-Z], but may contain
+ // '-' '_' '/' in the middle.
+ if (isWellFormedLegacyType(value)) {
+ return value;
+ }
+ }
+ return legacyType;
+}
+
/*eof*/
--- /dev/null
+/*\r
+**********************************************************************\r
+* Copyright (C) 2014, International Business Machines\r
+* Corporation and others. All Rights Reserved.\r
+**********************************************************************\r
+*/\r
+#include "unicode/utypes.h"\r
+\r
+#include "cstring.h"\r
+#include "uassert.h"\r
+#include "ucln_cmn.h"\r
+#include "uhash.h"\r
+#include "umutex.h"\r
+#include "uresimp.h"\r
+#include "uvector.h"\r
+\r
+static UHashtable* gLocExtKeyMap = NULL;\r
+static icu::UInitOnce gLocExtKeyMapInitOnce = U_INITONCE_INITIALIZER;\r
+static icu::UVector* gKeyTypeStringPool = NULL;\r
+static icu::UVector* gLocExtKeyDataEntries = NULL;\r
+static icu::UVector* gLocExtTypeEntries = NULL;\r
+\r
+// bit flags, OR-ed together in LocExtKeyData.specialTypes, for keys whose type values are checked by a syntax rule\r
+typedef enum {\r
+ SPECIALTYPE_NONE = 0,\r
+ SPECIALTYPE_CODEPOINTS = 1, /* value is checked with isSpecialTypeCodepoints() */\r
+ SPECIALTYPE_REORDER_CODE = 2 /* value is checked with isSpecialTypeReorderCode() */\r
+} SpecialType;\r
+/*\r
+ * Per-key record built by initFromResourceBundle(). One record is stored in\r
+ * gLocExtKeyMap under the legacy key id and, when it differs, the BCP key id.\r
+ */\r
+typedef struct LocExtKeyData {\r
+ const char* legacyId; /* legacy key id */\r
+ const char* bcpId; /* BCP key id; same pointer as legacyId when the resource value is empty */\r
+ UHashtable* typeMap; /* maps type ids and aliases of this key to LocExtType records */\r
+ uint32_t specialTypes; /* combination of SpecialType bit flags */\r
+} LocExtKeyData;\r
+/*\r
+ * Per-type record built by initFromResourceBundle() and stored in the\r
+ * typeMap of its key under the legacy id, the BCP id and any aliases.\r
+ */\r
+typedef struct LocExtType {\r
+ const char* legacyId; /* legacy type id */\r
+ const char* bcpId; /* BCP type id; same pointer as legacyId when the resource value is empty */\r
+} LocExtType;\r
+\r
+U_CDECL_BEGIN\r
+/*\r
+ * Cleanup callback registered by initFromResourceBundle() for\r
+ * UCLN_COMMON_LOCALE_KEY_TYPE. Closes the key map, deletes the three vectors\r
+ * that hold the allocated entries and strings, resets every global pointer\r
+ * to NULL and resets the init-once guard so the tables can be rebuilt.\r
+ * The vectors were constructed with deleter callbacks; presumably deleting\r
+ * a vector invokes them on each element - TODO confirm against UVector.\r
+ *\r
+ * @return always TRUE.\r
+ */\r
+static UBool U_CALLCONV\r
+uloc_key_type_cleanup(void) {\r
+ if (gLocExtKeyMap != NULL) {\r
+ uhash_close(gLocExtKeyMap);\r
+ gLocExtKeyMap = NULL;\r
+ }\r
+\r
+ delete gLocExtKeyDataEntries;\r
+ gLocExtKeyDataEntries = NULL;\r
+\r
+ delete gLocExtTypeEntries;\r
+ gLocExtTypeEntries = NULL;\r
+\r
+ delete gKeyTypeStringPool;\r
+ gKeyTypeStringPool = NULL;\r
+\r
+ gLocExtKeyMapInitOnce.reset();\r
+ return TRUE;\r
+}\r
+/*\r
+ * Element deleter passed to the gKeyTypeStringPool vector; releases one\r
+ * pooled string with uprv_free().\r
+ */\r
+static void U_CALLCONV\r
+uloc_deleteKeyTypeStringPoolEntry(void* obj) {\r
+ uprv_free(obj);\r
+}\r
+/*\r
+ * Element deleter passed to the gLocExtKeyDataEntries vector; closes the\r
+ * entry's typeMap hashtable when it has one, then frees the LocExtKeyData.\r
+ * obj is dereferenced without a NULL check.\r
+ */\r
+static void U_CALLCONV\r
+uloc_deleteKeyDataEntry(void* obj) {\r
+ LocExtKeyData* keyData = (LocExtKeyData*)obj;\r
+ if (keyData->typeMap != NULL) {\r
+ uhash_close(keyData->typeMap);\r
+ }\r
+ uprv_free(keyData);\r
+}\r
+/*\r
+ * Element deleter passed to the gLocExtTypeEntries vector; releases one\r
+ * LocExtType with uprv_free(). The strings it points to are not freed here.\r
+ */\r
+static void U_CALLCONV\r
+uloc_deleteTypeEntry(void* obj) {\r
+ uprv_free(obj);\r
+}\r
+\r
+U_CDECL_END\r
+\r
+/*\r
+ * Builds the key/type lookup tables from the "keyTypeData" resource bundle.\r
+ * Invoked through umtx_initOnce() by init().\r
+ *\r
+ * Steps:\r
+ *  - registers uloc_key_type_cleanup() for UCLN_COMMON_LOCALE_KEY_TYPE;\r
+ *  - opens "keyTypeData" with its "keyMap" and "typeMap" tables (failures are\r
+ *    reported through sts) and its "typeAlias" and "bcpTypeAlias" tables\r
+ *    (failures are ignored and the table is treated as absent);\r
+ *  - for every keyMap entry, creates a LocExtKeyData and puts it into\r
+ *    gLocExtKeyMap under the legacy key and, when different, the BCP key;\r
+ *  - fills that key's type map so that the legacy type id, the BCP type id\r
+ *    and every matching alias map to one shared LocExtType. The type ids\r
+ *    CODEPOINTS and REORDER_CODE are not stored; they set bit flags instead.\r
+ *\r
+ * For the "timezone" key, a colon in a type id or type alias read from the\r
+ * resource is replaced with a slash before the id is stored.\r
+ *\r
+ * Strings allocated here are added to gKeyTypeStringPool, LocExtType objects\r
+ * to gLocExtTypeEntries and LocExtKeyData objects to gLocExtKeyDataEntries.\r
+ * Ids returned by ures_getKey() are stored by pointer without copying;\r
+ * presumably the resource data outlives the bundles closed below - TODO confirm.\r
+ *\r
+ * NOTE(review): a break taken after typeDataMap is opened and before it is\r
+ * stored in a LocExtKeyData leaves typeDataMap unclosed. Likewise a buffer or\r
+ * struct whose addElement() call fails is not freed in this function; whether\r
+ * UVector frees it in that case is not visible here - verify.\r
+ *\r
+ * @param sts in/out error code; set when a required resource cannot be read\r
+ *            or an allocation fails.\r
+ */\r
+static void U_CALLCONV\r
+initFromResourceBundle(UErrorCode& sts) {\r
+ ucln_common_registerCleanup(UCLN_COMMON_LOCALE_KEY_TYPE, uloc_key_type_cleanup);\r
+\r
+ gLocExtKeyMap = uhash_open(uhash_hashIChars, uhash_compareIChars, NULL, &sts);\r
+ if (U_FAILURE(sts)) {\r
+ return;\r
+ }\r
+\r
+ UResourceBundle *keyTypeDataRes = NULL;\r
+ UResourceBundle *keyMapRes = NULL;\r
+ UResourceBundle *typeMapRes = NULL;\r
+ UResourceBundle *typeAliasRes = NULL;\r
+ UResourceBundle *bcpTypeAliasRes = NULL;\r
+\r
+ keyTypeDataRes = ures_openDirect(NULL, "keyTypeData", &sts);\r
+ keyMapRes = ures_getByKey(keyTypeDataRes, "keyMap", NULL, &sts);\r
+ typeMapRes = ures_getByKey(keyTypeDataRes, "typeMap", NULL, &sts);\r
+\r
+ UErrorCode tmpSts = U_ZERO_ERROR;\r
+ typeAliasRes = ures_getByKey(keyTypeDataRes, "typeAlias", NULL, &tmpSts);\r
+ if (U_FAILURE(tmpSts)) {\r
+ typeAliasRes = NULL;\r
+ tmpSts = U_ZERO_ERROR;\r
+ }\r
+ bcpTypeAliasRes = ures_getByKey(keyTypeDataRes, "bcpTypeAlias", NULL, &tmpSts);\r
+ if (U_FAILURE(tmpSts)) {\r
+ bcpTypeAliasRes = NULL;\r
+ tmpSts = U_ZERO_ERROR;\r
+ }\r
+\r
+ // initialize vectors storing dynamically allocated objects\r
+ gKeyTypeStringPool = new UVector(uloc_deleteKeyTypeStringPoolEntry, NULL, sts);\r
+ if (gKeyTypeStringPool == NULL || U_FAILURE(sts)) {\r
+ goto close_bundles;\r
+ }\r
+ gLocExtKeyDataEntries = new UVector(uloc_deleteKeyDataEntry, NULL, sts);\r
+ if (gLocExtKeyDataEntries == NULL || U_FAILURE(sts)) {\r
+ goto close_bundles;\r
+ }\r
+ gLocExtTypeEntries = new UVector(uloc_deleteTypeEntry, NULL, sts);\r
+ if (gLocExtTypeEntries == NULL || U_FAILURE(sts)) {\r
+ goto close_bundles;\r
+ }\r
+\r
+ // iterate through keyMap resource\r
+ UResourceBundle keyMapEntry;\r
+ ures_initStackObject(&keyMapEntry);\r
+\r
+ while (ures_hasNext(keyMapRes)) {\r
+ ures_getNextResource(keyMapRes, &keyMapEntry, &sts);\r
+ if (U_FAILURE(sts)) {\r
+ break;\r
+ }\r
+ const char* legacyKeyId = ures_getKey(&keyMapEntry);\r
+ int32_t bcpKeyIdLen = 0;\r
+ const UChar* uBcpKeyId = ures_getString(&keyMapEntry, &bcpKeyIdLen, &sts);\r
+ if (U_FAILURE(sts)) {\r
+ break;\r
+ }\r
+\r
+ // empty value indicates that the BCP key is the same as the legacy key.\r
+ const char* bcpKeyId = legacyKeyId;\r
+ if (bcpKeyIdLen > 0) {\r
+ char* bcpKeyIdBuf = (char*)uprv_malloc(bcpKeyIdLen + 1);\r
+ if (bcpKeyIdBuf == NULL) {\r
+ sts = U_MEMORY_ALLOCATION_ERROR;\r
+ break;\r
+ }\r
+ u_UCharsToChars(uBcpKeyId, bcpKeyIdBuf, bcpKeyIdLen);\r
+ bcpKeyIdBuf[bcpKeyIdLen] = 0;\r
+ gKeyTypeStringPool->addElement(bcpKeyIdBuf, sts);\r
+ if (U_FAILURE(sts)) {\r
+ break;\r
+ }\r
+ bcpKeyId = bcpKeyIdBuf;\r
+ }\r
+\r
+ UBool isTZ = uprv_strcmp(legacyKeyId, "timezone") == 0;\r
+\r
+ UHashtable* typeDataMap = uhash_open(uhash_hashIChars, uhash_compareIChars, NULL, &sts);\r
+ if (U_FAILURE(sts)) {\r
+ break;\r
+ }\r
+ uint32_t specialTypes = SPECIALTYPE_NONE;\r
+\r
+ UResourceBundle* typeAliasResByKey = NULL;\r
+ UResourceBundle* bcpTypeAliasResByKey = NULL;\r
+\r
+ if (typeAliasRes != NULL) {\r
+ typeAliasResByKey = ures_getByKey(typeAliasRes, legacyKeyId, NULL, &tmpSts);\r
+ if (U_FAILURE(tmpSts)) {\r
+ // only a few keys have type alias mapping\r
+ typeAliasResByKey = NULL;\r
+ tmpSts = U_ZERO_ERROR;\r
+ }\r
+ }\r
+ if (bcpTypeAliasRes != NULL) {\r
+ bcpTypeAliasResByKey = ures_getByKey(bcpTypeAliasRes, bcpKeyId, NULL, &tmpSts);\r
+ if (U_FAILURE(tmpSts)) {\r
+ // only a few keys have BCP type alias mapping\r
+ bcpTypeAliasResByKey = NULL;\r
+ tmpSts = U_ZERO_ERROR;\r
+ }\r
+ }\r
+\r
+ // look up type map for the key, and walk through the mapping data\r
+ UResourceBundle* typeMapResByKey = ures_getByKey(typeMapRes, legacyKeyId, NULL, &tmpSts);\r
+ if (U_FAILURE(tmpSts)) {\r
+ // type map for each key must exist\r
+ U_ASSERT(FALSE);\r
+ tmpSts = U_ZERO_ERROR;\r
+ } else {\r
+ UResourceBundle typeMapEntry;\r
+ ures_initStackObject(&typeMapEntry);\r
+\r
+ while (ures_hasNext(typeMapResByKey)) {\r
+ ures_getNextResource(typeMapResByKey, &typeMapEntry, &sts);\r
+ if (U_FAILURE(sts)) {\r
+ break;\r
+ }\r
+ const char* legacyTypeId = ures_getKey(&typeMapEntry);\r
+\r
+ // special types\r
+ if (uprv_strcmp(legacyTypeId, "CODEPOINTS") == 0) {\r
+ specialTypes |= SPECIALTYPE_CODEPOINTS;\r
+ continue;\r
+ }\r
+ if (uprv_strcmp(legacyTypeId, "REORDER_CODE") == 0) {\r
+ specialTypes |= SPECIALTYPE_REORDER_CODE;\r
+ continue;\r
+ }\r
+\r
+ if (isTZ) {\r
+ // a timezone key uses a colon instead of a slash in the resource.\r
+ // e.g. America:Los_Angeles\r
+ if (uprv_strchr(legacyTypeId, ':') != NULL) {\r
+ int32_t legacyTypeIdLen = uprv_strlen(legacyTypeId);\r
+ char* legacyTypeIdBuf = (char*)uprv_malloc(legacyTypeIdLen + 1);\r
+ if (legacyTypeIdBuf == NULL) {\r
+ sts = U_MEMORY_ALLOCATION_ERROR;\r
+ break;\r
+ }\r
+ const char* p = legacyTypeId;\r
+ char* q = legacyTypeIdBuf;\r
+ while (*p) {\r
+ if (*p == ':') {\r
+ *q++ = '/';\r
+ } else {\r
+ *q++ = *p;\r
+ }\r
+ p++;\r
+ }\r
+ *q = 0;\r
+\r
+ gKeyTypeStringPool->addElement(legacyTypeIdBuf, sts);\r
+ if (U_FAILURE(sts)) {\r
+ break;\r
+ }\r
+ legacyTypeId = legacyTypeIdBuf;\r
+ }\r
+ }\r
+\r
+ int32_t bcpTypeIdLen = 0;\r
+ const UChar* uBcpTypeId = ures_getString(&typeMapEntry, &bcpTypeIdLen, &sts);\r
+ if (U_FAILURE(sts)) {\r
+ break;\r
+ }\r
+\r
+ // empty value indicates that the BCP type is the same as the legacy type.\r
+ const char* bcpTypeId = legacyTypeId;\r
+ if (bcpTypeIdLen > 0) {\r
+ char* bcpTypeIdBuf = (char*)uprv_malloc(bcpTypeIdLen + 1);\r
+ if (bcpTypeIdBuf == NULL) {\r
+ sts = U_MEMORY_ALLOCATION_ERROR;\r
+ break;\r
+ }\r
+ u_UCharsToChars(uBcpTypeId, bcpTypeIdBuf, bcpTypeIdLen);\r
+ bcpTypeIdBuf[bcpTypeIdLen] = 0;\r
+ gKeyTypeStringPool->addElement(bcpTypeIdBuf, sts);\r
+ if (U_FAILURE(sts)) {\r
+ break;\r
+ }\r
+ bcpTypeId = bcpTypeIdBuf;\r
+ }\r
+\r
+ // Note: legacy type value should never be\r
+ // equivalent to bcp type value of a different\r
+ // type under the same key. So we use a single\r
+ // map for lookup.\r
+ LocExtType* t = (LocExtType*)uprv_malloc(sizeof(LocExtType));\r
+ if (t == NULL) {\r
+ sts = U_MEMORY_ALLOCATION_ERROR;\r
+ break;\r
+ }\r
+ t->bcpId = bcpTypeId;\r
+ t->legacyId = legacyTypeId;\r
+ gLocExtTypeEntries->addElement((void*)t, sts);\r
+ if (U_FAILURE(sts)) {\r
+ break;\r
+ }\r
+\r
+ uhash_put(typeDataMap, (void*)legacyTypeId, t, &sts);\r
+ if (bcpTypeId != legacyTypeId) {\r
+ // different type value\r
+ uhash_put(typeDataMap, (void*)bcpTypeId, t, &sts);\r
+ }\r
+ if (U_FAILURE(sts)) {\r
+ break;\r
+ }\r
+\r
+ // also put aliases in the map\r
+ if (typeAliasResByKey != NULL) {\r
+ UResourceBundle typeAliasDataEntry;\r
+ ures_initStackObject(&typeAliasDataEntry);\r
+\r
+ ures_resetIterator(typeAliasResByKey);\r
+ while (ures_hasNext(typeAliasResByKey) && U_SUCCESS(sts)) {\r
+ int32_t toLen;\r
+ ures_getNextResource(typeAliasResByKey, &typeAliasDataEntry, &sts);\r
+ const UChar* to = ures_getString(&typeAliasDataEntry, &toLen, &sts);\r
+ if (U_FAILURE(sts)) {\r
+ break;\r
+ }\r
+ // check if this is an alias of the canonical legacy type\r
+ if (uprv_compareInvAscii(NULL, legacyTypeId, -1, to, toLen) == 0) {\r
+ const char* from = ures_getKey(&typeAliasDataEntry);\r
+ if (isTZ) {\r
+ // replace colon with slash if necessary\r
+ if (uprv_strchr(from, ':') != NULL) {\r
+ int32_t fromLen = uprv_strlen(from);\r
+ char* fromBuf = (char*)uprv_malloc(fromLen + 1);\r
+ if (fromBuf == NULL) {\r
+ sts = U_MEMORY_ALLOCATION_ERROR;\r
+ break;\r
+ }\r
+ const char* p = from;\r
+ char* q = fromBuf;\r
+ while (*p) {\r
+ if (*p == ':') {\r
+ *q++ = '/';\r
+ } else {\r
+ *q++ = *p;\r
+ }\r
+ p++;\r
+ }\r
+ *q = 0;\r
+\r
+ gKeyTypeStringPool->addElement(fromBuf, sts);\r
+ if (U_FAILURE(sts)) {\r
+ break;\r
+ }\r
+ from = fromBuf;\r
+ }\r
+ }\r
+ uhash_put(typeDataMap, (void*)from, t, &sts);\r
+ }\r
+ }\r
+ ures_close(&typeAliasDataEntry);\r
+ if (U_FAILURE(sts)) {\r
+ break;\r
+ }\r
+ }\r
+\r
+ if (bcpTypeAliasResByKey != NULL) {\r
+ UResourceBundle bcpTypeAliasDataEntry;\r
+ ures_initStackObject(&bcpTypeAliasDataEntry);\r
+\r
+ ures_resetIterator(bcpTypeAliasResByKey);\r
+ while (ures_hasNext(bcpTypeAliasResByKey) && U_SUCCESS(sts)) {\r
+ int32_t toLen;\r
+ ures_getNextResource(bcpTypeAliasResByKey, &bcpTypeAliasDataEntry, &sts);\r
+ const UChar* to = ures_getString(&bcpTypeAliasDataEntry, &toLen, &sts);\r
+ if (U_FAILURE(sts)) {\r
+ break;\r
+ }\r
+ // check if this is an alias of bcp type\r
+ if (uprv_compareInvAscii(NULL, bcpTypeId, -1, to, toLen) == 0) {\r
+ const char* from = ures_getKey(&bcpTypeAliasDataEntry);\r
+ uhash_put(typeDataMap, (void*)from, t, &sts);\r
+ }\r
+ }\r
+ ures_close(&bcpTypeAliasDataEntry);\r
+ if (U_FAILURE(sts)) {\r
+ break;\r
+ }\r
+ }\r
+ }\r
+ ures_close(&typeMapEntry);\r
+ }\r
+ ures_close(typeMapResByKey);\r
+ ures_close(typeAliasResByKey);\r
+ ures_close(bcpTypeAliasResByKey);\r
+ if (U_FAILURE(sts)) {\r
+ break; /* NOTE(review): typeDataMap is neither closed nor stored on this path */\r
+ }\r
+\r
+ LocExtKeyData* keyData = (LocExtKeyData*)uprv_malloc(sizeof(LocExtKeyData));\r
+ if (keyData == NULL) {\r
+ sts = U_MEMORY_ALLOCATION_ERROR;\r
+ break;\r
+ }\r
+ keyData->bcpId = bcpKeyId;\r
+ keyData->legacyId = legacyKeyId;\r
+ keyData->specialTypes = specialTypes;\r
+ keyData->typeMap = typeDataMap;\r
+\r
+ gLocExtKeyDataEntries->addElement((void*)keyData, sts);\r
+ if (U_FAILURE(sts)) {\r
+ break;\r
+ }\r
+\r
+ uhash_put(gLocExtKeyMap, (void*)legacyKeyId, keyData, &sts);\r
+ if (legacyKeyId != bcpKeyId) {\r
+ // different key value\r
+ uhash_put(gLocExtKeyMap, (void*)bcpKeyId, keyData, &sts);\r
+ }\r
+ if (U_FAILURE(sts)) {\r
+ break;\r
+ }\r
+ }\r
+\r
+ ures_close(&keyMapEntry);\r
+\r
+close_bundles:\r
+ ures_close(bcpTypeAliasRes);\r
+ ures_close(typeAliasRes);\r
+ ures_close(typeMapRes);\r
+ ures_close(keyMapRes);\r
+ ures_close(keyTypeDataRes);\r
+}\r
+/*\r
+ * Ensures the lookup tables are built by running initFromResourceBundle()\r
+ * through umtx_initOnce() with the gLocExtKeyMapInitOnce guard.\r
+ *\r
+ * @return TRUE when the resulting error code is not a failure, FALSE otherwise.\r
+ */\r
+static UBool\r
+init() {\r
+ UErrorCode sts = U_ZERO_ERROR;\r
+ umtx_initOnce(gLocExtKeyMapInitOnce, &initFromResourceBundle, sts);\r
+ if (U_FAILURE(sts)) {\r
+ return FALSE;\r
+ }\r
+ return TRUE;\r
+}\r
+/*\r
+ * Checks the syntax used for SPECIALTYPE_CODEPOINTS values: one or more\r
+ * subtags separated by a hyphen, each made of 4 to 6 characters from\r
+ * 0-9, A-F or a-f. An empty string, a leading or trailing hyphen and two\r
+ * adjacent hyphens are all rejected because they produce a subtag of length 0.\r
+ *\r
+ * @param val NUL-terminated value; it is dereferenced without a NULL check.\r
+ * @return TRUE when val matches the syntax, FALSE otherwise.\r
+ */\r
+static UBool\r
+isSpecialTypeCodepoints(const char* val) {\r
+ int32_t subtagLen = 0;\r
+ const char* p = val;\r
+ while (*p) {\r
+ if (*p == '-') {\r
+ if (subtagLen < 4 || subtagLen > 6) {\r
+ return FALSE;\r
+ }\r
+ subtagLen = 0;\r
+ } else if (('0' <= *p && *p <= '9') ||\r
+ ('A' <= *p && *p <= 'F') || ('a' <= *p && *p <= 'f')) {\r
+ subtagLen++;\r
+ } else {\r
+ return FALSE;\r
+ }\r
+ p++;\r
+ }\r
+ return (subtagLen >= 4 && subtagLen <= 6);\r
+}\r
+/*\r
+ * Checks the syntax used for SPECIALTYPE_REORDER_CODE values: one or more\r
+ * subtags separated by a hyphen, each made of 3 to 8 characters from\r
+ * A-Z or a-z. An empty string, a leading or trailing hyphen and two\r
+ * adjacent hyphens are all rejected because they produce a subtag of length 0.\r
+ *\r
+ * @param val NUL-terminated value; it is dereferenced without a NULL check.\r
+ * @return TRUE when val matches the syntax, FALSE otherwise.\r
+ */\r
+static UBool\r
+isSpecialTypeReorderCode(const char* val) {\r
+ int32_t subtagLen = 0;\r
+ const char* p = val;\r
+ while (*p) {\r
+ if (*p == '-') {\r
+ if (subtagLen < 3 || subtagLen > 8) {\r
+ return FALSE;\r
+ }\r
+ subtagLen = 0;\r
+ } else if (('A' <= *p && *p <= 'Z') || ('a' <= *p && *p <= 'z')) {\r
+ subtagLen++;\r
+ } else {\r
+ return FALSE;\r
+ }\r
+ p++;\r
+ }\r
+ return (subtagLen >=3 && subtagLen <=8);\r
+}\r
+/*\r
+ * Returns the BCP key id registered for key in gLocExtKeyMap.\r
+ * The map holds each record under both its legacy and its BCP key id, so\r
+ * either form can be passed. Lookup is presumably case-insensitive, given\r
+ * the IChars hash and compare functions the map is opened with - TODO confirm.\r
+ *\r
+ * @param key key to look up.\r
+ * @return the record's bcpId, or NULL when initialization failed or the key\r
+ *         is not in the map. The pointer refers to table data, not a copy.\r
+ */\r
+U_CFUNC const char*\r
+ulocimp_toBcpKey(const char* key) {\r
+ if (!init()) {\r
+ return NULL;\r
+ }\r
+\r
+ LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key);\r
+ if (keyData != NULL) {\r
+ return keyData->bcpId;\r
+ }\r
+ return NULL;\r
+}\r
+/*\r
+ * Returns the legacy key id registered for key in gLocExtKeyMap.\r
+ * The map holds each record under both its legacy and its BCP key id, so\r
+ * either form can be passed. Lookup is presumably case-insensitive, given\r
+ * the IChars hash and compare functions the map is opened with - TODO confirm.\r
+ *\r
+ * @param key key to look up.\r
+ * @return the record's legacyId, or NULL when initialization failed or the\r
+ *         key is not in the map. The pointer refers to table data, not a copy.\r
+ */\r
+U_CFUNC const char*\r
+ulocimp_toLegacyKey(const char* key) {\r
+ if (!init()) {\r
+ return NULL;\r
+ }\r
+\r
+ LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key);\r
+ if (keyData != NULL) {\r
+ return keyData->legacyId;\r
+ }\r
+ return NULL;\r
+}\r
+/*\r
+ * Returns the BCP type id for a key/type pair.\r
+ *\r
+ * The key is looked up in gLocExtKeyMap and the type in that key's typeMap,\r
+ * which holds legacy ids, BCP ids and aliases. When the type is not in the\r
+ * map but the key carries special-type flags, the type is checked against\r
+ * the matching syntax (code points first, then reorder codes) and, on a\r
+ * match, the input type pointer itself is returned.\r
+ *\r
+ * @param key           key the type belongs to.\r
+ * @param type          type to convert.\r
+ * @param isKnownKey    optional output, may be NULL; set to FALSE on entry\r
+ *                      and to TRUE when the key is found in the map.\r
+ * @param isSpecialType optional output, may be NULL; set to FALSE on entry\r
+ *                      and to TRUE when the result comes from a special-type\r
+ *                      syntax match.\r
+ * @return the mapped bcpId, the input type pointer, or NULL when\r
+ *         initialization failed or nothing matched.\r
+ */\r
+U_CFUNC const char*\r
+ulocimp_toBcpType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType) {\r
+ if (isKnownKey != NULL) {\r
+ *isKnownKey = FALSE;\r
+ }\r
+ if (isSpecialType != NULL) {\r
+ *isSpecialType = FALSE;\r
+ }\r
+\r
+ if (!init()) {\r
+ return NULL;\r
+ }\r
+\r
+ LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key);\r
+ if (keyData != NULL) {\r
+ if (isKnownKey != NULL) {\r
+ *isKnownKey = TRUE;\r
+ }\r
+ LocExtType* t = (LocExtType*)uhash_get(keyData->typeMap, type);\r
+ if (t != NULL) {\r
+ return t->bcpId;\r
+ }\r
+ if (keyData->specialTypes != SPECIALTYPE_NONE) {\r
+ UBool matched = FALSE;\r
+ if (keyData->specialTypes & SPECIALTYPE_CODEPOINTS) {\r
+ matched = isSpecialTypeCodepoints(type);\r
+ }\r
+ if (!matched && keyData->specialTypes & SPECIALTYPE_REORDER_CODE) {\r
+ matched = isSpecialTypeReorderCode(type);\r
+ }\r
+ if (matched) {\r
+ if (isSpecialType != NULL) {\r
+ *isSpecialType = TRUE;\r
+ }\r
+ return type;\r
+ }\r
+ }\r
+ }\r
+ return NULL;\r
+}\r
+\r
+/*\r
+ * Returns the legacy type id for a key/type pair.\r
+ *\r
+ * The key is looked up in gLocExtKeyMap and the type in that key's typeMap,\r
+ * which holds legacy ids, BCP ids and aliases. When the type is not in the\r
+ * map but the key carries special-type flags, the type is checked against\r
+ * the matching syntax (code points first, then reorder codes) and, on a\r
+ * match, the input type pointer itself is returned.\r
+ *\r
+ * @param key           key the type belongs to.\r
+ * @param type          type to convert.\r
+ * @param isKnownKey    optional output, may be NULL; set to FALSE on entry\r
+ *                      and to TRUE when the key is found in the map.\r
+ * @param isSpecialType optional output, may be NULL; set to FALSE on entry\r
+ *                      and to TRUE when the result comes from a special-type\r
+ *                      syntax match.\r
+ * @return the mapped legacyId, the input type pointer, or NULL when\r
+ *         initialization failed or nothing matched.\r
+ */\r
+U_CFUNC const char*\r
+ulocimp_toLegacyType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType) {\r
+ if (isKnownKey != NULL) {\r
+ *isKnownKey = FALSE;\r
+ }\r
+ if (isSpecialType != NULL) {\r
+ *isSpecialType = FALSE;\r
+ }\r
+\r
+ if (!init()) {\r
+ return NULL;\r
+ }\r
+\r
+ LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key);\r
+ if (keyData != NULL) {\r
+ if (isKnownKey != NULL) {\r
+ *isKnownKey = TRUE;\r
+ }\r
+ LocExtType* t = (LocExtType*)uhash_get(keyData->typeMap, type);\r
+ if (t != NULL) {\r
+ return t->legacyId;\r
+ }\r
+ if (keyData->specialTypes != SPECIALTYPE_NONE) {\r
+ UBool matched = FALSE;\r
+ if (keyData->specialTypes & SPECIALTYPE_CODEPOINTS) {\r
+ matched = isSpecialTypeCodepoints(type);\r
+ }\r
+ if (!matched && keyData->specialTypes & SPECIALTYPE_REORDER_CODE) {\r
+ matched = isSpecialTypeReorderCode(type);\r
+ }\r
+ if (matched) {\r
+ if (isSpecialType != NULL) {\r
+ *isSpecialType = TRUE;\r
+ }\r
+ return type;\r
+ }\r
+ }\r
+ }\r
+ return NULL;\r
+}\r
+\r
return _isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag));
}
-static UBool
-_isLDMLKey(const char* s, int32_t len) {
+U_CFUNC UBool
+ultag_isUnicodeLocaleKey(const char* s, int32_t len) {
if (len < 0) {
len = (int32_t)uprv_strlen(s);
}
return FALSE;
}
-static UBool
-_isLDMLType(const char* s, int32_t len) {
+U_CFUNC UBool
+ultag_isUnicodeLocaleType(const char*s, int32_t len) {
+ const char* p;
+ int32_t subtagLen = 0;
+/*
+ * Checks that s is one or more subtags separated by SEP, where every
+ * subtag has 3 to 8 characters accepted by ISALPHA or ISNUMERIC.
+ * A negative len means s is NUL-terminated and is measured with
+ * uprv_strlen(). An empty input and a trailing SEP are rejected by the
+ * final length check.
+ */
if (len < 0) {
len = (int32_t)uprv_strlen(s);
}
- if (len >= 3 && len <= 8 && _isAlphaNumericString(s, len)) {
- return TRUE;
+
+ for (p = s; len > 0; p++, len--) {
+ if (*p == SEP) {
+ if (subtagLen < 3) {
+ return FALSE;
+ }
+ subtagLen = 0;
+ } else if (ISALPHA(*p) || ISNUMERIC(*p)) {
+ subtagLen++;
+ if (subtagLen > 8) {
+ return FALSE;
+ }
+ } else {
+ return FALSE;
+ }
}
- return FALSE;
-}
+ return (subtagLen >= 3);
+}
/*
* -------------------------------------------------
*
langtag->privateuse = EMPTY;
}
-#define KEYTYPEDATA "keyTypeData"
-#define KEYMAP "keyMap"
-#define TYPEMAP "typeMap"
-#define TYPEALIAS "typeAlias"
-#define MAX_BCP47_SUBTAG_LEN 9 /* including null terminator */
-#define MAX_LDML_KEY_LEN 22
-#define MAX_LDML_TYPE_LEN 32
-
-static int32_t
-_ldmlKeyToBCP47(const char* key, int32_t keyLen,
- char* bcpKey, int32_t bcpKeyCapacity,
- UErrorCode *status) {
- UResourceBundle *rb;
- char keyBuf[MAX_LDML_KEY_LEN];
- char bcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
- int32_t resultLen = 0;
- int32_t i;
- UErrorCode tmpStatus = U_ZERO_ERROR;
- const UChar *uBcpKey;
- int32_t bcpKeyLen;
-
- if (keyLen < 0) {
- keyLen = (int32_t)uprv_strlen(key);
- }
-
- if (keyLen >= sizeof(keyBuf)) {
- /* no known valid LDML key exceeding 21 */
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- uprv_memcpy(keyBuf, key, keyLen);
- keyBuf[keyLen] = 0;
-
- /* to lower case */
- for (i = 0; i < keyLen; i++) {
- keyBuf[i] = uprv_tolower(keyBuf[i]);
- }
-
- rb = ures_openDirect(NULL, KEYTYPEDATA, status);
- ures_getByKey(rb, KEYMAP, rb, status);
-
- if (U_FAILURE(*status)) {
- ures_close(rb);
- return 0;
- }
-
- uBcpKey = ures_getStringByKey(rb, keyBuf, &bcpKeyLen, &tmpStatus);
- if (U_SUCCESS(tmpStatus)) {
- if (bcpKeyLen == 0) {
- /* empty value indicates the BCP47 key is same with the legacy key */
- uprv_memcpy(bcpKeyBuf, key, keyLen);
- bcpKeyBuf[keyLen] = 0;
- resultLen = keyLen;
- } else {
- u_UCharsToChars(uBcpKey, bcpKeyBuf, bcpKeyLen);
- bcpKeyBuf[bcpKeyLen] = 0;
- resultLen = bcpKeyLen;
- }
- } else {
- if (_isLDMLKey(key, keyLen)) {
- uprv_memcpy(bcpKeyBuf, key, keyLen);
- bcpKeyBuf[keyLen] = 0;
- resultLen = keyLen;
- } else {
- /* mapping not availabe */
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- }
- }
- ures_close(rb);
-
- if (U_FAILURE(*status)) {
- return 0;
- }
-
- uprv_memcpy(bcpKey, bcpKeyBuf, uprv_min(resultLen, bcpKeyCapacity));
- return u_terminateChars(bcpKey, bcpKeyCapacity, resultLen, status);
-}
-
-static int32_t
-_bcp47ToLDMLKey(const char* bcpKey, int32_t bcpKeyLen,
- char* key, int32_t keyCapacity,
- UErrorCode *status) {
- UResourceBundle *rb;
- char bcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
- int32_t resultLen = 0;
- int32_t i;
- const char *resKey = NULL;
- UResourceBundle *mapData;
-
- if (bcpKeyLen < 0) {
- bcpKeyLen = (int32_t)uprv_strlen(bcpKey);
- }
-
- if (bcpKeyLen >= sizeof(bcpKeyBuf)) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- uprv_memcpy(bcpKeyBuf, bcpKey, bcpKeyLen);
- bcpKeyBuf[bcpKeyLen] = 0;
-
- /* to lower case */
- for (i = 0; i < bcpKeyLen; i++) {
- bcpKeyBuf[i] = uprv_tolower(bcpKeyBuf[i]);
- }
-
- rb = ures_openDirect(NULL, KEYTYPEDATA, status);
- ures_getByKey(rb, KEYMAP, rb, status);
- if (U_FAILURE(*status)) {
- ures_close(rb);
- return 0;
- }
-
- mapData = ures_getNextResource(rb, NULL, status);
- while (U_SUCCESS(*status)) {
- const UChar *uBcpKey;
- char tmpBcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
- int32_t tmpBcpKeyLen;
- const char *tmpBcpKey = tmpBcpKeyBuf;
-
- uBcpKey = ures_getString(mapData, &tmpBcpKeyLen, status);
- if (U_FAILURE(*status)) {
- break;
- }
- if (tmpBcpKeyLen == 0) {
- /* empty value indicates the BCP47 key is same with the legacy key */
- tmpBcpKey = ures_getKey(mapData);
- } else {
- u_UCharsToChars(uBcpKey, tmpBcpKeyBuf, tmpBcpKeyLen);
- tmpBcpKeyBuf[tmpBcpKeyLen] = 0;
- }
- if (uprv_compareInvCharsAsAscii(bcpKeyBuf, tmpBcpKey) == 0) {
- /* found a matching BCP47 key */
- resKey = ures_getKey(mapData);
- resultLen = (int32_t)uprv_strlen(resKey);
- break;
- }
- if (!ures_hasNext(rb)) {
- break;
- }
- ures_getNextResource(rb, mapData, status);
- }
- ures_close(mapData);
- ures_close(rb);
-
- if (U_FAILURE(*status)) {
- return 0;
- }
-
- if (resKey == NULL) {
- resKey = bcpKeyBuf;
- resultLen = bcpKeyLen;
- }
-
- uprv_memcpy(key, resKey, uprv_min(resultLen, keyCapacity));
- return u_terminateChars(key, keyCapacity, resultLen, status);
-}
-
-static int32_t
-_ldmlTypeToBCP47(const char* key, int32_t keyLen,
- const char* type, int32_t typeLen,
- char* bcpType, int32_t bcpTypeCapacity,
- UErrorCode *status) {
- UResourceBundle *rb, *keyTypeData, *typeMapForKey;
- char keyBuf[MAX_LDML_KEY_LEN];
- char typeBuf[MAX_LDML_TYPE_LEN];
- char bcpTypeBuf[MAX_BCP47_SUBTAG_LEN];
- int32_t resultLen = 0;
- int32_t i;
- UErrorCode tmpStatus = U_ZERO_ERROR;
- const UChar *uBcpType, *uCanonicalType;
- int32_t bcpTypeLen, canonicalTypeLen;
- UBool isTimezone = FALSE;
-
- if (keyLen < 0) {
- keyLen = (int32_t)uprv_strlen(key);
- }
- if (keyLen >= sizeof(keyBuf)) {
- /* no known valid LDML key exceeding 21 */
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- uprv_memcpy(keyBuf, key, keyLen);
- keyBuf[keyLen] = 0;
-
- /* to lower case */
- for (i = 0; i < keyLen; i++) {
- keyBuf[i] = uprv_tolower(keyBuf[i]);
- }
- if (uprv_compareInvCharsAsAscii(keyBuf, "timezone") == 0) {
- isTimezone = TRUE;
- }
-
- if (typeLen < 0) {
- typeLen = (int32_t)uprv_strlen(type);
- }
- if (typeLen >= sizeof(typeBuf)) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- if (isTimezone) {
- /* replace '/' with ':' */
- for (i = 0; i < typeLen; i++) {
- if (*(type + i) == '/') {
- typeBuf[i] = ':';
- } else {
- typeBuf[i] = *(type + i);
- }
- }
- typeBuf[typeLen] = 0;
- type = &typeBuf[0];
- }
-
- keyTypeData = ures_openDirect(NULL, KEYTYPEDATA, status);
- rb = ures_getByKey(keyTypeData, TYPEMAP, NULL, status);
- if (U_FAILURE(*status)) {
- ures_close(rb);
- ures_close(keyTypeData);
- return 0;
- }
-
- typeMapForKey = ures_getByKey(rb, keyBuf, NULL, &tmpStatus);
- uBcpType = ures_getStringByKey(typeMapForKey, type, &bcpTypeLen, &tmpStatus);
- if (U_SUCCESS(tmpStatus)) {
- if (bcpTypeLen == 0) {
- /* empty value indicates the BCP47 type is same with the legacy type */
- uprv_memcpy(bcpTypeBuf, type, typeLen);
- resultLen = typeLen;
- } else {
- u_UCharsToChars(uBcpType, bcpTypeBuf, bcpTypeLen);
- resultLen = bcpTypeLen;
- }
- } else if (tmpStatus == U_MISSING_RESOURCE_ERROR) {
- /* is this type alias? */
- tmpStatus = U_ZERO_ERROR;
- ures_getByKey(keyTypeData, TYPEALIAS, rb, &tmpStatus);
- ures_getByKey(rb, keyBuf, rb, &tmpStatus);
- uCanonicalType = ures_getStringByKey(rb, type, &canonicalTypeLen, &tmpStatus);
- if (U_SUCCESS(tmpStatus)) {
- u_UCharsToChars(uCanonicalType, typeBuf, canonicalTypeLen);
- if (isTimezone) {
- /* replace '/' with ':' */
- for (i = 0; i < canonicalTypeLen; i++) {
- if (typeBuf[i] == '/') {
- typeBuf[i] = ':';
- }
- }
- }
- typeBuf[canonicalTypeLen] = 0;
-
- /* look up the canonical type */
- uBcpType = ures_getStringByKey(typeMapForKey, typeBuf, &bcpTypeLen, &tmpStatus);
- if (U_SUCCESS(tmpStatus)) {
- if (bcpTypeLen == 0) {
- /* empty value indicates the BCP47 type is same with the legacy type */
- uprv_memcpy(bcpTypeBuf, typeBuf, canonicalTypeLen);
- resultLen = canonicalTypeLen;
- } else {
- u_UCharsToChars(uBcpType, bcpTypeBuf, bcpTypeLen);
- resultLen = bcpTypeLen;
- }
- }
- }
- if (tmpStatus == U_MISSING_RESOURCE_ERROR) {
- if (_isLDMLType(type, typeLen)) {
- uprv_memcpy(bcpTypeBuf, type, typeLen);
- resultLen = typeLen;
- } else {
- /* mapping not availabe */
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- }
- }
- } else {
- *status = tmpStatus;
- }
- ures_close(rb);
- ures_close(typeMapForKey);
- ures_close(keyTypeData);
-
- if (U_FAILURE(*status)) {
- return 0;
- }
-
- uprv_memcpy(bcpType, bcpTypeBuf, uprv_min(resultLen, bcpTypeCapacity));
- return u_terminateChars(bcpType, bcpTypeCapacity, resultLen, status);
-}
-
-static int32_t
-_bcp47ToLDMLType(const char* key, int32_t keyLen,
- const char* bcpType, int32_t bcpTypeLen,
- char* type, int32_t typeCapacity,
- UErrorCode *status) {
- UResourceBundle *rb;
- char keyBuf[MAX_LDML_KEY_LEN];
- char bcpTypeBuf[ULOC_KEYWORDS_CAPACITY]; /* ensure buffter is large enough for multiple values (e.g. buddhist-greg) */
- int32_t resultLen = 0;
- int32_t i, typeSize;
- const char *resType = NULL;
- UResourceBundle *mapData;
- UErrorCode tmpStatus = U_ZERO_ERROR;
- int32_t copyLen;
-
- if (keyLen < 0) {
- keyLen = (int32_t)uprv_strlen(key);
- }
-
- if (keyLen >= sizeof(keyBuf)) {
- /* no known valid LDML key exceeding 21 */
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- uprv_memcpy(keyBuf, key, keyLen);
- keyBuf[keyLen] = 0;
-
- /* to lower case */
- for (i = 0; i < keyLen; i++) {
- keyBuf[i] = uprv_tolower(keyBuf[i]);
- }
-
-
- if (bcpTypeLen < 0) {
- bcpTypeLen = (int32_t)uprv_strlen(bcpType);
- }
-
- typeSize = 0;
- for (i = 0; i < bcpTypeLen; i++) {
- if (bcpType[i] == SEP) {
- if (typeSize >= MAX_BCP47_SUBTAG_LEN) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- typeSize = 0;
- } else {
- typeSize++;
- }
- }
-
- uprv_memcpy(bcpTypeBuf, bcpType, bcpTypeLen);
- bcpTypeBuf[bcpTypeLen] = 0;
-
- /* to lower case */
- for (i = 0; i < bcpTypeLen; i++) {
- bcpTypeBuf[i] = uprv_tolower(bcpTypeBuf[i]);
- }
-
- rb = ures_openDirect(NULL, KEYTYPEDATA, status);
- ures_getByKey(rb, TYPEMAP, rb, status);
- if (U_FAILURE(*status)) {
- ures_close(rb);
- return 0;
- }
-
- ures_getByKey(rb, keyBuf, rb, &tmpStatus);
- mapData = ures_getNextResource(rb, NULL, &tmpStatus);
- while (U_SUCCESS(tmpStatus)) {
- const UChar *uBcpType;
- char tmpBcpTypeBuf[MAX_BCP47_SUBTAG_LEN];
- int32_t tmpBcpTypeLen;
- const char *tmpBcpType = tmpBcpTypeBuf;
-
- uBcpType = ures_getString(mapData, &tmpBcpTypeLen, &tmpStatus);
- if (U_FAILURE(tmpStatus)) {
- break;
- }
- if (tmpBcpTypeLen == 0) {
- /* empty value indicates the BCP47 type is same with the legacy type */
- tmpBcpType = ures_getKey(mapData);
- } else {
- u_UCharsToChars(uBcpType, tmpBcpTypeBuf, tmpBcpTypeLen);
- tmpBcpTypeBuf[tmpBcpTypeLen] = 0;
- }
- if (uprv_compareInvCharsAsAscii(bcpTypeBuf, tmpBcpType) == 0) {
- /* found a matching BCP47 type */
- resType = ures_getKey(mapData);
- resultLen = (int32_t)uprv_strlen(resType);
- break;
- }
- if (!ures_hasNext(rb)) {
- break;
- }
- ures_getNextResource(rb, mapData, &tmpStatus);
- }
- ures_close(mapData);
- ures_close(rb);
-
- if (U_FAILURE(tmpStatus) && tmpStatus != U_MISSING_RESOURCE_ERROR) {
- *status = tmpStatus;
- return 0;
- }
-
- if (resType == NULL) {
- resType = bcpTypeBuf;
- resultLen = bcpTypeLen;
- }
-
- copyLen = uprv_min(resultLen, typeCapacity);
- uprv_memcpy(type, resType, copyLen);
-
- if (uprv_compareInvCharsAsAscii(keyBuf, "timezone") == 0) {
- for (i = 0; i < copyLen; i++) {
- if (*(type + i) == ':') {
- *(type + i) = '/';
- }
- }
- }
-
- return u_terminateChars(type, typeCapacity, resultLen, status);
-}
-
static int32_t
_appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
char buf[ULOC_LANG_CAPACITY];
const char *bcpKey, *bcpValue;
UErrorCode tmpStatus = U_ZERO_ERROR;
int32_t keylen;
- UBool isLDMLKeyword;
+ UBool isBcpUExt;
while (TRUE) {
isAttribute = FALSE;
break;
}
len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStatus);
- if (U_FAILURE(tmpStatus)) {
+ /* buf must be null-terminated */
+ if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
break;
}
keylen = (int32_t)uprv_strlen(key);
- isLDMLKeyword = (keylen > 1);
+ isBcpUExt = (keylen > 1);
/* special keyword used for representing Unicode locale attributes */
if (uprv_strcmp(key, LOCALE_ATTRIBUTE_KEY) == 0) {
}
}
}
- } else if (isLDMLKeyword) {
- int32_t modKeyLen;
-
- /* transform key and value to bcp47 style */
- modKeyLen = _ldmlKeyToBCP47(key, keylen, pExtBuf, extBufCapacity, &tmpStatus);
- if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
+ } else if (isBcpUExt) {
+ bcpKey = uloc_toUnicodeLocaleKey(key);
+ if (bcpKey == NULL) {
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
break;
}
- tmpStatus = U_ZERO_ERROR;
continue;
}
- bcpKey = pExtBuf;
- pExtBuf += (modKeyLen + 1);
- extBufCapacity -= (modKeyLen + 1);
-
- len = _ldmlTypeToBCP47(key, keylen, buf, len, pExtBuf, extBufCapacity, &tmpStatus);
- if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
+ /* we've checked buf is null-terminated above */
+ bcpValue = uloc_toUnicodeLocaleType(key, buf);
+ if (bcpValue == NULL) {
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
break;
}
- tmpStatus = U_ZERO_ERROR;
continue;
}
- bcpValue = pExtBuf;
- pExtBuf += (len + 1);
- extBufCapacity -= (len + 1);
+ if (bcpValue == buf) {
+ /*
+ When uloc_toUnicodeLocaleType(key, buf) returns the
+ input value as is, the value is well-formed, but has
+ no known mapping. This implementation normalizes the
+ value to lower case
+ */
+ int32_t bcpValueLen = uprv_strlen(bcpValue);
+ if (bcpValueLen < extBufCapacity) {
+ uprv_strcpy(pExtBuf, bcpValue);
+ T_CString_toLowerCase(pExtBuf);
+
+ bcpValue = pExtBuf;
+
+ pExtBuf += (bcpValueLen + 1);
+ extBufCapacity -= (bcpValueLen + 1);
+ } else {
+ if (strict) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ break;
+ }
+ continue;
+ }
+ }
} else {
if (*key == PRIVATEUSE) {
if (!_isPrivateuseValueSubtags(buf, len)) {
/* locate next separator char */
for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
- if (_isLDMLKey(pTag, len)) {
+ if (ultag_isUnicodeLocaleKey(pTag, len)) {
pKwds = pTag;
break;
}
/* locate next separator char */
for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
- if (_isLDMLKey(pTag, len)) {
+ if (ultag_isUnicodeLocaleKey(pTag, len)) {
if (pBcpKey) {
emitKeyword = TRUE;
pNextBcpKey = pTag;
const char *pKey = NULL; /* LDML key */
const char *pType = NULL; /* LDML type */
+ char bcpKeyBuf[9]; /* BCP key length is always 2 for now */
+
U_ASSERT(pBcpKey != NULL);
+ if (bcpKeyLen >= sizeof(bcpKeyBuf)) {
+ /* the BCP key is invalid */
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ goto cleanup;
+ }
+
+ uprv_strncpy(bcpKeyBuf, pBcpKey, bcpKeyLen);
+ bcpKeyBuf[bcpKeyLen] = 0;
+
/* u extension key to LDML key */
- len = _bcp47ToLDMLKey(pBcpKey, bcpKeyLen, buf + bufIdx, bufSize - bufIdx - 1, status);
- if (U_FAILURE(*status)) {
+ pKey = uloc_toLegacyKey(bcpKeyBuf);
+ if (pKey == NULL) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
goto cleanup;
}
- pKey = buf + bufIdx;
- bufIdx += len;
- *(buf + bufIdx) = 0;
- bufIdx++;
+ if (pKey == bcpKeyBuf) {
+ /*
+ The key returned by toLegacyKey points to the input buffer.
+ We normalize the result key to lower case.
+ */
+ T_CString_toLowerCase(bcpKeyBuf);
+ if (bufSize - bufIdx - 1 >= bcpKeyLen) {
+ uprv_memcpy(buf + bufIdx, bcpKeyBuf, bcpKeyLen);
+ pKey = buf + bufIdx;
+ bufIdx += bcpKeyLen;
+ *(buf + bufIdx) = 0;
+ bufIdx++;
+ } else {
+ *status = U_BUFFER_OVERFLOW_ERROR;
+ goto cleanup;
+ }
+ }
if (pBcpType) {
+ char bcpTypeBuf[128]; /* practically long enough even considering multiple subtag type */
+ if (bcpTypeLen >= sizeof(bcpTypeBuf)) {
+ /* the BCP type is too long */
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ goto cleanup;
+ }
+
+ uprv_strncpy(bcpTypeBuf, pBcpType, bcpTypeLen);
+ bcpTypeBuf[bcpTypeLen] = 0;
+
/* BCP type to locale type */
- len = _bcp47ToLDMLType(pKey, -1, pBcpType, bcpTypeLen, buf + bufIdx, bufSize - bufIdx - 1, status);
- if (U_FAILURE(*status)) {
+ pType = uloc_toLegacyType(pKey, bcpTypeBuf);
+ if (pType == NULL) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
goto cleanup;
}
- pType = buf + bufIdx;
- bufIdx += len;
- *(buf + bufIdx) = 0;
- bufIdx++;
+ if (pType == bcpTypeBuf) {
+ /*
+ The type returned by toLegacyType points to the input buffer.
+ We normalize the result type to lower case.
+ */
+ /* normalize to lower case */
+ T_CString_toLowerCase(bcpTypeBuf);
+ if (bufSize - bufIdx - 1 >= bcpTypeLen) {
+ uprv_memcpy(buf + bufIdx, bcpTypeBuf, bcpTypeLen);
+ pType = buf + bufIdx;
+ bufIdx += bcpTypeLen;
+ *(buf + bufIdx) = 0;
+ bufIdx++;
+ } else {
+ *status = U_BUFFER_OVERFLOW_ERROR;
+ goto cleanup;
+ }
+ }
} else {
/* typeless - default type value is "yes" */
pType = LOCALE_TYPE_YES;
/*
**********************************************************************
-* Copyright (C) 2004-2010, International Business Machines
+* Copyright (C) 2004-2014, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*/
U_CAPI const char * U_EXPORT2
locale_getKeywordsStart(const char *localeID);
+
+U_CFUNC UBool
+ultag_isUnicodeLocaleKey(const char* s, int32_t len);
+
+U_CFUNC UBool
+ultag_isUnicodeLocaleType(const char* s, int32_t len);
+
+U_CFUNC const char*
+ulocimp_toBcpKey(const char* key);
+
+U_CFUNC const char*
+ulocimp_toLegacyKey(const char* key);
+
+U_CFUNC const char*
+ulocimp_toBcpType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType);
+
+U_CFUNC const char*
+ulocimp_toLegacyType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType);
+
#endif
UBool strict,
UErrorCode* err);
+#ifndef U_HIDE_DRAFT_API
+/**
+ * Converts the specified keyword (legacy key, or BCP 47 Unicode locale
+ * extension key) to the equivalent BCP 47 Unicode locale extension key.
+ * For example, BCP 47 Unicode locale extension key "co" is returned for
+ * the input keyword "collation".
+ * <p>
+ * When the specified keyword is unknown, but satisfies the BCP syntax,
+ * then the pointer to the input keyword itself will be returned.
+ * For example,
+ * <code>uloc_toUnicodeLocaleKey("ZZ")</code> returns "ZZ".
+ *
+ * @param keyword the input locale keyword (either legacy key
+ * such as "collation" or BCP 47 Unicode locale extension
+ * key such as "co").
+ * @return the well-formed BCP 47 Unicode locale extension key,
+ * or NULL if the specified locale keyword cannot be
+ * mapped to a well-formed BCP 47 Unicode locale extension
+ * key.
+ * @see uloc_toLegacyKey
+ * @draft ICU 54
+ */
+U_DRAFT const char* U_EXPORT2
+uloc_toUnicodeLocaleKey(const char* keyword);
+
+/**
+ * Converts the specified keyword value (legacy type, or BCP 47
+ * Unicode locale extension type) to the well-formed BCP 47 Unicode locale
+ * extension type for the specified keyword (category). For example, BCP 47
+ * Unicode locale extension type "phonebk" is returned for the input
+ * keyword value "phonebook", with the keyword "collation" (or "co").
+ * <p>
+ * When the specified keyword is not recognized, but the specified value
+ * satisfies the syntax of the BCP 47 Unicode locale extension type,
+ * or when the specified keyword allows 'variable' type and the specified
+ * value satisfies the syntax, then the pointer to the input type value itself
+ * will be returned.
+ * For example,
+ * <code>uloc_toUnicodeLocaleType("Foo", "Bar")</code> returns "Bar",
+ * <code>uloc_toUnicodeLocaleType("variableTop", "00A4")</code> returns "00A4".
+ *
+ * @param keyword the locale keyword (either legacy key such as
+ * "collation" or BCP 47 Unicode locale extension
+ * key such as "co").
+ * @param value the locale keyword value (either legacy type
+ * such as "phonebook" or BCP 47 Unicode locale extension
+ * type such as "phonebk").
+ * @return the well-formed BCP47 Unicode locale extension type,
+ * or NULL if the locale keyword value cannot be mapped to
+ * a well-formed BCP 47 Unicode locale extension type.
+ * @see uloc_toLegacyType
+ * @draft ICU 54
+ */
+U_DRAFT const char* U_EXPORT2
+uloc_toUnicodeLocaleType(const char* keyword, const char* value);
+
+/**
+ * Converts the specified keyword (BCP 47 Unicode locale extension key, or
+ * legacy key) to the legacy key. For example, legacy key "collation" is
+ * returned for the input BCP 47 Unicode locale extension key "co".
+ *
+ * @param keyword the input locale keyword (either BCP 47 Unicode locale
+ * extension key or legacy key).
+ * @return the well-formed legacy key, or NULL if the specified
+ * keyword cannot be mapped to a well-formed legacy key.
+ * @see uloc_toUnicodeLocaleKey
+ * @draft ICU 54
+ */
+U_DRAFT const char* U_EXPORT2
+uloc_toLegacyKey(const char* keyword);
+
+/**
+ * Converts the specified keyword value (BCP 47 Unicode locale extension type,
+ * or legacy type or type alias) to the canonical legacy type. For example,
+ * the legacy type "phonebook" is returned for the input BCP 47 Unicode
+ * locale extension type "phonebk" with the keyword "collation" (or "co").
+ * <p>
+ * When the specified keyword is not recognized, but the specified value
+ * satisfies the syntax of legacy type, or when the specified keyword
+ * allows 'variable' type and the specified value satisfies the syntax,
+ * then the pointer to the input type value itself will be returned.
+ * For example,
+ * <code>uloc_toLegacyType("Foo", "Bar")</code> returns "Bar",
+ * <code>uloc_toLegacyType("vt", "00A4")</code> returns "00A4".
+ *
+ * @param keyword the locale keyword (either legacy keyword such as
+ * "collation" or BCP 47 Unicode locale extension
+ * key such as "co").
+ * @param value the locale keyword value (either BCP 47 Unicode locale
+ * extension type such as "phonebk" or legacy keyword value
+ * such as "phonebook").
+ * @return the well-formed legacy type, or NULL if the specified
+ * keyword value cannot be mapped to a well-formed legacy
+ * type.
+ * @see uloc_toUnicodeLocaleType
+ * @draft ICU 54
+ */
+U_DRAFT const char* U_EXPORT2
+uloc_toLegacyType(const char* keyword, const char* value);
+
+#endif /* U_HIDE_DRAFT_API */
+
#endif /*_ULOC*/
TESTCASE(TestEnglishExemplarCharacters);
TESTCASE(TestDisplayNameBrackets);
TESTCASE(TestIsRightToLeft);
+ TESTCASE(TestToUnicodeLocaleKey);
+ TESTCASE(TestToLegacyKey);
+ TESTCASE(TestToUnicodeLocaleType);
+ TESTCASE(TestToLegacyType);
}
}
const char* const locale_to_langtag[][3] = {
- {"@x=elmer", "x-elmer", "x-elmer"},
{"", "und", "und"},
{"en", "en", "en"},
{"en_US", "en-US", "en-US"},
{"en@timezone=America/New_York;calendar=japanese", "en-u-ca-japanese-tz-usnyc", "en-u-ca-japanese-tz-usnyc"},
{"en@timezone=US/Eastern", "en-u-tz-usnyc", "en-u-tz-usnyc"},
{"en@x=x-y-z;a=a-b-c", "en-x-x-y-z", NULL},
- {"it@collation=badcollationtype;colStrength=identical;cu=usd-eur", "it-u-ks-identic", NULL},
+ {"it@collation=badcollationtype;colStrength=identical;cu=usd-eur", "it-u-cu-usd-eur-ks-identic", NULL},
{"en_US_POSIX", "en-US-u-va-posix", "en-US-u-va-posix"},
- {"en_US_POSIX@calendar=japanese;currency=EUR","en-US-u-ca-japanese-cu-EUR-va-posix", "en-US-u-ca-japanese-cu-EUR-va-posix"},
+ {"en_US_POSIX@calendar=japanese;currency=EUR","en-US-u-ca-japanese-cu-eur-va-posix", "en-US-u-ca-japanese-cu-eur-va-posix"},
{"@x=elmer", "x-elmer", "x-elmer"},
{"en@x=elmer", "en-x-elmer", "en-x-elmer"},
{"@x=elmer;a=exta", "und-a-exta-x-elmer", "und-a-exta-x-elmer"},
const char *locID;
int32_t len;
} langtag_to_locale[] = {
+ {"ja-u-ijkl-efgh-abcd-ca-japanese-xx-yyy-zzz-kn", "ja@attribute=abcd-efgh-ijkl;calendar=japanese;colnumeric=yes;xx=yyy-zzz", FULL_LENGTH},
{"en", "en", FULL_LENGTH},
{"en-us", "en_US", FULL_LENGTH},
{"und-US", "_US", FULL_LENGTH},
}
}
+/*
+ * Table-driven check of uloc_toUnicodeLocaleKey().
+ *
+ * Each row is { input keyword, expected result }.  An expected value of
+ * NULL means the call must return NULL; the marker "$IN" means the call
+ * must return the very pointer that was passed in.
+ */
+static void TestToUnicodeLocaleKey(void)
+{
+    static const char* DATA[][2] = {
+        {"calendar",    "ca"},
+        {"CALEndar",    "ca"},  /* different casing */
+        {"ca",          "ca"},  /* bcp key itself */
+        {"kv",          "kv"},  /* no difference between legacy and bcp */
+        {"foo",         NULL},  /* unknown, bcp ill-formed */
+        {"ZZ",          "$IN"}, /* unknown, bcp well-formed */
+        {NULL,          NULL}   /* terminator */
+    };
+
+    int32_t row;
+    for (row = 0; DATA[row][0] != NULL; row++) {
+        const char* input = DATA[row][0];
+        const char* want = DATA[row][1];
+        const char* got = uloc_toUnicodeLocaleKey(input);
+
+        /* A NULL expectation only fails when something was returned. */
+        if (want == NULL) {
+            if (got != NULL) {
+                log_err("toUnicodeLocaleKey: keyword=%s => %s, expected=NULL\n", input, got);
+            }
+            continue;
+        }
+
+        if (got == NULL) {
+            log_err("toUnicodeLocaleKey: keyword=%s => NULL, expected=%s\n", input, want);
+            continue;
+        }
+
+        /* "$IN": compare pointers, not string contents. */
+        if (uprv_strcmp(want, "$IN") == 0) {
+            if (got != input) {
+                log_err("toUnicodeLocaleKey: keyword=%s => %s, expected=%s(input pointer)\n", input, got, input);
+            }
+            continue;
+        }
+
+        if (uprv_strcmp(got, want) != 0) {
+            log_err("toUnicodeLocaleKey: keyword=%s => %s, expected=%s\n", input, got, want);
+        }
+    }
+}
+
+/*
+ * Table-driven check of uloc_toLegacyKey().
+ *
+ * Each row is { input keyword, expected result }.  An expected value of
+ * NULL means the call must return NULL; the marker "$IN" means the call
+ * must return the input pointer itself.
+ */
+static void TestToLegacyKey(void)
+{
+    /* $IN specifies the result should be the input pointer itself */
+    static const char* DATA[][2] = {
+        {"kb",          "colbackwards"},
+        {"kB",          "colbackwards"},    /* different casing */
+        {"Collation",   "collation"},   /* keyword itself with different casing */
+        {"kv",          "kv"},  /* no difference between legacy and bcp */
+        {"foo",         "$IN"}, /* unknown, bcp ill-formed */
+        {"ZZ",          "$IN"}, /* unknown, bcp well-formed */
+        {"e=mc2",       NULL},  /* unknown, bcp/legacy ill-formed */
+        {NULL,          NULL}
+    };
+
+    int32_t i;
+    for (i = 0; DATA[i][0] != NULL; i++) {
+        const char* keyword = DATA[i][0];
+        const char* expected = DATA[i][1];
+        const char* legacyKey = NULL;
+
+        legacyKey = uloc_toLegacyKey(keyword);
+        if (expected == NULL) {
+            if (legacyKey != NULL) {
+                log_err("toLegacyKey: keyword=%s => %s, expected=NULL\n", keyword, legacyKey);
+            }
+        } else if (legacyKey == NULL) {
+            log_err("toLegacyKey: keyword=%s => NULL, expected=%s\n", keyword, expected);
+        } else if (uprv_strcmp(expected, "$IN") == 0) {
+            /* pointer identity, not string equality, is what is being verified */
+            if (legacyKey != keyword) {
+                log_err("toLegacyKey: keyword=%s => %s, expected=%s(input pointer)\n", keyword, legacyKey, keyword);
+            }
+        } else if (uprv_strcmp(legacyKey, expected) != 0) {
+            /* report under this test's own API name, in the same format as the messages above */
+            log_err("toLegacyKey: keyword=%s => %s, expected=%s\n", keyword, legacyKey, expected);
+        }
+    }
+}
+
+/*
+ * Table-driven check of uloc_toUnicodeLocaleType().
+ *
+ * Each row is { keyword, input value, expected result }.  An expected
+ * value of NULL means the call must return NULL; the marker "$IN" means
+ * the call must return the input value pointer itself.
+ */
+static void TestToUnicodeLocaleType(void)
+{
+    static const char* DATA[][3] = {
+        {"tz",              "Asia/Kolkata",     "inccu"},
+        {"calendar",        "gregorian",        "gregory"},
+        {"ca",              "gregorian",        "gregory"},
+        {"ca",              "Gregorian",        "gregory"},
+        {"ca",              "buddhist",         "buddhist"},
+        {"Calendar",        "Japanese",         "japanese"},
+        {"calendar",        "Islamic-Civil",    "islamic-civil"},
+        {"calendar",        "islamicc",         "islamic-civil"},   /* bcp type alias */
+        {"colalternate",    "NON-IGNORABLE",    "noignore"},
+        {"colcaselevel",    "yes",              "true"},
+        {"tz",              "america/new_york", "usnyc"},
+        {"tz",              "Asia/Kolkata",     "inccu"},
+        {"timezone",        "navajo",           "usden"},
+        {"ca",              "aaaa",             "$IN"}, /* unknown type, well-formed type */
+        {"ca",              "gregory-japanese-islamic", "$IN"}, /* unknown type, well-formed type */
+        {"zz",              "gregorian",        NULL},  /* unknown key, ill-formed type */
+        {"co",              "foo-",             NULL},  /* unknown type, ill-formed type */
+        {"variableTop",     "00A0",             "$IN"}, /* valid codepoints type */
+        {"variableTop",     "wxyz",             "$IN"}, /* invalid codepoints type - return as is for now */
+        {"kr",              "space-punct",      "space-punct"}, /* valid reordercode type */
+        {"kr",              "digit-spacepunct", NULL},  /* invalid (bcp ill-formed) reordercode type */
+        {NULL,              NULL,               NULL}   /* terminator */
+    };
+
+    int32_t row;
+    for (row = 0; DATA[row][0] != NULL; row++) {
+        const char* key = DATA[row][0];
+        const char* input = DATA[row][1];
+        const char* want = DATA[row][2];
+        const char* got = uloc_toUnicodeLocaleType(key, input);
+
+        /* A NULL expectation only fails when something was returned. */
+        if (want == NULL) {
+            if (got != NULL) {
+                log_err("toUnicodeLocaleType: keyword=%s, value=%s => %s, expected=NULL\n", key, input, got);
+            }
+            continue;
+        }
+
+        if (got == NULL) {
+            log_err("toUnicodeLocaleType: keyword=%s, value=%s => NULL, expected=%s\n", key, input, want);
+            continue;
+        }
+
+        /* "$IN": the returned pointer must be the input value pointer. */
+        if (uprv_strcmp(want, "$IN") == 0) {
+            if (got != input) {
+                log_err("toUnicodeLocaleType: keyword=%s, value=%s => %s, expected=%s(input pointer)\n", key, input, got, input);
+            }
+            continue;
+        }
+
+        if (uprv_strcmp(got, want) != 0) {
+            log_err("toUnicodeLocaleType: keyword=%s, value=%s => %s, expected=%s\n", key, input, got, want);
+        }
+    }
+}
+
+/*
+ * Table-driven check of uloc_toLegacyType().
+ *
+ * Each row is { keyword, input value, expected result }.  An expected
+ * value of NULL means the call must return NULL; the marker "$IN" means
+ * the call must return the input value pointer itself.
+ */
+static void TestToLegacyType(void)
+{
+    static const char* DATA[][3] = {
+        {"calendar",        "gregory",          "gregorian"},
+        {"ca",              "gregory",          "gregorian"},
+        {"ca",              "Gregory",          "gregorian"},
+        {"ca",              "buddhist",         "buddhist"},
+        {"Calendar",        "Japanese",         "japanese"},
+        {"calendar",        "Islamic-Civil",    "islamic-civil"},
+        {"calendar",        "islamicc",         "islamic-civil"},   /* bcp type alias */
+        {"colalternate",    "noignore",         "non-ignorable"},
+        {"colcaselevel",    "true",             "yes"},
+        {"tz",              "usnyc",            "America/New_York"},
+        {"tz",              "inccu",            "Asia/Calcutta"},
+        {"timezone",        "usden",            "America/Denver"},
+        {"timezone",        "usnavajo",         "America/Denver"},  /* bcp type alias */
+        {"colstrength",     "quarternary",      "quaternary"},  /* type alias */
+        {"ca",              "aaaa",             "$IN"}, /* unknown type */
+        {"calendar",        "gregory-japanese-islamic", "$IN"}, /* unknown type, well-formed type */
+        {"zz",              "gregorian",        "$IN"}, /* unknown key, bcp ill-formed type */
+        {"ca",              "gregorian-calendar",   "$IN"}, /* known key, bcp ill-formed type */
+        {"co",              "e=mc2",            NULL},  /* known key, ill-formed bcp/legacy type */
+        {"variableTop",     "00A0",             "$IN"}, /* valid codepoints type */
+        {"variableTop",     "wxyz",             "$IN"}, /* invalid codepoints type - return as is for now */
+        {"kr",              "space-punct",      "space-punct"}, /* valid reordercode type */
+        {"kr",              "digit-spacepunct", "digit-spacepunct"},    /* invalid reordercode type, bad ok for legacy syntax */
+        {NULL,              NULL,               NULL}   /* terminator */
+    };
+
+    int32_t row;
+    for (row = 0; DATA[row][0] != NULL; row++) {
+        const char* key = DATA[row][0];
+        const char* input = DATA[row][1];
+        const char* want = DATA[row][2];
+        const char* got = uloc_toLegacyType(key, input);
+
+        /* A NULL expectation only fails when something was returned. */
+        if (want == NULL) {
+            if (got != NULL) {
+                log_err("toLegacyType: keyword=%s, value=%s => %s, expected=NULL\n", key, input, got);
+            }
+            continue;
+        }
+
+        if (got == NULL) {
+            log_err("toLegacyType: keyword=%s, value=%s => NULL, expected=%s\n", key, input, want);
+            continue;
+        }
+
+        /* "$IN": the returned pointer must be the input value pointer. */
+        if (uprv_strcmp(want, "$IN") == 0) {
+            if (got != input) {
+                log_err("toLegacyType: keyword=%s, value=%s => %s, expected=%s(input pointer)\n", key, input, got, input);
+            }
+            continue;
+        }
+
+        if (uprv_strcmp(got, want) != 0) {
+            log_err("toLegacyType: keyword=%s, value=%s => %s, expected=%s\n", key, input, got, want);
+        }
+    }
+}
+
+
+
static void test_unicode_define(const char *namech, char ch, const char *nameu, UChar uch)
{
UChar asUch[1];
/********************************************************************
* COPYRIGHT:
- * Copyright (c) 1997-2013, International Business Machines Corporation and
+ * Copyright (c) 1997-2014, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
/********************************************************************************
static void TestForLanguageTag(void);
static void TestToLanguageTag(void);
+static void TestToUnicodeLocaleKey(void);
+static void TestToLegacyKey(void);
+static void TestToUnicodeLocaleType(void);
+static void TestToLegacyType(void);
+
/**
* locale data
*/