From: Yoshito Umaoka Date: Fri, 11 May 2012 21:52:24 +0000 (+0000) Subject: ICU-9265 Added support for typeless keyword in BCP 47 u extension. Also implemented... X-Git-Tag: milestone-59-0-1~3827 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=948048b4d214c8aef3ad7883e38273b262eed7f2;p=icu ICU-9265 Added support for typeless keyword in BCP 47 u extension. Also implemented u extension attributes. The implementation of uloc_forLanguageTag is now equivalent to ICU4J implementation. X-SVN-Rev: 31816 --- diff --git a/icu4c/source/common/uloc_tag.c b/icu4c/source/common/uloc_tag.c index 5db5e8f4133..1bc32c2c932 100644 --- a/icu4c/source/common/uloc_tag.c +++ b/icu4c/source/common/uloc_tag.c @@ -15,6 +15,7 @@ #include "putilimp.h" #include "uinvchar.h" #include "ulocimp.h" +#include "uassert.h" /* struct holding a single variant */ typedef struct VariantListEntry { @@ -44,7 +45,6 @@ typedef struct ULanguageTag { const char *region; VariantListEntry *variants; ExtensionListEntry *extensions; - AttributeListEntry *attributes; const char *privateuse; const char *grandfathered; } ULanguageTag; @@ -70,6 +70,7 @@ static const char* POSIX_KEY = "va"; static const char* POSIX_VALUE = "posix"; static const char* LOCALE_ATTRIBUTE_KEY = "attribute"; static const char* PRIVUSE_VARIANT_PREFIX = "lvariant"; +static const char* LOCALE_TYPE_YES = "yes"; #define LANG_UND_LEN 3 @@ -152,15 +153,6 @@ ultag_getVariant(const ULanguageTag* langtag, int32_t idx); static int32_t ultag_getVariantsSize(const ULanguageTag* langtag); -#if 0 -/* Currently not being used. 
*/ -static const char* -ultag_getAttribute(const ULanguageTag* langtag, int32_t idx); -#endif - -static int32_t -ultag_getAttributesSize(const ULanguageTag* langtag); - static const char* ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx); @@ -627,8 +619,6 @@ _initializeULanguageTag(ULanguageTag* langtag) { langtag->variants = NULL; langtag->extensions = NULL; - langtag->attributes = NULL; - langtag->grandfathered = EMPTY; langtag->privateuse = EMPTY; } @@ -1203,7 +1193,7 @@ _appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capac if (_isVariantSubtag(pVar, -1)) { if (uprv_strcmp(pVar,POSIX_VALUE) || len != uprv_strlen(POSIX_VALUE)) { /* emit the variant to the list */ - var = uprv_malloc(sizeof(VariantListEntry)); + var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry)); if (var == NULL) { *status = U_MEMORY_ALLOCATION_ERROR; break; @@ -1350,12 +1340,12 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac } /* create AttributeListEntry */ - attr = uprv_malloc(sizeof(AttributeListEntry)); + attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry)); if (attr == NULL) { *status = U_MEMORY_ALLOCATION_ERROR; break; } - attrValue = uprv_malloc(attrBufLength + 1); + attrValue = (char*)uprv_malloc(attrBufLength + 1); if (attrValue == NULL) { *status = U_MEMORY_ALLOCATION_ERROR; break; @@ -1440,7 +1430,7 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac if (!isAttribute) { /* create ExtensionListEntry */ - ext = uprv_malloc(sizeof(ExtensionListEntry)); + ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); if (ext == NULL) { *status = U_MEMORY_ALLOCATION_ERROR; break; @@ -1461,7 +1451,7 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac /* Special handling for POSIX variant - add the keywords for POSIX */ if (hadPosix) { /* create ExtensionListEntry for POSIX */ - ext = 
uprv_malloc(sizeof(ExtensionListEntry)); + ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); if (ext == NULL) { *status = U_MEMORY_ALLOCATION_ERROR; goto cleanup; @@ -1567,120 +1557,234 @@ cleanup: */ static void _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, char* buf, int32_t bufSize, UBool *posixVariant, UErrorCode *status) { - const char *p, *pNext, *pSep, *pTmp, *pTmpStart; - const char *pBcpKey, *pBcpType; - const char *pKey, *pType; - int32_t bcpKeyLen = 0, bcpTypeLen; + const char *pTag; /* beginning of current subtag */ + const char *pKwds; /* beginning of key-type pairs */ + UBool variantExists = *posixVariant; + + ExtensionListEntry *kwdFirst = NULL; /* first LDML keyword */ ExtensionListEntry *kwd, *nextKwd; - ExtensionListEntry *kwdFirst = NULL; + + AttributeListEntry *attrFirst = NULL; /* first attribute */ + AttributeListEntry *attr, *nextAttr; + + int32_t len; int32_t bufIdx = 0; - int32_t len; - UBool variantExists = *posixVariant; - UBool searchFurther; + + char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; + int32_t attrBufIdx = 0; /* Reset the posixVariant value */ *posixVariant = FALSE; - pNext = ldmlext; - pBcpKey = pBcpType = NULL; - while (pNext) { - p = pSep = pNext; + pTag = ldmlext; + pKwds = NULL; + /* Iterate through u extension attributes */ + while (*pTag) { /* locate next separator char */ - while (*pSep) { - if (*pSep == SEP) { - searchFurther = FALSE; - if (pBcpKey != NULL) { - pTmpStart = (pSep + 1); - pTmp = pTmpStart; - /* Look at the next subtag and see if it is part of the previous subtag or the start of new keyword */ - while (*pTmp) { - if (*pTmp == SEP || *(pTmp + 1) == 0) { - if (!_isLDMLKey(pTmpStart, (int32_t)(pTmp - pTmpStart))) { - searchFurther = TRUE; - } - break; - } - pTmp++; - } - } - if (searchFurther) { - pSep++; - continue; - } else { - break; - } - } - pSep++; + for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++); + + if (_isLDMLKey(pTag, len)) { + 
pKwds = pTag; + break; } - if (*pSep == 0) { - /* last subtag */ - pNext = NULL; - } else { - pNext = pSep + 1; + + /* add this attribute to the list */ + attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry)); + if (attr == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + goto cleanup; } - if (pBcpKey == NULL) { - pBcpKey = p; - bcpKeyLen = (int32_t)(pSep - p); + if (len < (int32_t)sizeof(attrBuf) - attrBufIdx) { + uprv_memcpy(&attrBuf[attrBufIdx], pTag, len); + attrBuf[attrBufIdx + len] = 0; + attr->attribute = &attrBuf[attrBufIdx]; + attrBufIdx += (len + 1); } else { - pBcpType = p; - bcpTypeLen = (int32_t)(pSep - p); + *status = U_ILLEGAL_ARGUMENT_ERROR; + goto cleanup; + } - /* BCP key to locale key */ - len = _bcp47ToLDMLKey(pBcpKey, bcpKeyLen, buf + bufIdx, bufSize - bufIdx - 1, status); - if (U_FAILURE(*status)) { - goto cleanup; - } - pKey = buf + bufIdx; - bufIdx += len; - *(buf + bufIdx) = 0; - bufIdx++; + if (!_addAttributeToList(&attrFirst, attr)) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + uprv_free(attr); + goto cleanup; + } - /* BCP type to locale type */ - len = _bcp47ToLDMLType(pKey, -1, pBcpType, bcpTypeLen, buf + bufIdx, bufSize - bufIdx - 1, status); - if (U_FAILURE(*status)) { - goto cleanup; + /* next tag */ + pTag += len; + if (*pTag) { + /* next to the separator */ + pTag++; + } + } + + if (attrFirst) { + /* emit attributes as an LDML keyword, e.g. 
attribute=attr1-attr2 */ + + if (attrBufIdx > bufSize) { + /* attrBufIdx == <total length of attribute subtag> + 1 */ + *status = U_ILLEGAL_ARGUMENT_ERROR; + goto cleanup; + } + + kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); + if (kwd == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + goto cleanup; + } + + kwd->key = LOCALE_ATTRIBUTE_KEY; + kwd->value = buf; + + /* attribute subtags sorted in alphabetical order as type */ + attr = attrFirst; + while (attr != NULL) { + nextAttr = attr->next; + + /* buffer size check is done above */ + if (attr != attrFirst) { + *(buf + bufIdx) = SEP; + bufIdx++; } - pType = buf + bufIdx; + + len = uprv_strlen(attr->attribute); + uprv_memcpy(buf + bufIdx, attr->attribute, len); bufIdx += len; - *(buf + bufIdx) = 0; - bufIdx++; - /* Special handling for u-va-posix, since we want to treat this as a variant, not */ - /* as a keyword. */ + attr = nextAttr; + } + *(buf + bufIdx) = 0; + bufIdx++; + + if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + uprv_free(kwd); + goto cleanup; + } - if (!variantExists && !uprv_strcmp(pKey,POSIX_KEY) && !uprv_strcmp(pType,POSIX_VALUE) ) { - *posixVariant = TRUE; + /* once keyword entry is created, delete the attribute list */ + attr = attrFirst; + while (attr != NULL) { + nextAttr = attr->next; + uprv_free(attr); + attr = nextAttr; + } + attrFirst = NULL; + } + + if (pKwds) { + const char *pBcpKey = NULL; /* u extenstion key subtag */ + const char *pBcpType = NULL; /* beginning of u extension type subtag(s) */ + int32_t bcpKeyLen = 0; + int32_t bcpTypeLen = 0; + UBool isDone = FALSE; + + pTag = pKwds; + /* BCP47 representation of LDML key/type pairs */ + while (!isDone) { + const char *pNextBcpKey = NULL; + int32_t nextBcpKeyLen; + UBool emitKeyword = FALSE; + + if (*pTag) { + /* locate next separator char */ + for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++); + + if (_isLDMLKey(pTag, len)) { + if (pBcpKey) { + emitKeyword = TRUE; + pNextBcpKey = pTag; + 
nextBcpKeyLen = len; + } else { + pBcpKey = pTag; + bcpKeyLen = len; + } + } else { + U_ASSERT(pBcpKey != NULL); + /* within LDML type subtags */ + if (pBcpType) { + bcpTypeLen += (len + 1); + } else { + pBcpType = pTag; + bcpTypeLen = len; + } + } + + /* next tag */ + pTag += len; + if (*pTag) { + /* next to the separator */ + pTag++; + } } else { - /* create an ExtensionListEntry for this keyword */ - kwd = uprv_malloc(sizeof(ExtensionListEntry)); - if (kwd == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; + /* processing last one */ + emitKeyword = TRUE; + isDone = TRUE; + } + + if (emitKeyword) { + const char *pKey = NULL; /* LDML key */ + const char *pType = NULL; /* LDML type */ + + U_ASSERT(pBcpKey != NULL); + + /* u extension key to LDML key */ + len = _bcp47ToLDMLKey(pBcpKey, bcpKeyLen, buf + bufIdx, bufSize - bufIdx - 1, status); + if (U_FAILURE(*status)) { goto cleanup; } + pKey = buf + bufIdx; + bufIdx += len; + *(buf + bufIdx) = 0; + bufIdx++; + + if (pBcpType) { + /* BCP type to locale type */ + len = _bcp47ToLDMLType(pKey, -1, pBcpType, bcpTypeLen, buf + bufIdx, bufSize - bufIdx - 1, status); + if (U_FAILURE(*status)) { + goto cleanup; + } + pType = buf + bufIdx; + bufIdx += len; + *(buf + bufIdx) = 0; + bufIdx++; + } else { + /* typeless - default type value is "yes" */ + pType = LOCALE_TYPE_YES; + } - kwd->key = pKey; - kwd->value = pType; + /* Special handling for u-va-posix, since we want to treat this as a variant, + not as a keyword */ + if (!variantExists && !uprv_strcmp(pKey, POSIX_KEY) && !uprv_strcmp(pType, POSIX_VALUE) ) { + *posixVariant = TRUE; + } else { + /* create an ExtensionListEntry for this keyword */ + kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); + if (kwd == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + goto cleanup; + } - if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - uprv_free(kwd); - goto cleanup; + kwd->key = pKey; + kwd->value = pType; + + if 
(!_addExtensionToList(&kwdFirst, kwd, FALSE)) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + uprv_free(kwd); + goto cleanup; + } } - } - /* for next pair */ - pBcpKey = NULL; - pBcpType = NULL; + pBcpKey = pNextBcpKey; + bcpKeyLen = pNextBcpKey != NULL ? nextBcpKeyLen : 0; + pBcpType = NULL; + bcpTypeLen = 0; + } } } - if (pBcpKey != NULL) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - goto cleanup; - } - kwd = kwdFirst; while (kwd != NULL) { nextKwd = kwd->next; @@ -1691,6 +1795,13 @@ _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendT return; cleanup: + attr = attrFirst; + while (attr != NULL) { + nextAttr = attr->next; + uprv_free(attr); + attr = nextAttr; + } + kwd = kwdFirst; while (kwd != NULL) { nextKwd = kwd->next; @@ -1707,8 +1818,6 @@ _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorC int32_t len; ExtensionListEntry *kwdFirst = NULL; ExtensionListEntry *kwd; - AttributeListEntry *attrFirst = NULL; - AttributeListEntry *attr; const char *key, *type; char *kwdBuf = NULL; int32_t kwdBufLength = capacity; @@ -1718,7 +1827,7 @@ _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorC return 0; } - kwdBuf = (char *)uprv_malloc(kwdBufLength); + kwdBuf = (char*)uprv_malloc(kwdBufLength); if (kwdBuf == NULL) { *status = U_MEMORY_ALLOCATION_ERROR; return 0; @@ -1741,7 +1850,7 @@ _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorC break; } } else { - kwd = uprv_malloc(sizeof(ExtensionListEntry)); + kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); if (kwd == NULL) { *status = U_MEMORY_ALLOCATION_ERROR; break; @@ -1760,7 +1869,7 @@ _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorC type = ultag_getPrivateUse(langtag); if ((int32_t)uprv_strlen(type) > 0) { /* add private use as a keyword */ - kwd = uprv_malloc(sizeof(ExtensionListEntry)); + kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); if (kwd == 
NULL) { *status = U_MEMORY_ALLOCATION_ERROR; } else { @@ -1784,78 +1893,45 @@ _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorC reslen += len; } - attrFirst = langtag->attributes; - if (U_SUCCESS(*status) && (kwdFirst != NULL || attrFirst != NULL)) { + if (U_SUCCESS(*status) && kwdFirst != NULL) { /* write out the sorted keywords */ UBool firstValue = TRUE; - UBool firstAttr = TRUE; kwd = kwdFirst; - attr = attrFirst; do { if (reslen < capacity) { if (firstValue) { /* '@' */ *(appendAt + reslen) = LOCALE_EXT_SEP; firstValue = FALSE; - } else if (attr) { - /* '-' */ - *(appendAt + reslen) = SEP; - }else { + } else { /* ';' */ *(appendAt + reslen) = LOCALE_KEYWORD_SEP; } } reslen++; - if (attr) { - if (firstAttr) { - len = (int32_t)uprv_strlen(LOCALE_ATTRIBUTE_KEY); - if (reslen < capacity) { - uprv_memcpy(appendAt + reslen, LOCALE_ATTRIBUTE_KEY, uprv_min(len, capacity - reslen)); - } - reslen += len; - - /* '=' */ - if (reslen < capacity) { - *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP; - } - - reslen++; - - firstAttr = FALSE; - } - - len = (int32_t)uprv_strlen(attr->attribute); - if (reslen < capacity) { - uprv_memcpy(appendAt + reslen, attr->attribute, uprv_min(len, capacity - reslen)); - } - reslen += len; - - attr = attr->next; - } else if (kwd) { - /* key */ - len = (int32_t)uprv_strlen(kwd->key); - if (reslen < capacity) { - uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity - reslen)); - } - reslen += len; - - /* '=' */ - if (reslen < capacity) { - *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP; - } - reslen++; + /* key */ + len = (int32_t)uprv_strlen(kwd->key); + if (reslen < capacity) { + uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity - reslen)); + } + reslen += len; - /* type */ - len = (int32_t)uprv_strlen(kwd->value); - if (reslen < capacity) { - uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacity - reslen)); - } - reslen += len; + /* '=' */ + if (reslen < capacity) { + *(appendAt + 
reslen) = LOCALE_KEY_TYPE_SEP; + } + reslen++; - kwd = kwd->next; + /* type */ + len = (int32_t)uprv_strlen(kwd->value); + if (reslen < capacity) { + uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacity - reslen)); } - } while (kwd || attr); + reslen += len; + + kwd = kwd->next; + } while (kwd); } /* clean up */ @@ -2009,7 +2085,6 @@ _appendPrivateuseToLanguageTag(const char* localeID, char* appendAt, int32_t cap #define EXTS 0x0020 #define EXTV 0x0040 #define PRIV 0x0080 -#define ATTR 0x0100 static ULanguageTag* ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) { @@ -2023,7 +2098,8 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta AttributeListEntry *pAttribute; char *pExtValueSubtag, *pExtValueSubtagEnd; int32_t i; - UBool isLDMLExtension, reqLDMLType, privateuseVar = FALSE; + UBool isLDMLExtension, privateuseVar = FALSE; + int32_t grandfatheredLen = 0; if (parsedLen != NULL) { *parsedLen = 0; @@ -2064,8 +2140,10 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta /* check if the tag is grandfathered */ for (i = 0; GRANDFATHERED[i] != NULL; i += 2) { if (uprv_stricmp(GRANDFATHERED[i], tagBuf) == 0) { - /* a grandfathered tag is always longer than its preferred mapping */ - int32_t newTagLength = uprv_strlen(GRANDFATHERED[i+1]); + int32_t newTagLength; + + grandfatheredLen = tagLen; /* back up for output parsedLen */ + newTagLength = uprv_strlen(GRANDFATHERED[i+1]); if (tagLen < newTagLength) { uprv_free(tagBuf); tagBuf = (char*)uprv_malloc(newTagLength + 1); @@ -2077,7 +2155,6 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta tagLen = newTagLength; } uprv_strcpy(t->buf, GRANDFATHERED[i + 1]); - break; } } @@ -2099,7 +2176,6 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta pExtValueSubtagEnd = NULL; pAttribute = NULL; isLDMLExtension = FALSE; - reqLDMLType = FALSE; while (pNext) { 
char *pSep; @@ -2223,18 +2299,12 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta pExtension = NULL; break; } - - if (isLDMLExtension && reqLDMLType) { - /* incomplete LDML extension key and type pair */ - pExtension = NULL; - break; - } } isLDMLExtension = (uprv_tolower(*pSubtag) == LDMLEXT); /* create a new extension */ - pExtension = uprv_malloc(sizeof(ExtensionListEntry)); + pExtension = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); if (pExtension == NULL) { *status = U_MEMORY_ALLOCATION_ERROR; goto error; @@ -2256,55 +2326,17 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta } if (next & EXTV) { if (_isExtensionSubtag(pSubtag, subtagLen)) { - if (isLDMLExtension) { - if (reqLDMLType) { - /* already saw an LDML key */ - if (!_isLDMLType(pSubtag, subtagLen)) { - /* stop parsing here and let the valid LDML extension key/type - pairs processed by the code out of this while loop */ - break; - } - pExtValueSubtagEnd = pSep; - reqLDMLType = FALSE; - next = EXTS | EXTV | PRIV; - } else { - /* LDML key */ - if (!_isLDMLKey(pSubtag, subtagLen)) { - /* May be part of incomplete type */ - if (pExtValueSubtag != NULL) { - if (_isLDMLType(pSubtag, subtagLen)) { - pExtValueSubtagEnd = pSep; - reqLDMLType = FALSE; - next = EXTS | EXTV | PRIV; - } - } else if (pExtValueSubtag == NULL && _isAttributeSubtag(pSubtag, subtagLen)) { - /* Get attribute */ - next = ATTR; - } else { - /* stop parsing here and let the valid LDML extension key/type - pairs processed by the code out of this while loop */ - break; - } - } else { - reqLDMLType = TRUE; - next = EXTV; - } - } - } else { - /* Mark the end of this subtag */ - pExtValueSubtagEnd = pSep; - next = EXTS | EXTV | PRIV; + if (pExtValueSubtag == NULL) { + /* if the start postion of this extension's value is not yet, + this one is the first value subtag */ + pExtValueSubtag = pSubtag; } - if (next != ATTR) { - if (pExtValueSubtag == NULL) { - /* if 
the start postion of this extension's value is not yet, - this one is the first value subtag */ - pExtValueSubtag = pSubtag; - } + /* Mark the end of this subtag */ + pExtValueSubtagEnd = pSep; + next = EXTS | EXTV | PRIV; - continue; - } + continue; } } if (next & PRIV) { @@ -2388,24 +2420,6 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta break; } - if (next & ATTR) { - /* create a new attribute */ - pAttribute = uprv_malloc(sizeof(AttributeListEntry)); - if (pAttribute == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - goto error; - } - - *pSep = 0; - pAttribute->attribute =T_CString_toLowerCase(pSubtag); - - if (!_addAttributeToList(&(t->attributes), pAttribute)) { - uprv_free(pAttribute); - } - - next = EXTS | EXTV | PRIV; - continue; - } /* If we fell through here, it means this subtag is illegal - quit parsing */ break; } @@ -2429,7 +2443,7 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta } if (parsedLen != NULL) { - *parsedLen = (int32_t)(pLastGoodPosition - t->buf); + *parsedLen = (grandfatheredLen > 0) ? grandfatheredLen : (int32_t)(pLastGoodPosition - t->buf); } return t; @@ -2466,15 +2480,6 @@ ultag_close(ULanguageTag* langtag) { } } - if (langtag->attributes) { - AttributeListEntry *curAttr = langtag->attributes; - while (curAttr) { - AttributeListEntry *nextAttr = curAttr->next; - uprv_free(curAttr); - curAttr = nextAttr; - } - } - uprv_free(langtag); } @@ -2556,39 +2561,6 @@ ultag_getVariantsSize(const ULanguageTag* langtag) { return size; } -#if 0 -/* Currently not being used. 
*/ -static const char* -ultag_getAttribute(const ULanguageTag* langtag, int32_t idx) { - const char *attr = NULL; - AttributeListEntry *cur = langtag->attributes; - int32_t i = 0; - while (cur) { - if (i == idx) { - attr = cur->attribute; - break; - } - cur = cur->next; - i++; - } - return attr; -} -#endif - -static int32_t -ultag_getAttributesSize(const ULanguageTag* langtag) { - int32_t size = 0; - AttributeListEntry *cur = langtag->attributes; - while (TRUE) { - if (cur == NULL) { - break; - } - size++; - cur = cur->next; - } - return size; -} - static const char* ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) { const char *key = NULL; @@ -2744,7 +2716,7 @@ uloc_forLanguageTag(const char* langtag, int32_t reslen = 0; const char *subtag, *p; int32_t len; - int32_t i, n, m; + int32_t i, n; UBool noRegion = TRUE; lt = ultag_parse(langtag, -1, parsedLength, status); @@ -2838,10 +2810,9 @@ uloc_forLanguageTag(const char* langtag, /* keywords */ n = ultag_getExtensionsSize(lt); - m = ultag_getAttributesSize(lt); subtag = ultag_getPrivateUse(lt); - if (n > 0 || m > 0 || uprv_strlen(subtag) > 0) { - if (reslen == 0 && (n > 0 || m > 0)) { + if (n > 0 || uprv_strlen(subtag) > 0) { + if (reslen == 0 && n > 0) { /* need a language */ if (reslen < localeIDCapacity) { uprv_memcpy(localeID + reslen, LANG_UND, uprv_min(LANG_UND_LEN, localeIDCapacity - reslen)); diff --git a/icu4c/source/test/cintltst/cloctst.c b/icu4c/source/test/cintltst/cloctst.c index a803fc0747f..daa5346d717 100644 --- a/icu4c/source/test/cintltst/cloctst.c +++ b/icu4c/source/test/cintltst/cloctst.c @@ -5654,52 +5654,58 @@ static void TestToLanguageTag(void) { } } +#define FULL_LENGTH -1 static const struct { const char *bcpID; const char *locID; int32_t len; } langtag_to_locale[] = { - {"en", "en", 2}, - {"en-us", "en_US", 5}, - {"und-US", "_US", 6}, - {"und-latn", "_Latn", 8}, - {"en-US-posix", "en_US_POSIX", 11}, + {"en", "en", FULL_LENGTH}, + {"en-us", "en_US", FULL_LENGTH}, + 
{"und-US", "_US", FULL_LENGTH}, + {"und-latn", "_Latn", FULL_LENGTH}, + {"en-US-posix", "en_US_POSIX", FULL_LENGTH}, {"de-de_euro", "de", 2}, - {"kok-IN", "kok_IN", 6}, + {"kok-IN", "kok_IN", FULL_LENGTH}, {"123", "", 0}, {"en_us", "", 0}, {"en-latn-x", "en_Latn", 7}, - {"art-lojban", "jbo", 3}, - {"zh-hakka", "hak", 3}, - {"zh-cmn-CH", "cmn_CH", 9}, - {"xxx-yy", "xxx_YY", 6}, - {"fr-234", "fr_234", 6}, - {"i-default", "en@x=i-default", 14}, + {"art-lojban", "jbo", FULL_LENGTH}, + {"zh-hakka", "hak", FULL_LENGTH}, + {"zh-cmn-CH", "cmn_CH", FULL_LENGTH}, + {"xxx-yy", "xxx_YY", FULL_LENGTH}, + {"fr-234", "fr_234", FULL_LENGTH}, + {"i-default", "en@x=i-default", FULL_LENGTH}, {"i-test", "", 0}, {"ja-jp-jp", "ja_JP", 5}, - {"bogus", "bogus", 5}, + {"bogus", "bogus", FULL_LENGTH}, {"boguslang", "", 0}, - {"EN-lATN-us", "en_Latn_US", 10}, - {"und-variant-1234", "__VARIANT_1234", 16}, + {"EN-lATN-us", "en_Latn_US", FULL_LENGTH}, + {"und-variant-1234", "__VARIANT_1234", FULL_LENGTH}, {"und-varzero-var1-vartwo", "__VARZERO", 11}, - {"en-u-ca-gregory", "en@calendar=gregorian", 15}, - {"en-U-cu-USD", "en@currency=usd", 11}, - {"en-US-u-va-posix", "en_US_POSIX", 16}, - {"en-us-u-ca-gregory-va-posix", "en_US_POSIX@calendar=gregorian", 27}, - {"en-us-posix-u-va-posix", "en_US_POSIX@va=posix", 22}, - {"en-us-u-va-posix2", "en_US@va=posix2", 17}, - {"en-us-vari1-u-va-posix", "en_US_VARI1@va=posix", 22}, - {"ar-x-1-2-3", "ar@x=1-2-3", 10}, - {"fr-u-nu-latn-cu-eur", "fr@currency=eur;numbers=latn", 19}, - {"de-k-kext-u-co-phonebk-nu-latn", "de@collation=phonebook;k=kext;numbers=latn", 30}, - {"ja-u-cu-jpy-ca-jp", "ja@currency=jpy", 11}, - {"en-us-u-tz-usnyc", "en_US@timezone=America/New_York", 16}, - {"und-a-abc-def", "und@a=abc-def", 13}, - {"zh-u-ca-chinese-x-u-ca-chinese", "zh@calendar=chinese;x=u-ca-chinese", 30}, - {"x-elmer", "@x=elmer", 7}, - {"en-US-u-attr1-attr2-ca-gregory", "en_US@attribute=attr1-attr2;calendar=gregorian", 30}, + {"en-u-ca-gregory", "en@calendar=gregorian", 
FULL_LENGTH}, + {"en-U-cu-USD", "en@currency=usd", FULL_LENGTH}, + {"en-US-u-va-posix", "en_US_POSIX", FULL_LENGTH}, + {"en-us-u-ca-gregory-va-posix", "en_US_POSIX@calendar=gregorian", FULL_LENGTH}, + {"en-us-posix-u-va-posix", "en_US_POSIX@va=posix", FULL_LENGTH}, + {"en-us-u-va-posix2", "en_US@va=posix2", FULL_LENGTH}, + {"en-us-vari1-u-va-posix", "en_US_VARI1@va=posix", FULL_LENGTH}, + {"ar-x-1-2-3", "ar@x=1-2-3", FULL_LENGTH}, + {"fr-u-nu-latn-cu-eur", "fr@currency=eur;numbers=latn", FULL_LENGTH}, + {"de-k-kext-u-co-phonebk-nu-latn", "de@collation=phonebook;k=kext;numbers=latn", FULL_LENGTH}, + {"ja-u-cu-jpy-ca-jp", "ja@calendar=yes;currency=jpy;jp=yes", FULL_LENGTH}, + {"en-us-u-tz-usnyc", "en_US@timezone=America/New_York", FULL_LENGTH}, + {"und-a-abc-def", "und@a=abc-def", FULL_LENGTH}, + {"zh-u-ca-chinese-x-u-ca-chinese", "zh@calendar=chinese;x=u-ca-chinese", FULL_LENGTH}, + {"x-elmer", "@x=elmer", FULL_LENGTH}, + {"en-US-u-attr1-attr2-ca-gregory", "en_US@attribute=attr1-attr2;calendar=gregorian", FULL_LENGTH}, + {"sr-u-kn", "sr@colnumeric=yes", FULL_LENGTH}, + {"de-u-kn-co-phonebk", "de@collation=phonebook;colnumeric=yes", FULL_LENGTH}, + {"en-u-attr2-attr1-kn-kb", "en@attribute=attr1-attr2;colbackwards=yes;colnumeric=yes", FULL_LENGTH}, + {"ja-u-ijkl-efgh-abcd-ca-japanese-xx-yyy-zzz-kn", "ja@attribute=abcd-efgh-ijkl;calendar=japanese;colnumeric=yes;xx=yyy-zzz", FULL_LENGTH}, + {"de-u-xc-xphonebk-co-phonebk-ca-buddhist-mo-very-lo-extensi-xd-that-de-should-vc-probably-xz-killthebuffer", - "de@calendar=buddhist;collation=phonebook;de=should;lo=extensi;mo=very;vc=probably;xc=xphonebk;xd=that", 88}, + "de@calendar=buddhist;collation=phonebook;de=should;lo=extensi;mo=very;vc=probably;xc=xphonebk;xd=that;xz=yes", 91}, {NULL, NULL, 0} }; @@ -5708,10 +5714,15 @@ static void TestForLanguageTag(void) { int32_t i; UErrorCode status; int32_t parsedLen; + int32_t expParsedLen; for (i = 0; langtag_to_locale[i].bcpID != NULL; i++) { status = U_ZERO_ERROR; - locale[0] = 0; 
+ locale[0] = 0; + expParsedLen = langtag_to_locale[i].len; + if (expParsedLen == FULL_LENGTH) { + expParsedLen = uprv_strlen(langtag_to_locale[i].bcpID); + } uloc_forLanguageTag(langtag_to_locale[i].bcpID, locale, sizeof(locale), &parsedLen, &status); if (U_FAILURE(status)) { log_err_status(status, "Error returned by uloc_forLanguageTag for language tag [%s] - error: %s\n", @@ -5721,9 +5732,9 @@ static void TestForLanguageTag(void) { log_err("uloc_forLanguageTag returned locale [%s] for input language tag [%s] - expected: [%s]\n", locale, langtag_to_locale[i].bcpID, langtag_to_locale[i].locID); } - if (parsedLen != langtag_to_locale[i].len) { + if (parsedLen != expParsedLen) { log_err("uloc_forLanguageTag parsed length of %d for input language tag [%s] - expected parsed length: %d\n", - parsedLen, langtag_to_locale[i].bcpID, langtag_to_locale[i].len); + parsedLen, langtag_to_locale[i].bcpID, expParsedLen); } } }