]> granicus.if.org Git - icu/commitdiff
ICU-9265 Added support for typeless keyword in BCP 47 u extension. Also implemented...
authorYoshito Umaoka <y.umaoka@gmail.com>
Fri, 11 May 2012 21:52:24 +0000 (21:52 +0000)
committerYoshito Umaoka <y.umaoka@gmail.com>
Fri, 11 May 2012 21:52:24 +0000 (21:52 +0000)
X-SVN-Rev: 31816

icu4c/source/common/uloc_tag.c
icu4c/source/test/cintltst/cloctst.c

index 5db5e8f41333627e43c2b84e18661bb932949b68..1bc32c2c93230fa45e0a97c960b5ec9eb2a87bf5 100644 (file)
@@ -15,6 +15,7 @@
 #include "putilimp.h"
 #include "uinvchar.h"
 #include "ulocimp.h"
+#include "uassert.h"
 
 /* struct holding a single variant */
 typedef struct VariantListEntry {
@@ -44,7 +45,6 @@ typedef struct ULanguageTag {
     const char          *region;
     VariantListEntry    *variants;
     ExtensionListEntry  *extensions;
-    AttributeListEntry  *attributes;
     const char          *privateuse;
     const char          *grandfathered;
 } ULanguageTag;
@@ -70,6 +70,7 @@ static const char* POSIX_KEY = "va";
 static const char* POSIX_VALUE = "posix";
 static const char* LOCALE_ATTRIBUTE_KEY = "attribute";
 static const char* PRIVUSE_VARIANT_PREFIX = "lvariant";
+static const char* LOCALE_TYPE_YES = "yes";
 
 #define LANG_UND_LEN 3
 
@@ -152,15 +153,6 @@ ultag_getVariant(const ULanguageTag* langtag, int32_t idx);
 static int32_t
 ultag_getVariantsSize(const ULanguageTag* langtag);
 
-#if 0
-/* Currently not being used. */
-static const char*
-ultag_getAttribute(const ULanguageTag* langtag, int32_t idx);
-#endif
-
-static int32_t
-ultag_getAttributesSize(const ULanguageTag* langtag);
-
 static const char*
 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx);
 
@@ -627,8 +619,6 @@ _initializeULanguageTag(ULanguageTag* langtag) {
     langtag->variants = NULL;
     langtag->extensions = NULL;
 
-    langtag->attributes = NULL;
-
     langtag->grandfathered = EMPTY;
     langtag->privateuse = EMPTY;
 }
@@ -1203,7 +1193,7 @@ _appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
                     if (_isVariantSubtag(pVar, -1)) {
                         if (uprv_strcmp(pVar,POSIX_VALUE) || len != uprv_strlen(POSIX_VALUE)) {
                             /* emit the variant to the list */
-                            var = uprv_malloc(sizeof(VariantListEntry));
+                            var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
                             if (var == NULL) {
                                 *status = U_MEMORY_ALLOCATION_ERROR;
                                 break;
@@ -1350,12 +1340,12 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
                         }
 
                         /* create AttributeListEntry */
-                        attr = uprv_malloc(sizeof(AttributeListEntry));
+                        attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry));
                         if (attr == NULL) {
                             *status = U_MEMORY_ALLOCATION_ERROR;
                             break;
                         }
-                        attrValue = uprv_malloc(attrBufLength + 1);
+                        attrValue = (char*)uprv_malloc(attrBufLength + 1);
                         if (attrValue == NULL) {
                             *status = U_MEMORY_ALLOCATION_ERROR;
                             break;
@@ -1440,7 +1430,7 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
 
             if (!isAttribute) {
                 /* create ExtensionListEntry */
-                ext = uprv_malloc(sizeof(ExtensionListEntry));
+                ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
                 if (ext == NULL) {
                     *status = U_MEMORY_ALLOCATION_ERROR;
                     break;
@@ -1461,7 +1451,7 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
         /* Special handling for POSIX variant - add the keywords for POSIX */
         if (hadPosix) {
             /* create ExtensionListEntry for POSIX */
-            ext = uprv_malloc(sizeof(ExtensionListEntry));
+            ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
             if (ext == NULL) {
                 *status = U_MEMORY_ALLOCATION_ERROR;
                 goto cleanup;
@@ -1567,120 +1557,234 @@ cleanup:
  */
 static void
 _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, char* buf, int32_t bufSize, UBool *posixVariant, UErrorCode *status) {
-    const char *p, *pNext, *pSep, *pTmp, *pTmpStart;
-    const char *pBcpKey, *pBcpType;
-    const char *pKey, *pType;
-    int32_t bcpKeyLen = 0, bcpTypeLen;
+    const char *pTag;   /* beginning of current subtag */
+    const char *pKwds;  /* beginning of key-type pairs */
+    UBool variantExists = *posixVariant;
+
+    ExtensionListEntry *kwdFirst = NULL;    /* first LDML keyword */
     ExtensionListEntry *kwd, *nextKwd;
-    ExtensionListEntry *kwdFirst = NULL;
+
+    AttributeListEntry *attrFirst = NULL;   /* first attribute */
+    AttributeListEntry *attr, *nextAttr;
+
+    int32_t len;
     int32_t bufIdx = 0;
-    int32_t  len;
-    UBool variantExists = *posixVariant;
-    UBool searchFurther;
+
+    char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
+    int32_t attrBufIdx = 0;
 
     /* Reset the posixVariant value */
     *posixVariant = FALSE;
 
-    pNext = ldmlext;
-    pBcpKey = pBcpType = NULL;
-    while (pNext) {
-        p = pSep = pNext;
+    pTag = ldmlext;
+    pKwds = NULL;
 
+    /* Iterate through u extension attributes */
+    while (*pTag) {
         /* locate next separator char */
-        while (*pSep) {
-            if (*pSep == SEP) {
-                searchFurther = FALSE;
-                if (pBcpKey != NULL) {
-                    pTmpStart = (pSep + 1);
-                    pTmp = pTmpStart;
-                    /* Look at the next subtag and see if it is part of the previous subtag or the start of new keyword */
-                    while (*pTmp) {
-                        if (*pTmp == SEP || *(pTmp + 1) == 0) {
-                            if (!_isLDMLKey(pTmpStart, (int32_t)(pTmp - pTmpStart))) {
-                                searchFurther = TRUE;
-                            }
-                            break;
-                        }
-                        pTmp++;
-                    }
-                }
-                if (searchFurther) {
-                    pSep++;
-                    continue;
-                } else {
-                    break;
-                }
-            }
-            pSep++;
+        for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
+
+        if (_isLDMLKey(pTag, len)) {
+            pKwds = pTag;
+            break;
         }
-        if (*pSep == 0) {
-            /* last subtag */
-            pNext = NULL;
-        } else {
-            pNext = pSep + 1;
+
+        /* add this attribute to the list */
+        attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry));
+        if (attr == NULL) {
+            *status = U_MEMORY_ALLOCATION_ERROR;
+            goto cleanup;
         }
 
-        if (pBcpKey == NULL) {
-            pBcpKey = p;
-            bcpKeyLen = (int32_t)(pSep - p);
+        if (len < (int32_t)sizeof(attrBuf) - attrBufIdx) {
+            uprv_memcpy(&attrBuf[attrBufIdx], pTag, len);
+            attrBuf[attrBufIdx + len] = 0;
+            attr->attribute = &attrBuf[attrBufIdx];
+            attrBufIdx += (len + 1);
         } else {
-            pBcpType = p;
-            bcpTypeLen = (int32_t)(pSep - p);
+            *status = U_ILLEGAL_ARGUMENT_ERROR;
+            goto cleanup;
+        }
 
-            /* BCP key to locale key */
-            len = _bcp47ToLDMLKey(pBcpKey, bcpKeyLen, buf + bufIdx, bufSize - bufIdx - 1, status);
-            if (U_FAILURE(*status)) {
-                goto cleanup;
-            }
-            pKey = buf + bufIdx;
-            bufIdx += len;
-            *(buf + bufIdx) = 0;
-            bufIdx++;
+        if (!_addAttributeToList(&attrFirst, attr)) {
+            *status = U_ILLEGAL_ARGUMENT_ERROR;
+            uprv_free(attr);
+            goto cleanup;
+        }
 
-            /* BCP type to locale type */
-            len = _bcp47ToLDMLType(pKey, -1, pBcpType, bcpTypeLen, buf + bufIdx, bufSize - bufIdx - 1, status);
-            if (U_FAILURE(*status)) {
-                goto cleanup;
+        /* next tag */
+        pTag += len;
+        if (*pTag) {
+            /* next to the separator */
+            pTag++;
+        }
+    }
+
+    if (attrFirst) {
+        /* emit attributes as an LDML keyword, e.g. attribute=attr1-attr2 */
+
+        if (attrBufIdx > bufSize) {
+            /* attrBufIdx == <total length of attribute subtag> + 1 */
+            *status = U_ILLEGAL_ARGUMENT_ERROR;
+            goto cleanup;
+        }
+
+        kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
+        if (kwd == NULL) {
+            *status = U_MEMORY_ALLOCATION_ERROR;
+            goto cleanup;
+        }
+
+        kwd->key = LOCALE_ATTRIBUTE_KEY;
+        kwd->value = buf;
+
+        /* attribute subtags sorted in alphabetical order as type */
+        attr = attrFirst;
+        while (attr != NULL) {
+            nextAttr = attr->next;
+
+            /* buffer size check is done above */
+            if (attr != attrFirst) {
+                *(buf + bufIdx) = SEP;
+                bufIdx++;
             }
-            pType = buf + bufIdx;
+
+            len = uprv_strlen(attr->attribute);
+            uprv_memcpy(buf + bufIdx, attr->attribute, len);
             bufIdx += len;
-            *(buf + bufIdx) = 0;
-            bufIdx++;
 
-            /* Special handling for u-va-posix, since we want to treat this as a variant, not */
-            /* as a keyword.                                                                  */
+            attr = nextAttr;
+        }
+        *(buf + bufIdx) = 0;
+        bufIdx++;
+
+        if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
+            *status = U_ILLEGAL_ARGUMENT_ERROR;
+            uprv_free(kwd);
+            goto cleanup;
+        }
 
-            if (!variantExists && !uprv_strcmp(pKey,POSIX_KEY) && !uprv_strcmp(pType,POSIX_VALUE) ) {
-                *posixVariant = TRUE;
+        /* once keyword entry is created, delete the attribute list */
+        attr = attrFirst;
+        while (attr != NULL) {
+            nextAttr = attr->next;
+            uprv_free(attr);
+            attr = nextAttr;
+        }
+        attrFirst = NULL;
+    }
+
+    if (pKwds) {
+        const char *pBcpKey = NULL;     /* u extenstion key subtag */
+        const char *pBcpType = NULL;    /* beginning of u extension type subtag(s) */
+        int32_t bcpKeyLen = 0;
+        int32_t bcpTypeLen = 0;
+        UBool isDone = FALSE;
+
+        pTag = pKwds;
+        /* BCP47 representation of LDML key/type pairs */
+        while (!isDone) {
+            const char *pNextBcpKey = NULL;
+            int32_t nextBcpKeyLen;
+            UBool emitKeyword = FALSE;
+
+            if (*pTag) {
+                /* locate next separator char */
+                for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
+
+                if (_isLDMLKey(pTag, len)) {
+                    if (pBcpKey) {
+                        emitKeyword = TRUE;
+                        pNextBcpKey = pTag;
+                        nextBcpKeyLen = len;
+                    } else {
+                        pBcpKey = pTag;
+                        bcpKeyLen = len;
+                    }
+                } else {
+                    U_ASSERT(pBcpKey != NULL);
+                    /* within LDML type subtags */
+                    if (pBcpType) {
+                        bcpTypeLen += (len + 1);
+                    } else {
+                        pBcpType = pTag;
+                        bcpTypeLen = len;
+                    }
+                }
+
+                /* next tag */
+                pTag += len;
+                if (*pTag) {
+                    /* next to the separator */
+                    pTag++;
+                }
             } else {
-                /* create an ExtensionListEntry for this keyword */
-                kwd = uprv_malloc(sizeof(ExtensionListEntry));
-                if (kwd == NULL) {
-                    *status = U_MEMORY_ALLOCATION_ERROR;
+                /* processing last one */
+                emitKeyword = TRUE;
+                isDone = TRUE;
+            }
+
+            if (emitKeyword) {
+                const char *pKey = NULL;    /* LDML key */
+                const char *pType = NULL;   /* LDML type */
+
+                U_ASSERT(pBcpKey != NULL);
+
+                /* u extension key to LDML key */
+                len = _bcp47ToLDMLKey(pBcpKey, bcpKeyLen, buf + bufIdx, bufSize - bufIdx - 1, status);
+                if (U_FAILURE(*status)) {
                     goto cleanup;
                 }
+                pKey = buf + bufIdx;
+                bufIdx += len;
+                *(buf + bufIdx) = 0;
+                bufIdx++;
+
+                if (pBcpType) {
+                    /* BCP type to locale type */
+                    len = _bcp47ToLDMLType(pKey, -1, pBcpType, bcpTypeLen, buf + bufIdx, bufSize - bufIdx - 1, status);
+                    if (U_FAILURE(*status)) {
+                        goto cleanup;
+                    }
+                    pType = buf + bufIdx;
+                    bufIdx += len;
+                    *(buf + bufIdx) = 0;
+                    bufIdx++;
+                } else {
+                    /* typeless - default type value is "yes" */
+                    pType = LOCALE_TYPE_YES;
+                }
 
-                kwd->key = pKey;
-                kwd->value = pType;
+                /* Special handling for u-va-posix, since we want to treat this as a variant, 
+                   not as a keyword */
+                if (!variantExists && !uprv_strcmp(pKey, POSIX_KEY) && !uprv_strcmp(pType, POSIX_VALUE) ) {
+                    *posixVariant = TRUE;
+                } else {
+                    /* create an ExtensionListEntry for this keyword */
+                    kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
+                    if (kwd == NULL) {
+                        *status = U_MEMORY_ALLOCATION_ERROR;
+                        goto cleanup;
+                    }
 
-                if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
-                    *status = U_ILLEGAL_ARGUMENT_ERROR;
-                    uprv_free(kwd);
-                    goto cleanup;
+                    kwd->key = pKey;
+                    kwd->value = pType;
+
+                    if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
+                        *status = U_ILLEGAL_ARGUMENT_ERROR;
+                        uprv_free(kwd);
+                        goto cleanup;
+                    }
                 }
-            }
 
-            /* for next pair */
-            pBcpKey = NULL;
-            pBcpType = NULL;
+                pBcpKey = pNextBcpKey;
+                bcpKeyLen = pNextBcpKey != NULL ? nextBcpKeyLen : 0;
+                pBcpType = NULL;
+                bcpTypeLen = 0;
+            }
         }
     }
 
-    if (pBcpKey != NULL) {
-        *status = U_ILLEGAL_ARGUMENT_ERROR;
-        goto cleanup;
-    }
-
     kwd = kwdFirst;
     while (kwd != NULL) {
         nextKwd = kwd->next;
@@ -1691,6 +1795,13 @@ _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendT
     return;
 
 cleanup:
+    attr = attrFirst;
+    while (attr != NULL) {
+        nextAttr = attr->next;
+        uprv_free(attr);
+        attr = nextAttr;
+    }
+
     kwd = kwdFirst;
     while (kwd != NULL) {
         nextKwd = kwd->next;
@@ -1707,8 +1818,6 @@ _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorC
     int32_t len;
     ExtensionListEntry *kwdFirst = NULL;
     ExtensionListEntry *kwd;
-    AttributeListEntry *attrFirst = NULL;
-    AttributeListEntry *attr;
     const char *key, *type;
     char *kwdBuf = NULL;
     int32_t kwdBufLength = capacity;
@@ -1718,7 +1827,7 @@ _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorC
         return 0;
     }
 
-    kwdBuf = (char *)uprv_malloc(kwdBufLength);
+    kwdBuf = (char*)uprv_malloc(kwdBufLength);
     if (kwdBuf == NULL) {
         *status = U_MEMORY_ALLOCATION_ERROR;
         return 0;
@@ -1741,7 +1850,7 @@ _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorC
                 break;
             }
         } else {
-            kwd = uprv_malloc(sizeof(ExtensionListEntry));
+            kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
             if (kwd == NULL) {
                 *status = U_MEMORY_ALLOCATION_ERROR;
                 break;
@@ -1760,7 +1869,7 @@ _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorC
         type = ultag_getPrivateUse(langtag);
         if ((int32_t)uprv_strlen(type) > 0) {
             /* add private use as a keyword */
-            kwd = uprv_malloc(sizeof(ExtensionListEntry));
+            kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
             if (kwd == NULL) {
                 *status = U_MEMORY_ALLOCATION_ERROR;
             } else {
@@ -1784,78 +1893,45 @@ _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorC
         reslen += len;
     }
 
-    attrFirst = langtag->attributes;
-    if (U_SUCCESS(*status) && (kwdFirst != NULL || attrFirst != NULL)) {
+    if (U_SUCCESS(*status) && kwdFirst != NULL) {
         /* write out the sorted keywords */
         UBool firstValue = TRUE;
-        UBool firstAttr = TRUE;
         kwd = kwdFirst;
-        attr = attrFirst;
         do {
             if (reslen < capacity) {
                 if (firstValue) {
                     /* '@' */
                     *(appendAt + reslen) = LOCALE_EXT_SEP;
                     firstValue = FALSE;
-                } else if (attr) {
-                    /* '-' */
-                    *(appendAt + reslen) = SEP;
-                }else {
+                } else {
                     /* ';' */
                     *(appendAt + reslen) = LOCALE_KEYWORD_SEP;
                 }
             }
             reslen++;
 
-            if (attr) {
-                if (firstAttr) {
-                    len = (int32_t)uprv_strlen(LOCALE_ATTRIBUTE_KEY);
-                    if (reslen < capacity) {
-                        uprv_memcpy(appendAt + reslen, LOCALE_ATTRIBUTE_KEY, uprv_min(len, capacity - reslen));
-                    }
-                    reslen += len;
-
-                    /* '=' */
-                    if (reslen < capacity) {
-                        *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP;
-                    }
-
-                    reslen++;
-
-                    firstAttr = FALSE;
-                }
-
-                len = (int32_t)uprv_strlen(attr->attribute);
-                if (reslen < capacity) {
-                    uprv_memcpy(appendAt + reslen, attr->attribute, uprv_min(len, capacity - reslen));
-                }
-                reslen += len;
-
-                attr = attr->next;
-            } else if (kwd) {
-                /* key */
-                len = (int32_t)uprv_strlen(kwd->key);
-                if (reslen < capacity) {
-                    uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity - reslen));
-                }
-                reslen += len;
-
-                /* '=' */
-                if (reslen < capacity) {
-                    *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP;
-                }
-                reslen++;
+            /* key */
+            len = (int32_t)uprv_strlen(kwd->key);
+            if (reslen < capacity) {
+                uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity - reslen));
+            }
+            reslen += len;
 
-                /* type */
-                len = (int32_t)uprv_strlen(kwd->value);
-                if (reslen < capacity) {
-                    uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacity - reslen));
-                }
-                reslen += len;
+            /* '=' */
+            if (reslen < capacity) {
+                *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP;
+            }
+            reslen++;
 
-                kwd = kwd->next;
+            /* type */
+            len = (int32_t)uprv_strlen(kwd->value);
+            if (reslen < capacity) {
+                uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacity - reslen));
             }
-        } while (kwd || attr);
+            reslen += len;
+
+            kwd = kwd->next;
+        } while (kwd);
     }
 
     /* clean up */
@@ -2009,7 +2085,6 @@ _appendPrivateuseToLanguageTag(const char* localeID, char* appendAt, int32_t cap
 #define EXTS 0x0020
 #define EXTV 0x0040
 #define PRIV 0x0080
-#define ATTR 0x0100
 
 static ULanguageTag*
 ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) {
@@ -2023,7 +2098,8 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
     AttributeListEntry *pAttribute;
     char *pExtValueSubtag, *pExtValueSubtagEnd;
     int32_t i;
-    UBool isLDMLExtension, reqLDMLType, privateuseVar = FALSE;
+    UBool isLDMLExtension, privateuseVar = FALSE;
+    int32_t grandfatheredLen = 0;
 
     if (parsedLen != NULL) {
         *parsedLen = 0;
@@ -2064,8 +2140,10 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
     /* check if the tag is grandfathered */
     for (i = 0; GRANDFATHERED[i] != NULL; i += 2) {
         if (uprv_stricmp(GRANDFATHERED[i], tagBuf) == 0) {
-            /* a grandfathered tag is always longer than its preferred mapping */
-            int32_t newTagLength = uprv_strlen(GRANDFATHERED[i+1]);
+            int32_t newTagLength;
+
+            grandfatheredLen = tagLen;  /* back up for output parsedLen */
+            newTagLength = uprv_strlen(GRANDFATHERED[i+1]);
             if (tagLen < newTagLength) {
                 uprv_free(tagBuf);
                 tagBuf = (char*)uprv_malloc(newTagLength + 1);
@@ -2077,7 +2155,6 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
                 tagLen = newTagLength;
             }
             uprv_strcpy(t->buf, GRANDFATHERED[i + 1]);
-
             break;
         }
     }
@@ -2099,7 +2176,6 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
     pExtValueSubtagEnd = NULL;
     pAttribute = NULL;
     isLDMLExtension = FALSE;
-    reqLDMLType = FALSE;
 
     while (pNext) {
         char *pSep;
@@ -2223,18 +2299,12 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
                         pExtension = NULL;
                         break;
                     }
-
-                    if (isLDMLExtension && reqLDMLType) {
-                        /* incomplete LDML extension key and type pair */
-                        pExtension = NULL;
-                        break;
-                    }
                 }
 
                 isLDMLExtension = (uprv_tolower(*pSubtag) == LDMLEXT);
 
                 /* create a new extension */
-                pExtension = uprv_malloc(sizeof(ExtensionListEntry));
+                pExtension = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
                 if (pExtension == NULL) {
                     *status = U_MEMORY_ALLOCATION_ERROR;
                     goto error;
@@ -2256,55 +2326,17 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
         }
         if (next & EXTV) {
             if (_isExtensionSubtag(pSubtag, subtagLen)) {
-                if (isLDMLExtension) {
-                    if (reqLDMLType) {
-                        /* already saw an LDML key */
-                        if (!_isLDMLType(pSubtag, subtagLen)) {
-                            /* stop parsing here and let the valid LDML extension key/type
-                               pairs processed by the code out of this while loop */
-                            break;
-                        }
-                        pExtValueSubtagEnd = pSep;
-                        reqLDMLType = FALSE;
-                        next = EXTS | EXTV | PRIV;
-                    } else {
-                        /* LDML key */
-                        if (!_isLDMLKey(pSubtag, subtagLen)) {
-                            /* May be part of incomplete type */
-                            if (pExtValueSubtag != NULL) {
-                                if (_isLDMLType(pSubtag, subtagLen)) {
-                                    pExtValueSubtagEnd = pSep;
-                                    reqLDMLType = FALSE;
-                                    next = EXTS | EXTV | PRIV;
-                                }
-                            } else if (pExtValueSubtag == NULL && _isAttributeSubtag(pSubtag, subtagLen)) {
-                                /* Get attribute */
-                                next = ATTR;
-                            } else {
-                                /* stop parsing here and let the valid LDML extension key/type
-                                   pairs processed by the code out of this while loop */
-                                break;
-                            }
-                        } else {
-                            reqLDMLType = TRUE;
-                            next = EXTV;
-                        }
-                    }
-                } else {
-                    /* Mark the end of this subtag */
-                    pExtValueSubtagEnd = pSep;
-                    next = EXTS | EXTV | PRIV;
+                if (pExtValueSubtag == NULL) {
+                    /* if the start postion of this extension's value is not yet,
+                        this one is the first value subtag */
+                    pExtValueSubtag = pSubtag;
                 }
 
-                if (next != ATTR) {
-                    if (pExtValueSubtag == NULL) {
-                        /* if the start postion of this extension's value is not yet,
-                           this one is the first value subtag */
-                        pExtValueSubtag = pSubtag;
-                    }
+                /* Mark the end of this subtag */
+                pExtValueSubtagEnd = pSep;
+                next = EXTS | EXTV | PRIV;
 
-                    continue;
-                }
+                continue;
             }
         }
         if (next & PRIV) {
@@ -2388,24 +2420,6 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
             break;
         }
 
-        if (next & ATTR) {
-            /* create a new attribute */
-            pAttribute = uprv_malloc(sizeof(AttributeListEntry));
-            if (pAttribute == NULL) {
-                *status = U_MEMORY_ALLOCATION_ERROR;
-                goto error;
-            }
-
-            *pSep = 0;
-            pAttribute->attribute =T_CString_toLowerCase(pSubtag);
-
-            if (!_addAttributeToList(&(t->attributes), pAttribute)) {
-                uprv_free(pAttribute);
-            }
-
-            next = EXTS | EXTV | PRIV;
-            continue;
-        }
         /* If we fell through here, it means this subtag is illegal - quit parsing */
         break;
     }
@@ -2429,7 +2443,7 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
     }
 
     if (parsedLen != NULL) {
-        *parsedLen = (int32_t)(pLastGoodPosition - t->buf);
+        *parsedLen = (grandfatheredLen > 0) ? grandfatheredLen : (int32_t)(pLastGoodPosition - t->buf);
     }
 
     return t;
@@ -2466,15 +2480,6 @@ ultag_close(ULanguageTag* langtag) {
         }
     }
 
-    if (langtag->attributes) {
-        AttributeListEntry *curAttr = langtag->attributes;
-        while (curAttr) {
-            AttributeListEntry *nextAttr = curAttr->next;
-            uprv_free(curAttr);
-            curAttr = nextAttr;
-        }
-    }
-
     uprv_free(langtag);
 }
 
@@ -2556,39 +2561,6 @@ ultag_getVariantsSize(const ULanguageTag* langtag) {
     return size;
 }
 
-#if 0
-/* Currently not being used. */
-static const char*
-ultag_getAttribute(const ULanguageTag* langtag, int32_t idx) {
-    const char *attr = NULL;
-    AttributeListEntry *cur = langtag->attributes;
-    int32_t i = 0;
-    while (cur) {
-        if (i == idx) {
-            attr = cur->attribute;
-            break;
-        }
-        cur = cur->next;
-        i++;
-    }
-    return attr;
-}
-#endif
-
-static int32_t
-ultag_getAttributesSize(const ULanguageTag* langtag) {
-    int32_t size = 0;
-    AttributeListEntry *cur = langtag->attributes;
-    while (TRUE) {
-        if (cur == NULL) {
-            break;
-        }
-        size++;
-        cur = cur->next;
-    }
-    return size;
-}
-
 static const char*
 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) {
     const char *key = NULL;
@@ -2744,7 +2716,7 @@ uloc_forLanguageTag(const char* langtag,
     int32_t reslen = 0;
     const char *subtag, *p;
     int32_t len;
-    int32_t i, n, m;
+    int32_t i, n;
     UBool noRegion = TRUE;
 
     lt = ultag_parse(langtag, -1, parsedLength, status);
@@ -2838,10 +2810,9 @@ uloc_forLanguageTag(const char* langtag,
 
     /* keywords */
     n = ultag_getExtensionsSize(lt);
-    m = ultag_getAttributesSize(lt);
     subtag = ultag_getPrivateUse(lt);
-    if (n > 0 || m > 0 || uprv_strlen(subtag) > 0) {
-        if (reslen == 0 && (n > 0 || m > 0)) {
+    if (n > 0 || uprv_strlen(subtag) > 0) {
+        if (reslen == 0 && n > 0) {
             /* need a language */
             if (reslen < localeIDCapacity) {
                 uprv_memcpy(localeID + reslen, LANG_UND, uprv_min(LANG_UND_LEN, localeIDCapacity - reslen));
index a803fc0747f9a8b9cd3aaac94542f56352f7ef27..daa5346d717df9b51535632a5741e6b3ea4ff77f 100644 (file)
@@ -5654,52 +5654,58 @@ static void TestToLanguageTag(void) {
     }
 }
 
+#define FULL_LENGTH -1
 static const struct {
     const char  *bcpID;
     const char  *locID;
     int32_t     len;
 } langtag_to_locale[] = {
-    {"en",                  "en",                   2},
-    {"en-us",               "en_US",                5},
-    {"und-US",              "_US",                  6},
-    {"und-latn",            "_Latn",                8},
-    {"en-US-posix",         "en_US_POSIX",          11},
+    {"en",                  "en",                   FULL_LENGTH},
+    {"en-us",               "en_US",                FULL_LENGTH},
+    {"und-US",              "_US",                  FULL_LENGTH},
+    {"und-latn",            "_Latn",                FULL_LENGTH},
+    {"en-US-posix",         "en_US_POSIX",          FULL_LENGTH},
     {"de-de_euro",          "de",                   2},
-    {"kok-IN",              "kok_IN",               6},
+    {"kok-IN",              "kok_IN",               FULL_LENGTH},
     {"123",                 "",                     0},
     {"en_us",               "",                     0},
     {"en-latn-x",           "en_Latn",              7},
-    {"art-lojban",          "jbo",                  3},
-    {"zh-hakka",            "hak",                  3},
-    {"zh-cmn-CH",           "cmn_CH",               9},
-    {"xxx-yy",              "xxx_YY",               6},
-    {"fr-234",              "fr_234",               6},
-    {"i-default",           "en@x=i-default",      14},
+    {"art-lojban",          "jbo",                  FULL_LENGTH},
+    {"zh-hakka",            "hak",                  FULL_LENGTH},
+    {"zh-cmn-CH",           "cmn_CH",               FULL_LENGTH},
+    {"xxx-yy",              "xxx_YY",               FULL_LENGTH},
+    {"fr-234",              "fr_234",               FULL_LENGTH},
+    {"i-default",           "en@x=i-default",       FULL_LENGTH},
     {"i-test",              "",                     0},
     {"ja-jp-jp",            "ja_JP",                5},
-    {"bogus",               "bogus",                5},
+    {"bogus",               "bogus",                FULL_LENGTH},
     {"boguslang",           "",                     0},
-    {"EN-lATN-us",          "en_Latn_US",           10},
-    {"und-variant-1234",    "__VARIANT_1234",       16},
+    {"EN-lATN-us",          "en_Latn_US",           FULL_LENGTH},
+    {"und-variant-1234",    "__VARIANT_1234",       FULL_LENGTH},
     {"und-varzero-var1-vartwo", "__VARZERO",        11},
-    {"en-u-ca-gregory",     "en@calendar=gregorian",    15},
-    {"en-U-cu-USD",         "en@currency=usd",      11},
-    {"en-US-u-va-posix",    "en_US_POSIX",          16},
-    {"en-us-u-ca-gregory-va-posix", "en_US_POSIX@calendar=gregorian", 27},
-    {"en-us-posix-u-va-posix",   "en_US_POSIX@va=posix",  22},
-    {"en-us-u-va-posix2",        "en_US@va=posix2",       17},
-    {"en-us-vari1-u-va-posix",   "en_US_VARI1@va=posix",  22},
-    {"ar-x-1-2-3",          "ar@x=1-2-3",           10},
-    {"fr-u-nu-latn-cu-eur", "fr@currency=eur;numbers=latn", 19},
-    {"de-k-kext-u-co-phonebk-nu-latn",  "de@collation=phonebook;k=kext;numbers=latn",   30},
-    {"ja-u-cu-jpy-ca-jp",   "ja@currency=jpy",      11},
-    {"en-us-u-tz-usnyc",    "en_US@timezone=America/New_York",      16},
-    {"und-a-abc-def",       "und@a=abc-def",        13},
-    {"zh-u-ca-chinese-x-u-ca-chinese",  "zh@calendar=chinese;x=u-ca-chinese",   30},
-    {"x-elmer",             "@x=elmer",             7},
-    {"en-US-u-attr1-attr2-ca-gregory", "en_US@attribute=attr1-attr2;calendar=gregorian", 30},
+    {"en-u-ca-gregory",     "en@calendar=gregorian",    FULL_LENGTH},
+    {"en-U-cu-USD",         "en@currency=usd",      FULL_LENGTH},
+    {"en-US-u-va-posix",    "en_US_POSIX",          FULL_LENGTH},
+    {"en-us-u-ca-gregory-va-posix", "en_US_POSIX@calendar=gregorian",   FULL_LENGTH},
+    {"en-us-posix-u-va-posix",   "en_US_POSIX@va=posix",    FULL_LENGTH},
+    {"en-us-u-va-posix2",        "en_US@va=posix2",         FULL_LENGTH},
+    {"en-us-vari1-u-va-posix",   "en_US_VARI1@va=posix",    FULL_LENGTH},
+    {"ar-x-1-2-3",          "ar@x=1-2-3",           FULL_LENGTH},
+    {"fr-u-nu-latn-cu-eur", "fr@currency=eur;numbers=latn", FULL_LENGTH},
+    {"de-k-kext-u-co-phonebk-nu-latn",  "de@collation=phonebook;k=kext;numbers=latn",   FULL_LENGTH},
+    {"ja-u-cu-jpy-ca-jp",   "ja@calendar=yes;currency=jpy;jp=yes",  FULL_LENGTH},
+    {"en-us-u-tz-usnyc",    "en_US@timezone=America/New_York",  FULL_LENGTH},
+    {"und-a-abc-def",       "und@a=abc-def",        FULL_LENGTH},
+    {"zh-u-ca-chinese-x-u-ca-chinese",  "zh@calendar=chinese;x=u-ca-chinese",   FULL_LENGTH},
+    {"x-elmer",             "@x=elmer",             FULL_LENGTH},
+    {"en-US-u-attr1-attr2-ca-gregory", "en_US@attribute=attr1-attr2;calendar=gregorian",    FULL_LENGTH},
+    {"sr-u-kn",             "sr@colnumeric=yes",    FULL_LENGTH},
+    {"de-u-kn-co-phonebk",  "de@collation=phonebook;colnumeric=yes",    FULL_LENGTH},
+    {"en-u-attr2-attr1-kn-kb",  "en@attribute=attr1-attr2;colbackwards=yes;colnumeric=yes", FULL_LENGTH},
+    {"ja-u-ijkl-efgh-abcd-ca-japanese-xx-yyy-zzz-kn",   "ja@attribute=abcd-efgh-ijkl;calendar=japanese;colnumeric=yes;xx=yyy-zzz",  FULL_LENGTH},
+
     {"de-u-xc-xphonebk-co-phonebk-ca-buddhist-mo-very-lo-extensi-xd-that-de-should-vc-probably-xz-killthebuffer",
-     "de@calendar=buddhist;collation=phonebook;de=should;lo=extensi;mo=very;vc=probably;xc=xphonebk;xd=that", 88},
+     "de@calendar=buddhist;collation=phonebook;de=should;lo=extensi;mo=very;vc=probably;xc=xphonebk;xd=that;xz=yes", 91},
     {NULL,          NULL,           0}
 };
 
@@ -5708,10 +5714,15 @@ static void TestForLanguageTag(void) {
     int32_t i;
     UErrorCode status;
     int32_t parsedLen;
+    int32_t expParsedLen;
 
     for (i = 0; langtag_to_locale[i].bcpID != NULL; i++) {
         status = U_ZERO_ERROR;
-        locale[0] = 0;        
+        locale[0] = 0;
+        expParsedLen = langtag_to_locale[i].len;
+        if (expParsedLen == FULL_LENGTH) {
+            expParsedLen = uprv_strlen(langtag_to_locale[i].bcpID);
+        }
         uloc_forLanguageTag(langtag_to_locale[i].bcpID, locale, sizeof(locale), &parsedLen, &status);
         if (U_FAILURE(status)) {
             log_err_status(status, "Error returned by uloc_forLanguageTag for language tag [%s] - error: %s\n",
@@ -5721,9 +5732,9 @@ static void TestForLanguageTag(void) {
                 log_err("uloc_forLanguageTag returned locale [%s] for input language tag [%s] - expected: [%s]\n",
                     locale, langtag_to_locale[i].bcpID, langtag_to_locale[i].locID);
             }
-            if (parsedLen != langtag_to_locale[i].len) {
+            if (parsedLen != expParsedLen) {
                 log_err("uloc_forLanguageTag parsed length of %d for input language tag [%s] - expected parsed length: %d\n",
-                    parsedLen, langtag_to_locale[i].bcpID, langtag_to_locale[i].len);
+                    parsedLen, langtag_to_locale[i].bcpID, expParsedLen);
             }
         }
     }