]> granicus.if.org Git - icu/commitdiff
ICU-20410 Fix grandfathered tag w/ extensions
author Frank Tang <ftang@chromium.org>
Mon, 11 Feb 2019 21:47:07 +0000 (13:47 -0800)
committer Frank Yung-Fong Tang <41213225+FrankYFTang@users.noreply.github.com>
Sat, 16 Feb 2019 00:51:31 +0000 (16:51 -0800)
icu4c/source/common/uloc_tag.cpp
icu4c/source/test/cintltst/cloctst.c
icu4c/source/test/cintltst/cstrcase.c
icu4c/source/test/intltest/loctest.cpp
icu4c/source/test/intltest/loctest.h
icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LanguageTag.java
icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java

index 063efd45578fae5a9c223f428ed71ccaca97db85..0e1743699ce438c6f76e863ca0d999ed71cbd587 100644 (file)
@@ -2063,13 +2063,26 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
         return t.orphan();
     }
 
+    size_t parsedLenDelta = 0;
+    // The grandfathered tag is considered as a whole. A grandfathered tag with an
+    // intervening script or region, such as art-DE-lojban or art-Latn-lojban, won't be
+    // matched.
     /* check if the tag is grandfathered */
     for (i = 0; i < UPRV_LENGTHOF(GRANDFATHERED); i += 2) {
-        if (uprv_stricmp(GRANDFATHERED[i], tagBuf) == 0) {
+        int32_t checkGrandfatheredLen = static_cast<int32_t>(uprv_strlen(GRANDFATHERED[i]));
+        if (tagLen < checkGrandfatheredLen) {
+            continue;
+        }
+        if (tagLen > checkGrandfatheredLen && tagBuf[checkGrandfatheredLen] != '-') {
+            // make sure next char is '-'.
+            continue;
+        }
+        if (uprv_strnicmp(GRANDFATHERED[i], tagBuf, checkGrandfatheredLen) == 0) {
             int32_t newTagLength;
 
-            grandfatheredLen = tagLen;  /* back up for output parsedLen */
-            newTagLength = static_cast<int32_t>(uprv_strlen(GRANDFATHERED[i+1]));
+            grandfatheredLen = checkGrandfatheredLen;  /* back up for output parsedLen */
+            int32_t replacementLen = static_cast<int32_t>(uprv_strlen(GRANDFATHERED[i+1]));
+            newTagLength = replacementLen + tagLen - checkGrandfatheredLen;
             if (tagLen < newTagLength) {
                 uprv_free(tagBuf);
                 tagBuf = (char*)uprv_malloc(newTagLength + 1);
@@ -2080,12 +2093,15 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
                 t->buf = tagBuf;
                 tagLen = newTagLength;
             }
+            parsedLenDelta = checkGrandfatheredLen - replacementLen;
             uprv_strcpy(t->buf, GRANDFATHERED[i + 1]);
+            if (checkGrandfatheredLen != tagLen) {
+                uprv_strcpy(t->buf + replacementLen, tag + checkGrandfatheredLen);
+            }
             break;
         }
     }
 
-    size_t parsedLenDelta = 0;
     if (grandfatheredLen == 0) {
         for (i = 0; i < UPRV_LENGTHOF(REDUNDANT); i += 2) {
             const char* redundantTag = REDUNDANT[i];
@@ -2400,8 +2416,7 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
     }
 
     if (parsedLen != NULL) {
-        *parsedLen = (grandfatheredLen > 0) ? grandfatheredLen :
-            (int32_t)(pLastGoodPosition - t->buf + parsedLenDelta);
+        *parsedLen = (int32_t)(pLastGoodPosition - t->buf + parsedLenDelta);
     }
 
     return t.orphan();
index 09de64c874eb682839a3843724e6ff2588e7168e..5878fcc0f79e0455612c44c6814a7634b1dfeeba 100644 (file)
@@ -6160,8 +6160,8 @@ static const struct {
     /* #9562 IANA language tag data update */
     {"en-gb-oed", "en_GB_OXENDICT", FULL_LENGTH},
     {"i-navajo", "nv", FULL_LENGTH},
-    {"i-navajo-a-foo", "", 0},
-    {"i-navajo-latn-us", "", 0},
+    {"i-navajo-a-foo", "nv@a=foo", FULL_LENGTH},
+    {"i-navajo-latn-us", "nv_Latn_US", FULL_LENGTH},
     {"sgn-br", "bzs", FULL_LENGTH},
     {"sgn-br-u-co-phonebk", "bzs@collation=phonebook", FULL_LENGTH},
     {"ja-latn-hepburn-heploc", "ja_Latn__ALALC97", FULL_LENGTH},
index 6fb2cfccffe419a27867fa3cbfe3752f6f463418..e526b54f4ab31f0094ec8568beb2ed6677a0ec1d 100644 (file)
@@ -748,9 +748,12 @@ TestUCaseMap(void) {
     /* overly long locale IDs may get truncated to their language code to avoid unnecessary allocation */
     ucasemap_setLocale(csm, "I-kLInGOn-the-quick-brown-fox-jumps-over-the-lazy-dog", &errorCode);
     locale=ucasemap_getLocale(csm);
-    if(0!=strncmp(locale, "i-klingon", 9)) {
+    // "I-kLInGOn-the-quick-brown-fox-jumps-over-the-lazy-dog" is canonicalized
+    // into "tlh-the-quick-brown-fox-jumps-over-the-lazy-dog"
+    // and "the" will be treated as an extlang which replaces "tlh".
+    if(0!=strncmp(locale, "the", 3)) {
         log_err("ucasemap_getLocale(ucasemap_setLocale(\"I-kLInGOn-the-quick-br...\"))==%s\n"
-                "    does not start with \"i-klingon\"\n", locale);
+                "    does not start with \"the\"\n", locale);
     }
 
     errorCode=U_ZERO_ERROR;
index e3de596b2eca36554b9ace2d5bb5d92113d3a5ce..e9ce47fbaa584bfb294bcbbb0e709bd1dfe67b75 100644 (file)
@@ -248,6 +248,7 @@ void LocaleTest::runIndexedTest( int32_t index, UBool exec, const char* &name, c
     TESTCASE_AUTO(TestIsRightToLeft);
     TESTCASE_AUTO(TestBug13277);
     TESTCASE_AUTO(TestBug13554);
+    TESTCASE_AUTO(TestBug20410);
     TESTCASE_AUTO(TestForLanguageTag);
     TESTCASE_AUTO(TestToLanguageTag);
     TESTCASE_AUTO(TestMoveAssign);
@@ -2965,6 +2966,32 @@ void LocaleTest::TestBug13554() {
     }
 }
 
+void LocaleTest::TestBug20410() {  // Regression test for ICU-20410: grandfathered tags followed by extensions.
+    IcuTestErrorCode status(*this, "TestBug20410()");
+
+    static const char tag1[] = "art-lojban-x-0";  // "art-lojban" followed by an "x-0" suffix
+    static const Locale expected1("jbo@x=0");  // expected: "art-lojban" replaced by "jbo", suffix kept as @x=0
+    Locale result1 = Locale::forLanguageTag(tag1, status);
+    status.errIfFailureAndReset("\"%s\"", tag1);
+    assertEquals(tag1, expected1.getName(), result1.getName());
+
+    static const char tag2[] = "zh-xiang-u-nu-thai-x-0";  // "zh-xiang" followed by "u-nu-thai" and "x-0" suffixes
+    static const Locale expected2("hsn@numbers=thai;x=0");  // expected: "zh-xiang" replaced by "hsn", both suffixes kept as keywords
+    Locale result2 = Locale::forLanguageTag(tag2, status);
+    status.errIfFailureAndReset("\"%s\"", tag2);
+    assertEquals(tag2, expected2.getName(), result2.getName());
+
+    static const char locid3[] = "art__lojban@x=0";  // underscore-separated locale ID, passed to createCanonical rather than forLanguageTag
+    Locale result3 = Locale::createCanonical(locid3);
+    static const Locale expected3("art__LOJBAN@x=0");  // expected: no "jbo" replacement here; only the case of "lojban" differs from the input
+    assertEquals(locid3, expected3.getName(), result3.getName());
+
+    static const char locid4[] = "art-lojban-x-0";  // same text as tag1, but passed to createCanonical
+    Locale result4 = Locale::createCanonical(locid4);
+    static const Locale expected4("jbo@x=0");  // expected: the same name that forLanguageTag is expected to produce for tag1
+    assertEquals(locid4, expected4.getName(), result4.getName());
+}
+
 void LocaleTest::TestForLanguageTag() {
     IcuTestErrorCode status(*this, "TestForLanguageTag()");
 
index bebb26cebca4d1e7f307a015351a86acad5c3faf..daf3baddc6b388110b289fa289d27394f9ad0844 100644 (file)
@@ -114,6 +114,7 @@ public:
     void TestBug11421();
     void TestBug13277();
     void TestBug13554();
+    void TestBug20410();
 
     void TestAddLikelySubtags();
     void TestMinimizeSubtags();
index 2618b0ee7a4927c175b7349e0cd62838da63d410..d812ae2c6de59a59a2c7c6b3eaf0e54e67fba864 100644 (file)
@@ -169,9 +169,20 @@ public class LanguageTag {
 
         // Check if the tag is grandfathered
         String[] gfmap = GRANDFATHERED.get(new AsciiUtil.CaseInsensitiveKey(languageTag));
+        // Language tag is at least 2 alpha so we can skip searching the first 2 chars.
+        int dash = 2;
+        while (gfmap == null && (dash = languageTag.indexOf('-', dash + 1)) != -1) {
+            gfmap = GRANDFATHERED.get(new AsciiUtil.CaseInsensitiveKey(languageTag.substring(0, dash)));
+        }
+
         if (gfmap != null) {
-            // use preferred mapping
-            itr = new StringTokenIterator(gfmap[1], SEP);
+            if (gfmap[0].length() == languageTag.length()) {
+                // use preferred mapping
+                itr = new StringTokenIterator(gfmap[1], SEP);
+            } else {
+                // append the rest of the tag.
+                itr = new StringTokenIterator(gfmap[1] + languageTag.substring(dash), SEP);
+            }
             isGrandfathered = true;
         } else {
             itr = new StringTokenIterator(languageTag, SEP);
index 0d5365727a6972d0fdaf0cdb38a2dcafe5d0d55b..4e42a6d4bb92966d7d751c3542d72cb736466726 100644 (file)
@@ -4187,6 +4187,9 @@ public class ULocaleTest extends TestFmwk {
                 {"zh-u-ca-gregory-co-pinyin-ca-chinese", "zh@calendar=gregorian;collation=pinyin", NOERROR},
                 {"de-latn-DE-1901-u-co-phonebk-co-pinyin-ca-gregory", "de_Latn_DE_1901@calendar=gregorian;collation=phonebook", NOERROR},
                 {"th-u-kf-nu-thai-kf-false", "th@colcasefirst=yes;numbers=thai", NOERROR},
+                /* #20410 */
+                {"art-lojban-x-0", "jbo@x=0", NOERROR},
+                {"zh-xiang-u-nu-thai-x-0", "hsn@numbers=thai;x=0", NOERROR},
         };
 
         for (int i = 0; i < langtag_to_locale.length; i++) {