return t.orphan();
}
+ size_t parsedLenDelta = 0;
+ // Grandfathered tags are matched as a prefix of the input, so any subtags that
+ // follow one are preserved. A grandfathered tag with an intervening script or
+ // region, such as art-DE-lojban or art-Latn-lojban, won't be matched.
/* check if the tag is grandfathered */
for (i = 0; i < UPRV_LENGTHOF(GRANDFATHERED); i += 2) {
- if (uprv_stricmp(GRANDFATHERED[i], tagBuf) == 0) {
+ int32_t checkGrandfatheredLen = static_cast<int32_t>(uprv_strlen(GRANDFATHERED[i]));
+ if (tagLen < checkGrandfatheredLen) {
+ continue;
+ }
+ if (tagLen > checkGrandfatheredLen && tagBuf[checkGrandfatheredLen] != '-') {
+ // make sure next char is '-'.
+ continue;
+ }
+ if (uprv_strnicmp(GRANDFATHERED[i], tagBuf, checkGrandfatheredLen) == 0) {
int32_t newTagLength;
- grandfatheredLen = tagLen; /* back up for output parsedLen */
- newTagLength = static_cast<int32_t>(uprv_strlen(GRANDFATHERED[i+1]));
+ grandfatheredLen = checkGrandfatheredLen; /* back up for output parsedLen */
+ int32_t replacementLen = static_cast<int32_t>(uprv_strlen(GRANDFATHERED[i+1]));
+ newTagLength = replacementLen + tagLen - checkGrandfatheredLen;
if (tagLen < newTagLength) {
uprv_free(tagBuf);
tagBuf = (char*)uprv_malloc(newTagLength + 1);
t->buf = tagBuf;
tagLen = newTagLength;
}
+ parsedLenDelta = checkGrandfatheredLen - replacementLen;
uprv_strcpy(t->buf, GRANDFATHERED[i + 1]);
+ if (checkGrandfatheredLen != tagLen) {
+ uprv_strcpy(t->buf + replacementLen, tag + checkGrandfatheredLen);
+ }
break;
}
}
- size_t parsedLenDelta = 0;
if (grandfatheredLen == 0) {
for (i = 0; i < UPRV_LENGTHOF(REDUNDANT); i += 2) {
const char* redundantTag = REDUNDANT[i];
}
if (parsedLen != NULL) {
- *parsedLen = (grandfatheredLen > 0) ? grandfatheredLen :
- (int32_t)(pLastGoodPosition - t->buf + parsedLenDelta);
+ *parsedLen = (int32_t)(pLastGoodPosition - t->buf + parsedLenDelta);
}
return t.orphan();
/* #9562 IANA language tag data update */
{"en-gb-oed", "en_GB_OXENDICT", FULL_LENGTH},
{"i-navajo", "nv", FULL_LENGTH},
- {"i-navajo-a-foo", "", 0},
- {"i-navajo-latn-us", "", 0},
+ {"i-navajo-a-foo", "nv@a=foo", FULL_LENGTH},
+ {"i-navajo-latn-us", "nv_Latn_US", FULL_LENGTH},
{"sgn-br", "bzs", FULL_LENGTH},
{"sgn-br-u-co-phonebk", "bzs@collation=phonebook", FULL_LENGTH},
{"ja-latn-hepburn-heploc", "ja_Latn__ALALC97", FULL_LENGTH},
/* overly long locale IDs may get truncated to their language code to avoid unnecessary allocation */
ucasemap_setLocale(csm, "I-kLInGOn-the-quick-brown-fox-jumps-over-the-lazy-dog", &errorCode);
locale=ucasemap_getLocale(csm);
- if(0!=strncmp(locale, "i-klingon", 9)) {
+ // "I-kLInGOn-the-quick-brown-fox-jumps-over-the-lazy-dog" is canonicalized
+ // into "tlh-the-quick-brown-fox-jumps-over-the-lazy-dog"
+ // and "the" will be treated as an extlang which replaces "tlh".
+ if(0!=strncmp(locale, "the", 3)) {
log_err("ucasemap_getLocale(ucasemap_setLocale(\"I-kLInGOn-the-quick-br...\"))==%s\n"
- " does not start with \"i-klingon\"\n", locale);
+ " does not start with \"the\"\n", locale);
}
errorCode=U_ZERO_ERROR;
TESTCASE_AUTO(TestIsRightToLeft);
TESTCASE_AUTO(TestBug13277);
TESTCASE_AUTO(TestBug13554);
+ TESTCASE_AUTO(TestBug20410);
TESTCASE_AUTO(TestForLanguageTag);
TESTCASE_AUTO(TestToLanguageTag);
TESTCASE_AUTO(TestMoveAssign);
}
}
+void LocaleTest::TestBug20410() {  // Regression test for bug 20410; the four cases are described below.
+ IcuTestErrorCode status(*this, "TestBug20410()");
+ // Case 1: forLanguageTag() on "art-lojban" followed by "-x-0": the prefix is expected to become "jbo" and the trailing part is kept as the x=0 keyword.
+ static const char tag1[] = "art-lojban-x-0";
+ static const Locale expected1("jbo@x=0");
+ Locale result1 = Locale::forLanguageTag(tag1, status);
+ status.errIfFailureAndReset("\"%s\"", tag1);
+ assertEquals(tag1, expected1.getName(), result1.getName());
+ // Case 2: forLanguageTag() on "zh-xiang" followed by "-u-nu-thai-x-0": the prefix is expected to become "hsn" with numbers=thai and x=0 kept.
+ static const char tag2[] = "zh-xiang-u-nu-thai-x-0";
+ static const Locale expected2("hsn@numbers=thai;x=0");
+ Locale result2 = Locale::forLanguageTag(tag2, status);
+ status.errIfFailureAndReset("\"%s\"", tag2);
+ assertEquals(tag2, expected2.getName(), result2.getName());
+ // Case 3: createCanonical() on the underscore form "art__lojban@x=0": the expected name keeps language "art" and only upper-cases the variant (no mapping to "jbo").
+ static const char locid3[] = "art__lojban@x=0";
+ Locale result3 = Locale::createCanonical(locid3);
+ static const Locale expected3("art__LOJBAN@x=0");
+ assertEquals(locid3, expected3.getName(), result3.getName());
+ // Case 4: createCanonical() on the hyphenated form used in case 1: the expected name is "jbo@x=0", the same as case 1.
+ static const char locid4[] = "art-lojban-x-0";
+ Locale result4 = Locale::createCanonical(locid4);
+ static const Locale expected4("jbo@x=0");
+ assertEquals(locid4, expected4.getName(), result4.getName());
+}
+
void LocaleTest::TestForLanguageTag() {
IcuTestErrorCode status(*this, "TestForLanguageTag()");
void TestBug11421();
void TestBug13277();
void TestBug13554();
+ void TestBug20410();
void TestAddLikelySubtags();
void TestMinimizeSubtags();
// Check if the tag is grandfathered
String[] gfmap = GRANDFATHERED.get(new AsciiUtil.CaseInsensitiveKey(languageTag));
+ // Language tag is at least 2 alpha so we can skip searching the first 2 chars.
+ int dash = 2;
+ while (gfmap == null && (dash = languageTag.indexOf('-', dash + 1)) != -1) {
+ gfmap = GRANDFATHERED.get(new AsciiUtil.CaseInsensitiveKey(languageTag.substring(0, dash)));
+ }
+
if (gfmap != null) {
- // use preferred mapping
- itr = new StringTokenIterator(gfmap[1], SEP);
+ if (gfmap[0].length() == languageTag.length()) {
+ // use preferred mapping
+ itr = new StringTokenIterator(gfmap[1], SEP);
+ } else {
+ // append the rest of the tag.
+ itr = new StringTokenIterator(gfmap[1] + languageTag.substring(dash), SEP);
+ }
isGrandfathered = true;
} else {
itr = new StringTokenIterator(languageTag, SEP);
{"zh-u-ca-gregory-co-pinyin-ca-chinese", "zh@calendar=gregorian;collation=pinyin", NOERROR},
{"de-latn-DE-1901-u-co-phonebk-co-pinyin-ca-gregory", "de_Latn_DE_1901@calendar=gregorian;collation=phonebook", NOERROR},
{"th-u-kf-nu-thai-kf-false", "th@colcasefirst=yes;numbers=thai", NOERROR},
+ /* #20410 */
+ {"art-lojban-x-0", "jbo@x=0", NOERROR},
+ {"zh-xiang-u-nu-thai-x-0", "hsn@numbers=thai;x=0", NOERROR},
};
for (int i = 0; i < langtag_to_locale.length; i++) {