From: Frank Tang
Date: Wed, 11 Dec 2019 20:00:21 +0000 (+0000)
Subject: ICU-20900 Fix createCanonical
X-Git-Tag: release-67-rc~124
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=4a8483be91dd18e12d6c9f3d62ab2105d4ebe33e;p=icu

ICU-20900 Fix createCanonical

See #922
---

diff --git a/icu4c/source/common/uloc.cpp b/icu4c/source/common/uloc.cpp
index ea81c0ce87f..cfe9ea0061a 100644
--- a/icu4c/source/common/uloc.cpp
+++ b/icu4c/source/common/uloc.cpp
@@ -466,15 +466,16 @@ typedef struct CanonicalizationMap {
  * different semantic kinds of transformations.
  */
 static const CanonicalizationMap CANONICALIZE_MAP[] = {
-    { "art_LOJBAN", "jbo" }, /* registered name */
+    { "art__LOJBAN", "jbo" }, /* registered name */
     { "hy__AREVELA", "hy" }, /* Registered IANA variant */
     { "hy__AREVMDA", "hyw" }, /* Registered IANA variant */
+    { "zh__GUOYU", "zh" }, /* registered name */
+    { "zh__HAKKA", "hak" }, /* registered name */
+    { "zh__XIANG", "hsn" }, /* registered name */
+    // subtags with 3 chars won't be treated as variants.
     { "zh_GAN", "gan" }, /* registered name */
-    { "zh_GUOYU", "zh" }, /* registered name */
-    { "zh_HAKKA", "hak" }, /* registered name */
     { "zh_MIN_NAN", "nan" }, /* registered name */
     { "zh_WUU", "wuu" }, /* registered name */
-    { "zh_XIANG", "hsn" }, /* registered name */
     { "zh_YUE", "yue" }, /* registered name */
 };

diff --git a/icu4c/source/test/intltest/loctest.cpp b/icu4c/source/test/intltest/loctest.cpp
index bd51a1e4268..d7ad6722504 100644
--- a/icu4c/source/test/intltest/loctest.cpp
+++ b/icu4c/source/test/intltest/loctest.cpp
@@ -255,6 +255,7 @@ void LocaleTest::runIndexedTest( int32_t index, UBool exec, const char* &name, c
     TESTCASE_AUTO(TestBug13277);
     TESTCASE_AUTO(TestBug13554);
     TESTCASE_AUTO(TestBug20410);
+    TESTCASE_AUTO(TestBug20900);
     TESTCASE_AUTO(TestConstructorAcceptsBCP47);
     TESTCASE_AUTO(TestForLanguageTag);
     TESTCASE_AUTO(TestToLanguageTag);
@@ -3100,7 +3101,7 @@ void LocaleTest::TestBug20410() {

     static const char locid3[] = "art__lojban@x=0";
     Locale result3 = Locale::createCanonical(locid3);
-    static const Locale expected3("art__LOJBAN@x=0");
+    static const Locale expected3("jbo@x=0");
     assertEquals(locid3, expected3.getName(), result3.getName());

     static const char locid4[] = "art-lojban-x-0";
@@ -3109,6 +3110,32 @@ void LocaleTest::TestBug20410() {
     assertEquals(locid4, expected4.getName(), result4.getName());
 }

+void LocaleTest::TestBug20900() {
+    static const struct {
+        const char *localeID; /* input */
+        const char *canonicalID; /* expected canonicalize() result */
+    } testCases[] = {
+        { "art-lojban", "jbo" },
+        { "zh-guoyu", "zh" },
+        { "zh-hakka", "hak" },
+        { "zh-xiang", "hsn" },
+        { "zh-min-nan", "nan" },
+        { "zh-gan", "gan" },
+        { "zh-wuu", "wuu" },
+        { "zh-yue", "yue" },
+    };
+
+    IcuTestErrorCode status(*this, "TestBug20900");
+    for (int32_t i=0; i < UPRV_LENGTHOF(testCases); i++) {
+        Locale loc = Locale::createCanonical(testCases[i].localeID);
+        std::string result = loc.toLanguageTag(status);
+        const char* tag = loc.isBogus() ? "BOGUS" : result.c_str();
+        status.errIfFailureAndReset("FAIL: createCanonical(%s).toLanguageTag() expected \"%s\"",
+                    testCases[i].localeID, tag);
+        assertEquals("createCanonical", testCases[i].canonicalID, tag);
+    }
+}
+
 void LocaleTest::TestConstructorAcceptsBCP47() {
     IcuTestErrorCode status(*this, "TestConstructorAcceptsBCP47");

diff --git a/icu4c/source/test/intltest/loctest.h b/icu4c/source/test/intltest/loctest.h
index 0e8c09a5acf..6c41b132bdd 100644
--- a/icu4c/source/test/intltest/loctest.h
+++ b/icu4c/source/test/intltest/loctest.h
@@ -119,6 +119,7 @@ public:
     void TestBug13277();
     void TestBug13554();
     void TestBug20410();
+    void TestBug20900();
     void TestConstructorAcceptsBCP47();

     void TestAddLikelySubtags();
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java b/icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java
index 06de947bb09..f37eade55a5 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java
@@ -389,23 +389,24 @@ public final class ULocale implements Serializable, Comparable {
      * canonicalized id.
      */
     private static String[][] CANONICALIZE_MAP = {
-        { "art_LOJBAN", "jbo" }, /* registered name */
-        { "cel_GAULISH", "cel__GAULISH" }, /* registered name */
-        { "de_1901", "de__1901" }, /* registered name */
-        { "de_1906", "de__1906" }, /* registered name */
-        { "en_BOONT", "en__BOONT" }, /* registered name */
-        { "en_SCOUSE", "en__SCOUSE" }, /* registered name */
+        { "art__LOJBAN", "jbo" }, /* registered name */
+        { "cel__GAULISH", "cel__GAULISH" }, /* registered name */
+        { "de__1901", "de__1901" }, /* registered name */
+        { "de__1906", "de__1906" }, /* registered name */
+        { "en__BOONT", "en__BOONT" }, /* registered name */
+        { "en__SCOUSE", "en__SCOUSE" }, /* registered name */
         { "hy__AREVELA", "hy", null, null }, /* Registered IANA variant */
         { "hy__AREVMDA", "hyw", null, null }, /* Registered IANA variant */
-        { "sl_ROZAJ", "sl__ROZAJ" }, /* registered name */
-        { "zh_GAN", "zh__GAN" }, /* registered name */
-        { "zh_GUOYU", "zh" }, /* registered name */
-        { "zh_HAKKA", "zh__HAKKA" }, /* registered name */
+        { "sl__ROZAJ", "sl__ROZAJ" }, /* registered name */
+        { "zh__GUOYU", "zh" }, /* registered name */
+        { "zh__HAKKA", "hak" }, /* registered name */
+        { "zh__XIANG", "hsn" }, /* registered name */
+        // Three letter subtags won't be treated as variants.
+        { "zh_GAN", "gan" }, /* registered name */
         { "zh_MIN", "zh__MIN" }, /* registered name */
-        { "zh_MIN_NAN", "zh__MINNAN" }, /* registered name */
-        { "zh_WUU", "zh__WUU" }, /* registered name */
-        { "zh_XIANG", "zh__XIANG" }, /* registered name */
-        { "zh_YUE", "zh__YUE" } /* registered name */
+        { "zh_MIN_NAN", "nan" }, /* registered name */
+        { "zh_WUU", "wuu" }, /* registered name */
+        { "zh_YUE", "yue" } /* registered name */
     };

     /**
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java
index d36097e95f9..c35deceb050 100644
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java
@@ -5137,4 +5137,21 @@ public class ULocaleTest extends TestFmwk {
         Assert.assertEquals(displayName, locale_tag.getDisplayName(displayLocale));
         Assert.assertEquals(displayName, locale_build.getDisplayName(displayLocale));
     }
+
+    @Test
+    public void Test20900() {
+        final String [][] testData = new String[][]{
+            {"art-lojban", "jbo"},
+            {"zh-guoyu", "zh"},
+            {"zh-hakka", "hak"},
+            {"zh-xiang", "hsn"},
+            {"zh-min-nan", "nan"},
+            {"zh-gan", "gan"},
+            {"zh-yue", "yue"},
+        };
+        for (int row=0;row