ICU-20310 omit "-true" in toLanguageTag
author Frank Tang <ftang@chromium.org>
Mon, 23 Dec 2019 21:12:53 +0000 (21:12 +0000)
committer Frank Yung-Fong Tang <41213225+FrankYFTang@users.noreply.github.com>
Mon, 30 Dec 2019 23:39:59 +0000 (15:39 -0800)
See #952

icu4c/source/common/uloc_tag.cpp
icu4c/source/test/cintltst/cloctst.c
icu4c/source/test/intltest/localebuildertest.cpp
icu4c/source/test/intltest/loctest.cpp
icu4c/source/test/intltest/loctest.h
icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LanguageTag.java
icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleBuilderTest.java
icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java

index 2b76a9273c6bbf5fb933dfaa9644db48d8d65b54..4f3afcb672512be7ac4b48173dc6070632b15274 100644 (file)
@@ -1508,8 +1508,11 @@ _appendKeywordsToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool st
                 } else {
                     sink.Append("-", 1);
                     sink.Append(ext->key, static_cast<int32_t>(uprv_strlen(ext->key)));
-                    sink.Append("-", 1);
-                    sink.Append(ext->value, static_cast<int32_t>(uprv_strlen(ext->value)));
+                    if (uprv_strcmp(ext->value, "true") != 0 &&
+                        uprv_strcmp(ext->value, "yes") != 0) {
+                      sink.Append("-", 1);
+                      sink.Append(ext->value, static_cast<int32_t>(uprv_strlen(ext->value)));
+                    }
                 }
             }
         }
index 36cd7825c59d3f61387a8f022fc09eca94905f38..affdecd13e4cc42a58f83aecfbd48cc877a70cf7 100644 (file)
@@ -6062,6 +6062,14 @@ const char* const locale_to_langtag[][3] = {
     // The following now uses standard canonicalization.
     {"az_AZ_CYRL", "az-AZ-x-lvariant-cyrl", NULL},
 
+
+    /* ICU-20310 */
+    {"en-u-kn-true",   "en-u-kn", "en-u-kn"},
+    {"en-u-kn",   "en-u-kn", "en-u-kn"},
+    {"de-u-co-yes",   "de-u-co", "de-u-co"},
+    {"de-u-co",   "de-u-co", "de-u-co"},
+    {"de@collation=yes",   "de-u-co", "de-u-co"},
+    {"cmn-hans-cn-u-ca-t-ca-x-t-u",   "cmn-Hans-CN-t-ca-u-ca-x-t-u", "cmn-Hans-CN-t-ca-u-ca-x-t-u"},
     {NULL,          NULL,           NULL}
 };
 
index b551bf8e6e9dcae6d2a6bec4428653a72133e88f..13f6ecf412029ec1c8624884ba42a516a5bec719 100644 (file)
@@ -136,7 +136,7 @@ void LocaleBuilderTest::TestLocaleBuilder() {
         {"U", "ja_JP@calendar=japanese;currency=JPY", "E", "u",
           "attr1-ca-gregory", "T", "ja-JP-u-attr1-ca-gregory",
           "ja_JP@attribute=attr1;calendar=gregorian"},
-        {"U", "en@colnumeric=yes", "K", "kn", "true", "T", "en-u-kn-true",
+        {"U", "en@colnumeric=yes", "K", "kn", "true", "T", "en-u-kn",
           "en@colnumeric=yes"},
         {"L", "th", "R", "th", "K", "nu", "thai", "T", "th-TH-u-nu-thai",
           "th_TH@numbers=thai"},
@@ -152,7 +152,7 @@ void LocaleBuilderTest::TestLocaleBuilder() {
         // However, once the legacy keyword is translated back to BCP 47 u extension, key "0a" is unknown,
         // so "yes" is preserved - not mapped to "true". We could change the code to automatically transform
         // key = alphanum alpha
-        {"L", "en", "E", "u", "bbb-aaa-0a", "T", "en-u-aaa-bbb-0a-yes",
+        {"L", "en", "E", "u", "bbb-aaa-0a", "T", "en-u-aaa-bbb-0a",
          "en@0a=yes;attribute=aaa-bbb"},
         {"L", "fr", "R", "FR", "P", "Yoshito-ICU", "T", "fr-FR-x-yoshito-icu",
           "fr_FR@x=yoshito-icu"},
@@ -166,13 +166,13 @@ void LocaleBuilderTest::TestLocaleBuilder() {
         {"L", "en", "K", "tz", "usnyc", "R", "US", "T", "en-US-u-tz-usnyc",
           "en_US@timezone=America/New_York"},
         {"L", "de", "K", "co", "phonebk", "K", "ks", "level1", "K", "kk",
-          "true", "T", "de-u-co-phonebk-kk-true-ks-level1",
+          "true", "T", "de-u-co-phonebk-kk-ks-level1",
           "de@collation=phonebook;colnormalization=yes;colstrength=primary"},
         {"L", "en", "R", "US", "K", "ca", "gregory", "T", "en-US-u-ca-gregory",
           "en_US@calendar=gregorian"},
         {"L", "en", "R", "US", "K", "cal", "gregory", "X"},
         {"L", "en", "R", "US", "K", "ca", "gregorian", "X"},
-        {"L", "en", "R", "US", "K", "kn", "true", "T", "en-US-u-kn-true",
+        {"L", "en", "R", "US", "K", "kn", "true", "T", "en-US-u-kn",
           "en_US@colnumeric=yes"},
         {"B", "de-DE-u-co-phonebk", "C", "L", "pt", "T", "pt", "pt"},
         {"B", "ja-jp-u-ca-japanese", "N", "T", "ja-JP", "ja_JP"},
index 06fd61760943866d5339c4962fef3a0e539b8f0a..bd51a1e4268597b6be9301ee609960a91887eb04 100644 (file)
@@ -258,6 +258,7 @@ void LocaleTest::runIndexedTest( int32_t index, UBool exec, const char* &name, c
     TESTCASE_AUTO(TestConstructorAcceptsBCP47);
     TESTCASE_AUTO(TestForLanguageTag);
     TESTCASE_AUTO(TestToLanguageTag);
+    TESTCASE_AUTO(TestToLanguageTagOmitTrue);
     TESTCASE_AUTO(TestMoveAssign);
     TESTCASE_AUTO(TestMoveCtor);
     TESTCASE_AUTO(TestBug20407iVariantPreferredValue);
@@ -3266,6 +3267,32 @@ void LocaleTest::TestToLanguageTag() {
     assertTrue(result_bogus.c_str(), result_bogus.empty());
 }
 
+/* ICU-20310 */
+void LocaleTest::TestToLanguageTagOmitTrue() {
+    IcuTestErrorCode status(*this, "TestToLanguageTagOmitTrue()");
+    assertEquals("en-u-kn should drop true",
+                 "en-u-kn", Locale("en-u-kn-true").toLanguageTag<std::string>(status).c_str());
+    status.errIfFailureAndReset();
+    assertEquals("en-u-kn should drop true",
+                 "en-u-kn", Locale("en-u-kn").toLanguageTag<std::string>(status).c_str());
+    status.errIfFailureAndReset();
+
+    assertEquals("de-u-co should drop true",
+                 "de-u-co", Locale("de-u-co").toLanguageTag<std::string>(status).c_str());
+    status.errIfFailureAndReset();
+    assertEquals("de-u-co should drop true",
+                 "de-u-co", Locale("de-u-co-yes").toLanguageTag<std::string>(status).c_str());
+    status.errIfFailureAndReset();
+    assertEquals("de@collation=yes should drop true",
+                 "de-u-co", Locale("de@collation=yes").toLanguageTag<std::string>(status).c_str());
+    status.errIfFailureAndReset();
+
+    assertEquals("cmn-Hans-CN-t-ca-u-ca-x-t-u should drop true",
+                 "cmn-Hans-CN-t-ca-u-ca-x-t-u",
+                 Locale("cmn-hans-cn-u-ca-t-ca-x-t-u").toLanguageTag<std::string>(status).c_str());
+    status.errIfFailureAndReset();
+}
+
 void LocaleTest::TestMoveAssign() {
     // ULOC_FULLNAME_CAPACITY == 157 (uloc.h)
     Locale l1("de@collation=phonebook;x="
index b62cd26016951d4f2274bd0ad5e1dcdbef0784f6..0e8c09a5acfb6592f81df6948314a29b4d2d2cc2 100644 (file)
@@ -127,6 +127,7 @@ public:
 
     void TestForLanguageTag();
     void TestToLanguageTag();
+    void TestToLanguageTagOmitTrue();
 
     void TestMoveAssign();
     void TestMoveCtor();
index 6b12c8171732e7af81bc8086ab16d74a723f0a0b..14d1a943b91d14f4405d84fb02fcf932aea39a47 100644 (file)
@@ -697,7 +697,21 @@ public class LanguageTag {
     }
 
     public static String canonicalizeExtension(String s) {
-        return AsciiUtil.toLowerString(s);
+        s = AsciiUtil.toLowerString(s);  // ICU-20310: keep the existing lowercasing, then strip every "-true" / "-yes" subtag
+        int found;  // start index of the interior match currently being removed
+        while (s.endsWith("-true")) {
+            s = s.substring(0, s.length() - 5);  // drop the trailing "-true" (5 chars)
+        }
+        while ((found = s.indexOf("-true-")) > 0) {  // "> 0": stops on -1 (no match) and also ignores a match at index 0
+            s = s.substring(0, found) + s.substring(found + 5);  // cut the 5 chars of "-true", keeping the '-' that follows
+        }
+        while (s.endsWith("-yes")) {
+            s = s.substring(0, s.length() - 4);  // drop the trailing "-yes" (4 chars)
+        }
+        while ((found = s.indexOf("-yes-")) > 0) {  // same scan as above, for interior "-yes-"
+            s = s.substring(0, found) + s.substring(found + 4);  // length of "-yes" is 4; the '-' that follows is kept
+        }
+        return s;  // NOTE(review): matching is purely textual, so any "true"/"yes" subtag is dropped wherever it sits - confirm intended
     }
 
     public static String canonicalizeExtensionSingleton(String s) {
index 71002bac2aefe9ff6109f3711dbe722d6be0bbdc..c0da0aedfe47c00656e4e33458e06df0469b8f64 100644 (file)
@@ -73,7 +73,7 @@ public class LocaleBuilderTest extends TestFmwk {
                 {"U", "ja_JP@calendar=japanese;currency=JPY", "L", "ko", "T", "ko-JP-u-ca-japanese-cu-jpy", "ko_JP@calendar=japanese;currency=jpy"},
                 {"U", "ja_JP@calendar=japanese;currency=JPY", "K", "ca", null, "T", "ja-JP-u-cu-jpy", "ja_JP@currency=jpy"},
                 {"U", "ja_JP@calendar=japanese;currency=JPY", "E", "u", "attr1-ca-gregory", "T", "ja-JP-u-attr1-ca-gregory", "ja_JP@attribute=attr1;calendar=gregorian"},
-                {"U", "en@colnumeric=yes", "K", "kn", "", "T", "en-u-kn-true", "en@colnumeric=yes"},
+                {"U", "en@colnumeric=yes", "K", "kn", "", "T", "en-u-kn", "en@colnumeric=yes"},
                 {"L", "th", "R", "th", "K", "nu", "thai", "T", "th-TH-u-nu-thai", "th_TH@numbers=thai"},
                 {"U", "zh_Hans", "R", "sg", "K", "ca", "badcalendar", "X"},
                 {"U", "zh_Hans", "R", "sg", "K", "cal", "gregory", "X"},
@@ -87,18 +87,18 @@ public class LocaleBuilderTest extends TestFmwk {
                 // However, once the legacy keyword is translated back to BCP 47 u extension, key "0a" is unknown,
                 // so "yes" is preserved - not mapped to "true". We could change the code to automatically transform
                 // "yes" to "true", but it will break roundtrip conversion if BCP 47 u extension has "0a-yes".
-                {"L", "en", "E", "u", "bbb-aaa-0a", "T", "en-u-aaa-bbb-0a-yes", "en@0a=yes;attribute=aaa-bbb"},
+                {"L", "en", "E", "u", "bbb-aaa-0a", "T", "en-u-aaa-bbb-0a", "en@0a=yes;attribute=aaa-bbb"},
                 {"L", "fr", "R", "FR", "P", "Yoshito-ICU", "T", "fr-FR-x-yoshito-icu", "fr_FR@x=yoshito-icu"},
                 {"L", "ja", "R", "jp", "K", "ca", "japanese", "T", "ja-JP-u-ca-japanese", "ja_JP@calendar=japanese"},
                 {"K", "co", "PHONEBK", "K", "ca", "gregory", "L", "De", "T", "de-u-ca-gregory-co-phonebk", "de@calendar=gregorian;collation=phonebook"},
                 {"E", "o", "OPQR", "E", "a", "aBcD", "T", "und-a-abcd-o-opqr", "@a=abcd;o=opqr"},
                 {"E", "u", "nu-thai-ca-gregory", "L", "TH", "T", "th-u-ca-gregory-nu-thai", "th@calendar=gregorian;numbers=thai"},
                 {"L", "en", "K", "tz", "usnyc", "R", "US", "T", "en-US-u-tz-usnyc", "en_US@timezone=America/New_York"},
-                {"L", "de", "K", "co", "phonebk", "K", "ks", "level1", "K", "kk", "true", "T", "de-u-co-phonebk-kk-true-ks-level1", "de@collation=phonebook;colnormalization=yes;colstrength=primary"},
+                {"L", "de", "K", "co", "phonebk", "K", "ks", "level1", "K", "kk", "true", "T", "de-u-co-phonebk-kk-ks-level1", "de@collation=phonebook;colnormalization=yes;colstrength=primary"},
                 {"L", "en", "R", "US", "K", "ca", "gregory", "T", "en-US-u-ca-gregory", "en_US@calendar=gregorian"},
                 {"L", "en", "R", "US", "K", "cal", "gregory", "X"},
                 {"L", "en", "R", "US", "K", "ca", "gregorian", "X"},
-                {"L", "en", "R", "US", "K", "kn", "", "T", "en-US-u-kn-true", "en_US@colnumeric=yes"},
+                {"L", "en", "R", "US", "K", "kn", "", "T", "en-US-u-kn", "en_US@colnumeric=yes"},
                 {"B", "de-DE-u-co-phonebk", "C", "L", "pt", "T", "pt", "pt"},
                 {"B", "ja-jp-u-ca-japanese", "N", "T", "ja-JP", "ja_JP"},
                 {"B", "es-u-def-abc-co-trad", "A", "hij", "D", "def", "T", "es-u-abc-hij-co-trad", "es@attribute=abc-hij;collation=traditional"},
index 4c4ba972a083cbc2e38010d3c6c40dddcff655bb..d36097e95f9fbe205d3a03857cfd14172004028a 100644 (file)
@@ -4134,6 +4134,13 @@ public class ULocaleTest extends TestFmwk {
                 /* ICU-20478 */
                 {"sl__ROZAJ_BISKE_1994",   "sl-1994-biske-rozaj"},
                 {"en__SCOUSE_FONIPA",   "en-fonipa-scouse"},
+                /* ICU-20310 */
+                {"en-u-kn-true",   "en-u-kn"},
+                {"en-u-kn",   "en-u-kn"},
+                {"de-u-co-yes",   "de-u-co"},
+                {"de-u-co",   "de-u-co"},
+                {"de@collation=yes",   "de-u-co"},
+                {"cmn-hans-cn-u-ca-t-ca-x-t-u",   "cmn-Hans-CN-t-ca-u-ca-x-t-u"},
         };
 
         for (int i = 0; i < locale_to_langtag.length; i++) {