From: Fredrik Roubert <roubert@google.com>
Date: Thu, 13 Sep 2018 21:14:26 +0000 (-0700)
Subject: ICU-13417 Add the Locale::(addLikely|minimize)Subtags() functions.
X-Git-Tag: release-63-rc~67
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=7c2a8d1fcc28b89cd5c5b58a67901b519d7a43ed;p=icu

ICU-13417 Add the Locale::(addLikely|minimize)Subtags() functions.

They are C++ wrappers of uloc_addLikelySubtags() and uloc_minimizeSubtags()
respectively, that take care of dynamic memory management.
---

diff --git a/icu4c/source/common/locid.cpp b/icu4c/source/common/locid.cpp
index 19e4ac7f21b..d47f12edd51 100644
--- a/icu4c/source/common/locid.cpp
+++ b/icu4c/source/common/locid.cpp
@@ -714,6 +714,126 @@ Locale::setDefault( const   Locale&     newLocale,
     locale_set_default_internal(localeID, status);
 }
 
+void
+Locale::addLikelySubtags(UErrorCode& status) {
+    // In-place C++ wrapper of uloc_addLikelySubtags(); no-op on prior failure.
+    if (U_FAILURE(status)) {
+        return;
+    }
+    // The maximized locale ID string is often longer, but there is no good
+    // heuristic to estimate just how much longer. Leave that to CharString.
+    CharString maximizedLocaleID;
+    int32_t maximizedLocaleIDCapacity =
+            static_cast<int32_t>(uprv_strlen(fullName));  // size_t -> int32_t
+    char* buffer;
+    int32_t reslen;
+    // Retry with the length reported by uloc_addLikelySubtags() on overflow.
+    for (;;) {
+        buffer = maximizedLocaleID.getAppendBuffer(
+                /*minCapacity=*/maximizedLocaleIDCapacity,
+                /*desiredCapacityHint=*/maximizedLocaleIDCapacity,
+                maximizedLocaleIDCapacity,  // Out: capacity actually granted.
+                status);
+
+        if (U_FAILURE(status)) {
+            return;
+        }
+
+        reslen = uloc_addLikelySubtags(
+                fullName,
+                buffer,
+                maximizedLocaleIDCapacity,
+                &status);
+
+        if (status != U_BUFFER_OVERFLOW_ERROR) {
+            break;
+        }
+        // reslen is the full length needed; ask CharString for that much.
+        maximizedLocaleIDCapacity = reslen;
+        status = U_ZERO_ERROR;
+    }
+
+    if (U_FAILURE(status)) {
+        return;
+    }
+    // Commit the bytes that were written directly into the append buffer.
+    maximizedLocaleID.append(buffer, reslen, status);
+    if (status == U_STRING_NOT_TERMINATED_WARNING) {
+        status = U_ZERO_ERROR;  // Terminators provided by CharString.
+    }
+
+    if (U_FAILURE(status)) {
+        return;
+    }
+    // Re-initialize this object from the new ID; a bogus result is an error.
+    init(maximizedLocaleID.data(), /*canonicalize=*/FALSE);
+    if (isBogus()) {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+    }
+}
+
+void
+Locale::minimizeSubtags(UErrorCode& status) {
+    // In-place C++ wrapper of uloc_minimizeSubtags(); no-op on prior failure.
+    if (U_FAILURE(status)) {
+        return;
+    }
+    // Except for a few edge cases (like the empty string, that is minimized to
+    // "en__POSIX"), minimized locale ID strings will be either the same length
+    // or shorter than their input.
+    CharString minimizedLocaleID;
+    int32_t minimizedLocaleIDCapacity =
+            static_cast<int32_t>(uprv_strlen(fullName));  // size_t -> int32_t
+    char* buffer;
+    int32_t reslen;
+    // Retry with the length reported by uloc_minimizeSubtags() on overflow.
+    for (;;) {
+        buffer = minimizedLocaleID.getAppendBuffer(
+                /*minCapacity=*/minimizedLocaleIDCapacity,
+                /*desiredCapacityHint=*/minimizedLocaleIDCapacity,
+                minimizedLocaleIDCapacity,  // Out: capacity actually granted.
+                status);
+
+        if (U_FAILURE(status)) {
+            return;
+        }
+
+        reslen = uloc_minimizeSubtags(
+                fullName,
+                buffer,
+                minimizedLocaleIDCapacity,
+                &status);
+
+        if (status != U_BUFFER_OVERFLOW_ERROR) {
+            break;
+        }
+
+        // Because of the internal minimal buffer size of CharString, this
+        // retry path is not expected to be reachable for any real input.
+        // Maybe it would be better replaced with an assertion instead?
+        minimizedLocaleIDCapacity = reslen;
+        status = U_ZERO_ERROR;
+    }
+
+    if (U_FAILURE(status)) {
+        return;
+    }
+    // Commit the bytes that were written directly into the append buffer.
+    minimizedLocaleID.append(buffer, reslen, status);
+    if (status == U_STRING_NOT_TERMINATED_WARNING) {
+        status = U_ZERO_ERROR;  // Terminators provided by CharString.
+    }
+
+    if (U_FAILURE(status)) {
+        return;
+    }
+    // Re-initialize this object from the new ID; a bogus result is an error.
+    init(minimizedLocaleID.data(), /*canonicalize=*/FALSE);
+    if (isBogus()) {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+    }
+}
+
 Locale U_EXPORT2
 Locale::forLanguageTag(StringPiece tag, UErrorCode& status)
 {
diff --git a/icu4c/source/common/unicode/locid.h b/icu4c/source/common/unicode/locid.h
index ea48ed91a17..f570391b1bc 100644
--- a/icu4c/source/common/unicode/locid.h
+++ b/icu4c/source/common/unicode/locid.h
@@ -483,6 +483,69 @@ public:
      */
     const char * getBaseName() const;
 
+#ifndef U_HIDE_DRAFT_API
+    /**
+     * Add the likely subtags for this Locale, per the algorithm described
+     * in the following CLDR technical report:
+     *
+     *   http://www.unicode.org/reports/tr35/#Likely_Subtags
+     *
+     * If this Locale is already in the maximal form, or not valid, or there is
+     * no data available for maximization, the Locale will be unchanged.
+     *
+     * For example, "und-Zzzz" cannot be maximized, since there is no
+     * reasonable maximization.
+     *
+     * Examples:
+     *
+     * "en" maximizes to "en_Latn_US"
+     *
+     * "de" maximizes to "de_Latn_DE"
+     *
+     * "sr" maximizes to "sr_Cyrl_RS"
+     *
+     * "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.)
+     *
+     * "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.)
+     *
+     * @param status  error information if maximizing this Locale failed.
+     *                If this Locale is not well-formed, the error code is
+     *                U_ILLEGAL_ARGUMENT_ERROR.
+     * @draft ICU 63
+     */
+    void addLikelySubtags(UErrorCode& status);
+
+    /**
+     * Minimize the subtags for this Locale, per the algorithm described
+     * in the following CLDR technical report:
+     *
+     *   http://www.unicode.org/reports/tr35/#Likely_Subtags
+     *
+     * If this Locale is already in the minimal form, or not valid, or there is
+     * no data available for minimization, the Locale will be unchanged.
+     *
+     * Since the minimization algorithm relies on proper maximization, see the
+     * comments for addLikelySubtags for reasons why there might not be any
+     * data.
+     *
+     * Examples:
+     *
+     * "en_Latn_US" minimizes to "en"
+     *
+     * "de_Latn_DE" minimizes to "de"
+     *
+     * "sr_Cyrl_RS" minimizes to "sr"
+     *
+     * "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the
+     * script, and minimizing to "zh" would imply "zh_Hans_CN".)
+     *
+     * @param status  error information if minimizing this Locale failed.
+     *                If this Locale is not well-formed, the error code is
+     *                U_ILLEGAL_ARGUMENT_ERROR.
+     * @draft ICU 63
+     */
+    void minimizeSubtags(UErrorCode& status);
+#endif  // U_HIDE_DRAFT_API
 
     /**
      * Gets the list of keywords for the specified locale.
diff --git a/icu4c/source/test/depstest/dependencies.txt b/icu4c/source/test/depstest/dependencies.txt
index 55783ee04ed..441934da5ee 100644
--- a/icu4c/source/test/depstest/dependencies.txt
+++ b/icu4c/source/test/depstest/dependencies.txt
@@ -164,7 +164,6 @@ library: common
     ubidi ushape ubiditransform
     listformatter
     resourcebundle service_registration resbund_cnv ures_cnv icudataver ucat
-    loclikely
     currency
     locale_display_names2
     conversion converter_selector ucnv_set ucnvdisp
@@ -383,7 +382,7 @@ group: cstr
 group: uscript
     uscript.o  # uscript_getCode() accepts a locale ID and loads its script code data
   deps
-    propname loclikely
+    propname resourcebundle
 
 group: uscript_props  # script metadata properties
     uscript_props.o
@@ -583,7 +582,7 @@ group: locale_display_names2
 group: currency
     ucurr.o
   deps
-    loclikely resourcebundle ulist ustring_case_locale
+    resourcebundle ulist ustring_case_locale
     stdlib_qsort  # for ucurr.o (which does not use ICU's uarrsort.o)
     static_unicode_sets usetiter
 
@@ -592,11 +591,6 @@ group: icudataver  # u_getDataVersion()
   deps
     resourcebundle
 
-group: loclikely
-    loclikely.o
-  deps
-    resourcebundle uscript_props propname
-
 group: locresdata
     # This was intended to collect locale functions that load resource bundle data.
     # See the resourcebundle group about what else loads data.
@@ -631,9 +625,12 @@ group: resourcebundle
     locid.o locmap.o wintz.o
     # Do we need class LocaleBased? http://bugs.icu-project.org/trac/ticket/8608
     locbased.o
+    loclikely.o
   deps
     udata ucol_swp
     sort stringenumeration uhash uvector
+    uscript_props propname
+    bytesinkutil
 
 group: udata
     udata.o ucmndata.o udatamem.o
@@ -832,7 +829,6 @@ group: localedata
   deps
     uniset_props resourcebundle
     uset_props  # TODO: change to using C++ UnicodeSet, remove this dependency
-    loclikely
 
 group: genderinfo
     gender.o
diff --git a/icu4c/source/test/intltest/loctest.cpp b/icu4c/source/test/intltest/loctest.cpp
index 3f5756e731a..c86db1bed33 100644
--- a/icu4c/source/test/intltest/loctest.cpp
+++ b/icu4c/source/test/intltest/loctest.cpp
@@ -221,6 +221,8 @@ void LocaleTest::runIndexedTest( int32_t index, UBool exec, const char* &name, c
 #endif
     TESTCASE_AUTO(TestSetIsBogus);
     TESTCASE_AUTO(TestParallelAPIValues);
+    TESTCASE_AUTO(TestAddLikelySubtags);
+    TESTCASE_AUTO(TestMinimizeSubtags);
     TESTCASE_AUTO(TestKeywordVariants);
     TESTCASE_AUTO(TestCreateUnicodeKeywords);
     TESTCASE_AUTO(TestKeywordVariantParsing);
@@ -1607,6 +1609,34 @@ LocaleTest::TestSetIsBogus() {
 }
 
 
+void
+LocaleTest::TestAddLikelySubtags() {
+    // "sv" is expected to gain its likely script and region subtags.
+    IcuTestErrorCode errorCode(*this, "TestAddLikelySubtags()");
+    static const Locale minimal("sv");
+    static const Locale maximal("sv_Latn_SE");
+
+    Locale actual(minimal);
+    actual.addLikelySubtags(errorCode);
+    errorCode.errIfFailureAndReset("\"%s\"", minimal.getName());
+    assertEquals("addLikelySubtags", maximal.getName(), actual.getName());
+}
+
+
+void
+LocaleTest::TestMinimizeSubtags() {
+    // "zh_Hant_TW" is expected to drop its script subtag but keep the region.
+    IcuTestErrorCode errorCode(*this, "TestMinimizeSubtags()");
+    static const Locale maximal("zh_Hant_TW");
+    static const Locale minimal("zh_TW");
+
+    Locale actual(maximal);
+    actual.minimizeSubtags(errorCode);
+    errorCode.errIfFailureAndReset("\"%s\"", maximal.getName());
+    assertEquals("minimizeSubtags", minimal.getName(), actual.getName());
+}
+
+
 void
 LocaleTest::TestKeywordVariants(void) {
     static const struct {
diff --git a/icu4c/source/test/intltest/loctest.h b/icu4c/source/test/intltest/loctest.h
index 06589161cff..e93b17df84a 100644
--- a/icu4c/source/test/intltest/loctest.h
+++ b/icu4c/source/test/intltest/loctest.h
@@ -113,6 +113,9 @@ public:
     void TestBug13277();
     void TestBug13554();
 
+    void TestAddLikelySubtags();  // Exercises Locale::addLikelySubtags().
+    void TestMinimizeSubtags();   // Exercises Locale::minimizeSubtags().
+
     void TestForLanguageTag();
     void TestToLanguageTag();