granicus.if.org Git - icu/commitdiff
ICU-20158 Pass ByteSink all the way to _uloc_(addLikely|minimize)Subtags().
author Fredrik Roubert <roubert@google.com>
Wed, 20 Feb 2019 23:23:02 +0000 (00:23 +0100)
committer Fredrik Roubert <fredrik@roubert.name>
Thu, 21 Feb 2019 11:19:04 +0000 (12:19 +0100)
This eliminates the need for scratch buffers in any code path that ends
with these functions and also eliminates the need for counting bytes,
something that ByteSink will now handle correctly when needed.

Existing calls to uloc_addLikelySubtags() and uloc_minimizeSubtags()
throughout ICU4C implementation code are also updated to instead use
either the Locale or ulocimp_* functions with the new API.

None of this should have any externally visible effect; it's all about
cleaning up implementation internals.

icu4c/source/common/locid.cpp
icu4c/source/common/loclikely.cpp
icu4c/source/common/ulocimp.h
icu4c/source/common/uscript.cpp
icu4c/source/i18n/calendar.cpp
icu4c/source/i18n/dtptngen.cpp

index f1b25f4600b4cea01b59ab61d51a4487770991e0..a6a518201c24f87e3c15d335fc0bb637827f3aef 100644 (file)
@@ -740,46 +740,10 @@ Locale::addLikelySubtags(UErrorCode& status) {
         return;
     }
 
-    // The maximized locale ID string is often longer, but there is no good
-    // heuristic to estimate just how much longer. Leave that to CharString.
     CharString maximizedLocaleID;
-    int32_t maximizedLocaleIDCapacity = static_cast<int32_t>(uprv_strlen(fullName));
-
-    char* buffer;
-    int32_t reslen;
-
-    for (;;) {
-        buffer = maximizedLocaleID.getAppendBuffer(
-                /*minCapacity=*/maximizedLocaleIDCapacity,
-                /*desiredCapacityHint=*/maximizedLocaleIDCapacity,
-                maximizedLocaleIDCapacity,
-                status);
-
-        if (U_FAILURE(status)) {
-            return;
-        }
-
-        reslen = uloc_addLikelySubtags(
-                fullName,
-                buffer,
-                maximizedLocaleIDCapacity,
-                &status);
-
-        if (status != U_BUFFER_OVERFLOW_ERROR) {
-            break;
-        }
-
-        maximizedLocaleIDCapacity = reslen;
-        status = U_ZERO_ERROR;
-    }
-
-    if (U_FAILURE(status)) {
-        return;
-    }
-
-    maximizedLocaleID.append(buffer, reslen, status);
-    if (status == U_STRING_NOT_TERMINATED_WARNING) {
-        status = U_ZERO_ERROR;  // Terminators provided by CharString.
+    {
+        CharStringByteSink sink(&maximizedLocaleID);
+        ulocimp_addLikelySubtags(fullName, sink, &status);
     }
 
     if (U_FAILURE(status)) {
@@ -798,50 +762,10 @@ Locale::minimizeSubtags(UErrorCode& status) {
         return;
     }
 
-    // Except for a few edge cases (like the empty string, that is minimized to
-    // "en__POSIX"), minimized locale ID strings will be either the same length
-    // or shorter than their input.
     CharString minimizedLocaleID;
-    int32_t minimizedLocaleIDCapacity = static_cast<int32_t>(uprv_strlen(fullName));
-
-    char* buffer;
-    int32_t reslen;
-
-    for (;;) {
-        buffer = minimizedLocaleID.getAppendBuffer(
-                /*minCapacity=*/minimizedLocaleIDCapacity,
-                /*desiredCapacityHint=*/minimizedLocaleIDCapacity,
-                minimizedLocaleIDCapacity,
-                status);
-
-        if (U_FAILURE(status)) {
-            return;
-        }
-
-        reslen = uloc_minimizeSubtags(
-                fullName,
-                buffer,
-                minimizedLocaleIDCapacity,
-                &status);
-
-        if (status != U_BUFFER_OVERFLOW_ERROR) {
-            break;
-        }
-
-        // Because of the internal minimal buffer size of CharString, I can't
-        // think of any input data for which this could possibly ever happen.
-        // Maybe it would be better replaced with an assertion instead?
-        minimizedLocaleIDCapacity = reslen;
-        status = U_ZERO_ERROR;
-    }
-
-    if (U_FAILURE(status)) {
-        return;
-    }
-
-    minimizedLocaleID.append(buffer, reslen, status);
-    if (status == U_STRING_NOT_TERMINATED_WARNING) {
-        status = U_ZERO_ERROR;  // Terminators provided by CharString.
+    {
+        CharStringByteSink sink(&minimizedLocaleID);
+        ulocimp_minimizeSubtags(fullName, sink, &status);
     }
 
     if (U_FAILURE(status)) {
index 8ceb352bd379a84b299f09cb67c6a3621e1cff9b..d4df914a9bb99e1543f13fa62384603526680559 100644 (file)
@@ -19,6 +19,7 @@
 *   that then do not depend on resource bundle code and likely-subtags data.
 */
 
+#include "unicode/bytestream.h"
 #include "unicode/utypes.h"
 #include "unicode/locid.h"
 #include "unicode/putil.h"
@@ -26,6 +27,8 @@
 #include "unicode/uloc.h"
 #include "unicode/ures.h"
 #include "unicode/uscript.h"
+#include "bytesinkutil.h"
+#include "charstr.h"
 #include "cmemory.h"
 #include "cstring.h"
 #include "ulocimp.h"
@@ -145,12 +148,10 @@ static const char* const unknownRegion = "ZZ";
  * @param trailing Any trailing data to append to the new tag.
  * @param trailingLength The length of the trailing data.
  * @param alternateTags A string containing any alternate tags.
- * @param tag The output buffer.
- * @param tagCapacity The capacity of the output buffer.
+ * @param sink The output sink receiving the tag string.
  * @param err A pointer to a UErrorCode for error reporting.
- * @return The length of the tag string, which may be greater than tagCapacity, or -1 on error.
  **/
-static int32_t U_CALLCONV
+static void U_CALLCONV
 createTagStringWithAlternates(
     const char* lang,
     int32_t langLength,
@@ -161,16 +162,13 @@ createTagStringWithAlternates(
     const char* trailing,
     int32_t trailingLength,
     const char* alternateTags,
-    char* tag,
-    int32_t tagCapacity,
+    icu::ByteSink& sink,
     UErrorCode* err) {
 
     if (U_FAILURE(*err)) {
         goto error;
     }
-    else if (tag == NULL ||
-             tagCapacity <= 0 ||
-             langLength >= ULOC_LANG_CAPACITY ||
+    else if (langLength >= ULOC_LANG_CAPACITY ||
              scriptLength >= ULOC_SCRIPT_CAPACITY ||
              regionLength >= ULOC_COUNTRY_CAPACITY) {
         goto error;
@@ -184,7 +182,6 @@ createTagStringWithAlternates(
          **/
         char tagBuffer[ULOC_FULLNAME_CAPACITY];
         int32_t tagLength = 0;
-        int32_t capacityRemaining = tagCapacity;
         UBool regionAppended = FALSE;
 
         if (langLength > 0) {
@@ -311,55 +308,28 @@ createTagStringWithAlternates(
             }
         }
 
-        {
-            const int32_t toCopy =
-                tagLength >= tagCapacity ? tagCapacity : tagLength;
-
-            /**
-             * Copy the partial tag from our internal buffer to the supplied
-             * target.
-             **/
-            uprv_memcpy(
-                tag,
-                tagBuffer,
-                toCopy);
-
-            capacityRemaining -= toCopy;
-        }
+        /**
+         * Copy the partial tag from our internal buffer to the supplied
+         * target.
+         **/
+        sink.Append(tagBuffer, tagLength);
 
         if (trailingLength > 0) {
-            if (*trailing != '@' && capacityRemaining > 0) {
-                tag[tagLength++] = '_';
-                --capacityRemaining;
-                if (capacityRemaining > 0 && !regionAppended) {
+            if (*trailing != '@') {
+                sink.Append("_", 1);
+                if (!regionAppended) {
                     /* extra separator is required */
-                    tag[tagLength++] = '_';
-                    --capacityRemaining;
+                    sink.Append("_", 1);
                 }
             }
 
-            if (capacityRemaining > 0) {
-                /*
-                 * Copy the trailing data into the supplied buffer.  Use uprv_memmove, since we
-                 * don't know if the user-supplied buffers overlap.
-                 */
-                const int32_t toCopy =
-                    trailingLength >= capacityRemaining ? capacityRemaining : trailingLength;
-
-                uprv_memmove(
-                    &tag[tagLength],
-                    trailing,
-                    toCopy);
-            }
+            /*
+             * Copy the trailing data into the supplied buffer.
+             */
+            sink.Append(trailing, trailingLength);
         }
 
-        tagLength += trailingLength;
-
-        return u_terminateChars(
-                    tag,
-                    tagCapacity,
-                    tagLength,
-                    err);
+        return;
     }
 
 error:
@@ -373,8 +343,6 @@ error:
         U_SUCCESS(*err)) {
         *err = U_ILLEGAL_ARGUMENT_ERROR;
     }
-
-    return -1;
 }
 
 /**
@@ -398,12 +366,10 @@ error:
  * @param regionLength The length of the region tag.
  * @param trailing Any trailing data to append to the new tag.
  * @param trailingLength The length of the trailing data.
- * @param tag The output buffer.
- * @param tagCapacity The capacity of the output buffer.
+ * @param sink The output sink receiving the tag string.
  * @param err A pointer to a UErrorCode for error reporting.
- * @return The length of the tag string, which may be greater than tagCapacity.
  **/
-static int32_t U_CALLCONV
+static void U_CALLCONV
 createTagString(
     const char* lang,
     int32_t langLength,
@@ -413,11 +379,10 @@ createTagString(
     int32_t regionLength,
     const char* trailing,
     int32_t trailingLength,
-    char* tag,
-    int32_t tagCapacity,
+    icu::ByteSink& sink,
     UErrorCode* err)
 {
-    return createTagStringWithAlternates(
+    createTagStringWithAlternates(
                 lang,
                 langLength,
                 script,
@@ -427,8 +392,7 @@ createTagString(
                 trailing,
                 trailingLength,
                 NULL,
-                tag,
-                tagCapacity,
+                sink,
                 err);
 }
 
@@ -576,7 +540,7 @@ error:
     goto exit;
 }
 
-static int32_t U_CALLCONV
+static UBool U_CALLCONV
 createLikelySubtagsString(
     const char* lang,
     int32_t langLength,
@@ -586,17 +550,14 @@ createLikelySubtagsString(
     int32_t regionLength,
     const char* variants,
     int32_t variantsLength,
-    char* tag,
-    int32_t tagCapacity,
-    UErrorCode* err)
-{
+    icu::ByteSink& sink,
+    UErrorCode* err) {
     /**
      * ULOC_FULLNAME_CAPACITY will provide enough capacity
      * that we can build a string that contains the language,
      * script and region code without worrying about overrunning
      * the user-supplied buffer.
      **/
-    char tagBuffer[ULOC_FULLNAME_CAPACITY];
     char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY];
 
     if(U_FAILURE(*err)) {
@@ -610,25 +571,28 @@ createLikelySubtagsString(
 
         const char* likelySubtags = NULL;
 
-        createTagString(
-            lang,
-            langLength,
-            script,
-            scriptLength,
-            region,
-            regionLength,
-            NULL,
-            0,
-            tagBuffer,
-            sizeof(tagBuffer),
-            err);
+        icu::CharString tagBuffer;
+        {
+            icu::CharStringByteSink sink(&tagBuffer);
+            createTagString(
+                lang,
+                langLength,
+                script,
+                scriptLength,
+                region,
+                regionLength,
+                NULL,
+                0,
+                sink,
+                err);
+        }
         if(U_FAILURE(*err)) {
             goto error;
         }
 
         likelySubtags =
             findLikelySubtags(
-                tagBuffer,
+                tagBuffer.data(),
                 likelySubtagsBuffer,
                 sizeof(likelySubtagsBuffer),
                 err);
@@ -640,7 +604,7 @@ createLikelySubtagsString(
             /* Always use the language tag from the
                maximal string, since it may be more
                specific than the one provided. */
-            return createTagStringWithAlternates(
+            createTagStringWithAlternates(
                         NULL,
                         0,
                         NULL,
@@ -650,9 +614,9 @@ createLikelySubtagsString(
                         variants,
                         variantsLength,
                         likelySubtags,
-                        tag,
-                        tagCapacity,
+                        sink,
                         err);
+            return TRUE;
         }
     }
 
@@ -663,25 +627,28 @@ createLikelySubtagsString(
 
         const char* likelySubtags = NULL;
 
-        createTagString(
-            lang,
-            langLength,
-            script,
-            scriptLength,
-            NULL,
-            0,
-            NULL,
-            0,
-            tagBuffer,
-            sizeof(tagBuffer),
-            err);
+        icu::CharString tagBuffer;
+        {
+            icu::CharStringByteSink sink(&tagBuffer);
+            createTagString(
+                lang,
+                langLength,
+                script,
+                scriptLength,
+                NULL,
+                0,
+                NULL,
+                0,
+                sink,
+                err);
+        }
         if(U_FAILURE(*err)) {
             goto error;
         }
 
         likelySubtags =
             findLikelySubtags(
-                tagBuffer,
+                tagBuffer.data(),
                 likelySubtagsBuffer,
                 sizeof(likelySubtagsBuffer),
                 err);
@@ -693,7 +660,7 @@ createLikelySubtagsString(
             /* Always use the language tag from the
                maximal string, since it may be more
                specific than the one provided. */
-            return createTagStringWithAlternates(
+            createTagStringWithAlternates(
                         NULL,
                         0,
                         NULL,
@@ -703,9 +670,9 @@ createLikelySubtagsString(
                         variants,
                         variantsLength,
                         likelySubtags,
-                        tag,
-                        tagCapacity,
+                        sink,
                         err);
+            return TRUE;
         }
     }
 
@@ -716,25 +683,28 @@ createLikelySubtagsString(
 
         const char* likelySubtags = NULL;
 
-        createTagString(
-            lang,
-            langLength,
-            NULL,
-            0,
-            region,
-            regionLength,
-            NULL,
-            0,
-            tagBuffer,
-            sizeof(tagBuffer),
-            err);
+        icu::CharString tagBuffer;
+        {
+            icu::CharStringByteSink sink(&tagBuffer);
+            createTagString(
+                lang,
+                langLength,
+                NULL,
+                0,
+                region,
+                regionLength,
+                NULL,
+                0,
+                sink,
+                err);
+        }
         if(U_FAILURE(*err)) {
             goto error;
         }
 
         likelySubtags =
             findLikelySubtags(
-                tagBuffer,
+                tagBuffer.data(),
                 likelySubtagsBuffer,
                 sizeof(likelySubtagsBuffer),
                 err);
@@ -746,7 +716,7 @@ createLikelySubtagsString(
             /* Always use the language tag from the
                maximal string, since it may be more
                specific than the one provided. */
-            return createTagStringWithAlternates(
+            createTagStringWithAlternates(
                         NULL,
                         0,
                         script,
@@ -756,9 +726,9 @@ createLikelySubtagsString(
                         variants,
                         variantsLength,
                         likelySubtags,
-                        tag,
-                        tagCapacity,
+                        sink,
                         err);
+            return TRUE;
         }
     }
 
@@ -768,25 +738,28 @@ createLikelySubtagsString(
     {
         const char* likelySubtags = NULL;
 
-        createTagString(
-            lang,
-            langLength,
-            NULL,
-            0,
-            NULL,
-            0,
-            NULL,
-            0,
-            tagBuffer,
-            sizeof(tagBuffer),
-            err);
+        icu::CharString tagBuffer;
+        {
+            icu::CharStringByteSink sink(&tagBuffer);
+            createTagString(
+                lang,
+                langLength,
+                NULL,
+                0,
+                NULL,
+                0,
+                NULL,
+                0,
+                sink,
+                err);
+        }
         if(U_FAILURE(*err)) {
             goto error;
         }
 
         likelySubtags =
             findLikelySubtags(
-                tagBuffer,
+                tagBuffer.data(),
                 likelySubtagsBuffer,
                 sizeof(likelySubtagsBuffer),
                 err);
@@ -798,7 +771,7 @@ createLikelySubtagsString(
             /* Always use the language tag from the
                maximal string, since it may be more
                specific than the one provided. */
-            return createTagStringWithAlternates(
+            createTagStringWithAlternates(
                         NULL,
                         0,
                         script,
@@ -808,17 +781,13 @@ createLikelySubtagsString(
                         variants,
                         variantsLength,
                         likelySubtags,
-                        tag,
-                        tagCapacity,
+                        sink,
                         err);
+            return TRUE;
         }
     }
 
-    return u_terminateChars(
-                tag,
-                tagCapacity,
-                0,
-                err);
+    return FALSE;
 
 error:
 
@@ -826,7 +795,7 @@ error:
         *err = U_ILLEGAL_ARGUMENT_ERROR;
     }
 
-    return -1;
+    return FALSE;
 }
 
 #define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) \
@@ -848,12 +817,10 @@ error:
         } \
     }
 
-static int32_t
-_uloc_addLikelySubtags(const char*    localeID,
-         char* maximizedLocaleID,
-         int32_t maximizedLocaleIDCapacity,
-         UErrorCode* err)
-{
+static void
+_uloc_addLikelySubtags(const char* localeID,
+                       icu::ByteSink& sink,
+                       UErrorCode* err) {
     char lang[ULOC_LANG_CAPACITY];
     int32_t langLength = sizeof(lang);
     char script[ULOC_SCRIPT_CAPACITY];
@@ -863,14 +830,12 @@ _uloc_addLikelySubtags(const char*    localeID,
     const char* trailing = "";
     int32_t trailingLength = 0;
     int32_t trailingIndex = 0;
-    int32_t resultLength = 0;
+    UBool success = FALSE;
 
     if(U_FAILURE(*err)) {
         goto error;
     }
-    else if (localeID == NULL ||
-             maximizedLocaleID == NULL ||
-             maximizedLocaleIDCapacity <= 0) {
+    if (localeID == NULL) {
         goto error;
     }
 
@@ -901,7 +866,7 @@ _uloc_addLikelySubtags(const char*    localeID,
 
     CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
 
-    resultLength =
+    success =
         createLikelySubtagsString(
             lang,
             langLength,
@@ -911,55 +876,32 @@ _uloc_addLikelySubtags(const char*    localeID,
             regionLength,
             trailing,
             trailingLength,
-            maximizedLocaleID,
-            maximizedLocaleIDCapacity,
+            sink,
             err);
 
-    if (resultLength == 0) {
+    if (!success) {
         const int32_t localIDLength = (int32_t)uprv_strlen(localeID);
 
         /*
          * If we get here, we need to return localeID.
          */
-        uprv_memcpy(
-            maximizedLocaleID,
-            localeID,
-            localIDLength <= maximizedLocaleIDCapacity ? 
-                localIDLength : maximizedLocaleIDCapacity);
-
-        resultLength =
-            u_terminateChars(
-                maximizedLocaleID,
-                maximizedLocaleIDCapacity,
-                localIDLength,
-                err);
+        sink.Append(localeID, localIDLength);
     }
 
-    return resultLength;
+    return;
 
 error:
 
     if (!U_FAILURE(*err)) {
         *err = U_ILLEGAL_ARGUMENT_ERROR;
     }
-
-    return -1;
 }
 
-static int32_t
-_uloc_minimizeSubtags(const char*    localeID,
-         char* minimizedLocaleID,
-         int32_t minimizedLocaleIDCapacity,
-         UErrorCode* err)
-{
-    /**
-     * ULOC_FULLNAME_CAPACITY will provide enough capacity
-     * that we can build a string that contains the language,
-     * script and region code without worrying about overrunning
-     * the user-supplied buffer.
-     **/
-    char maximizedTagBuffer[ULOC_FULLNAME_CAPACITY];
-    int32_t maximizedTagBufferLength = sizeof(maximizedTagBuffer);
+static void
+_uloc_minimizeSubtags(const char* localeID,
+                      icu::ByteSink& sink,
+                      UErrorCode* err) {
+    icu::CharString maximizedTagBuffer;
 
     char lang[ULOC_LANG_CAPACITY];
     int32_t langLength = sizeof(lang);
@@ -974,9 +916,7 @@ _uloc_minimizeSubtags(const char*    localeID,
     if(U_FAILURE(*err)) {
         goto error;
     }
-    else if (localeID == NULL ||
-             minimizedLocaleID == NULL ||
-             minimizedLocaleIDCapacity <= 0) {
+    else if (localeID == NULL) {
         goto error;
     }
 
@@ -1009,32 +949,32 @@ _uloc_minimizeSubtags(const char*    localeID,
 
     CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
 
-    createTagString(
-        lang,
-        langLength,
-        script,
-        scriptLength,
-        region,
-        regionLength,
-        NULL,
-        0,
-        maximizedTagBuffer,
-        maximizedTagBufferLength,
-        err);
-    if(U_FAILURE(*err)) {
-        goto error;
-    }
+    {
+        icu::CharString base;
+        {
+            icu::CharStringByteSink sink(&base);
+            createTagString(
+                lang,
+                langLength,
+                script,
+                scriptLength,
+                region,
+                regionLength,
+                NULL,
+                0,
+                sink,
+                err);
+        }
 
-    /**
-     * First, we need to first get the maximization
-     * from AddLikelySubtags.
-     **/
-    maximizedTagBufferLength =
-        uloc_addLikelySubtags(
-            maximizedTagBuffer,
-            maximizedTagBuffer,
-            maximizedTagBufferLength,
-            err);
+        /**
+         * First, we need to first get the maximization
+         * from AddLikelySubtags.
+         **/
+        {
+            icu::CharStringByteSink sink(&maximizedTagBuffer);
+            ulocimp_addLikelySubtags(base.data(), sink, err);
+        }
+    }
 
     if(U_FAILURE(*err)) {
         goto error;
@@ -1044,9 +984,9 @@ _uloc_minimizeSubtags(const char*    localeID,
      * Start first with just the language.
      **/
     {
-        char tagBuffer[ULOC_FULLNAME_CAPACITY];
-
-        const int32_t tagBufferLength =
+        icu::CharString tagBuffer;
+        {
+            icu::CharStringByteSink sink(&tagBuffer);
             createLikelySubtagsString(
                 lang,
                 langLength,
@@ -1056,19 +996,19 @@ _uloc_minimizeSubtags(const char*    localeID,
                 0,
                 NULL,
                 0,
-                tagBuffer,
-                sizeof(tagBuffer),
+                sink,
                 err);
+        }
 
         if(U_FAILURE(*err)) {
             goto error;
         }
         else if (uprv_strnicmp(
-                    maximizedTagBuffer,
-                    tagBuffer,
-                    tagBufferLength) == 0) {
+                    maximizedTagBuffer.data(),
+                    tagBuffer.data(),
+                    tagBuffer.length()) == 0) {
 
-            return createTagString(
+            createTagString(
                         lang,
                         langLength,
                         NULL,
@@ -1077,9 +1017,9 @@ _uloc_minimizeSubtags(const char*    localeID,
                         0,
                         trailing,
                         trailingLength,
-                        minimizedLocaleID,
-                        minimizedLocaleIDCapacity,
+                        sink,
                         err);
+            return;
         }
     }
 
@@ -1088,9 +1028,9 @@ _uloc_minimizeSubtags(const char*    localeID,
      **/
     if (regionLength > 0) {
 
-        char tagBuffer[ULOC_FULLNAME_CAPACITY];
-
-        const int32_t tagBufferLength =
+        icu::CharString tagBuffer;
+        {
+            icu::CharStringByteSink sink(&tagBuffer);
             createLikelySubtagsString(
                 lang,
                 langLength,
@@ -1100,19 +1040,19 @@ _uloc_minimizeSubtags(const char*    localeID,
                 regionLength,
                 NULL,
                 0,
-                tagBuffer,
-                sizeof(tagBuffer),
+                sink,
                 err);
+        }
 
         if(U_FAILURE(*err)) {
             goto error;
         }
         else if (uprv_strnicmp(
-                    maximizedTagBuffer,
-                    tagBuffer,
-                    tagBufferLength) == 0) {
+                    maximizedTagBuffer.data(),
+                    tagBuffer.data(),
+                    tagBuffer.length()) == 0) {
 
-            return createTagString(
+            createTagString(
                         lang,
                         langLength,
                         NULL,
@@ -1121,9 +1061,9 @@ _uloc_minimizeSubtags(const char*    localeID,
                         regionLength,
                         trailing,
                         trailingLength,
-                        minimizedLocaleID,
-                        minimizedLocaleIDCapacity,
+                        sink,
                         err);
+            return;
         }
     }
 
@@ -1133,9 +1073,9 @@ _uloc_minimizeSubtags(const char*    localeID,
      * maximal version that we already have.
      **/
     if (scriptLength > 0 && regionLength > 0) {
-        char tagBuffer[ULOC_FULLNAME_CAPACITY];
-
-        const int32_t tagBufferLength =
+        icu::CharString tagBuffer;
+        {
+            icu::CharStringByteSink sink(&tagBuffer);
             createLikelySubtagsString(
                 lang,
                 langLength,
@@ -1145,19 +1085,19 @@ _uloc_minimizeSubtags(const char*    localeID,
                 0,
                 NULL,
                 0,
-                tagBuffer,
-                sizeof(tagBuffer),
+                sink,
                 err);
+        }
 
         if(U_FAILURE(*err)) {
             goto error;
         }
         else if (uprv_strnicmp(
-                    maximizedTagBuffer,
-                    tagBuffer,
-                    tagBufferLength) == 0) {
+                    maximizedTagBuffer.data(),
+                    tagBuffer.data(),
+                    tagBuffer.length()) == 0) {
 
-            return createTagString(
+            createTagString(
                         lang,
                         langLength,
                         script,
@@ -1166,9 +1106,9 @@ _uloc_minimizeSubtags(const char*    localeID,
                         0,
                         trailing,
                         trailingLength,
-                        minimizedLocaleID,
-                        minimizedLocaleIDCapacity,
+                        sink,
                         err);
+            return;
         }
     }
 
@@ -1177,18 +1117,8 @@ _uloc_minimizeSubtags(const char*    localeID,
          * If we got here, return the locale ID parameter.
          **/
         const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
-
-        uprv_memcpy(
-            minimizedLocaleID,
-            localeID,
-            localeIDLength <= minimizedLocaleIDCapacity ? 
-                localeIDLength : minimizedLocaleIDCapacity);
-
-        return u_terminateChars(
-                    minimizedLocaleID,
-                    minimizedLocaleIDCapacity,
-                    localeIDLength,
-                    err);
+        sink.Append(localeID, localeIDLength);
+        return;
     }
 
 error:
@@ -1196,10 +1126,6 @@ error:
     if (!U_FAILURE(*err)) {
         *err = U_ILLEGAL_ARGUMENT_ERROR;
     }
-
-    return -1;
-
-
 }
 
 static UBool
@@ -1230,51 +1156,83 @@ do_canonicalize(const char*    localeID,
 }
 
 U_CAPI int32_t U_EXPORT2
-uloc_addLikelySubtags(const char*    localeID,
-         char* maximizedLocaleID,
-         int32_t maximizedLocaleIDCapacity,
-         UErrorCode* err)
-{
+uloc_addLikelySubtags(const char* localeID,
+                      char* maximizedLocaleID,
+                      int32_t maximizedLocaleIDCapacity,
+                      UErrorCode* status) {
+    if (U_FAILURE(*status)) {
+        return 0;
+    }
+
+    icu::CheckedArrayByteSink sink(
+            maximizedLocaleID, maximizedLocaleIDCapacity);
+
+    ulocimp_addLikelySubtags(localeID, sink, status);
+    int32_t reslen = sink.NumberOfBytesAppended();
+
+    if (U_FAILURE(*status)) {
+        return sink.Overflowed() ? reslen : -1;
+    }
+
+    if (sink.Overflowed()) {
+        *status = U_BUFFER_OVERFLOW_ERROR;
+    } else {
+        u_terminateChars(
+                maximizedLocaleID, maximizedLocaleIDCapacity, reslen, status);
+    }
+
+    return reslen;
+}
+
+U_CAPI void U_EXPORT2
+ulocimp_addLikelySubtags(const char* localeID,
+                         icu::ByteSink& sink,
+                         UErrorCode* status) {
     char localeBuffer[ULOC_FULLNAME_CAPACITY];
 
-    if (!do_canonicalize(
-        localeID,
-        localeBuffer,
-        sizeof(localeBuffer),
-        err)) {
-        return -1;
+    if (do_canonicalize(localeID, localeBuffer, sizeof localeBuffer, status)) {
+        _uloc_addLikelySubtags(localeBuffer, sink, status);
     }
-    else {
-        return _uloc_addLikelySubtags(
-                    localeBuffer,
-                    maximizedLocaleID,
-                    maximizedLocaleIDCapacity,
-                    err);
-    }    
 }
 
 U_CAPI int32_t U_EXPORT2
-uloc_minimizeSubtags(const char*    localeID,
-         char* minimizedLocaleID,
-         int32_t minimizedLocaleIDCapacity,
-         UErrorCode* err)
-{
+uloc_minimizeSubtags(const char* localeID,
+                     char* minimizedLocaleID,
+                     int32_t minimizedLocaleIDCapacity,
+                     UErrorCode* status) {
+    if (U_FAILURE(*status)) {
+        return 0;
+    }
+
+    icu::CheckedArrayByteSink sink(
+            minimizedLocaleID, minimizedLocaleIDCapacity);
+
+    ulocimp_minimizeSubtags(localeID, sink, status);
+    int32_t reslen = sink.NumberOfBytesAppended();
+
+    if (U_FAILURE(*status)) {
+        return sink.Overflowed() ? reslen : -1;
+    }
+
+    if (sink.Overflowed()) {
+        *status = U_BUFFER_OVERFLOW_ERROR;
+    } else {
+        u_terminateChars(
+                minimizedLocaleID, minimizedLocaleIDCapacity, reslen, status);
+    }
+
+    return reslen;
+}
+
+U_CAPI void U_EXPORT2
+ulocimp_minimizeSubtags(const char* localeID,
+                        icu::ByteSink& sink,
+                        UErrorCode* status) {
     char localeBuffer[ULOC_FULLNAME_CAPACITY];
 
-    if (!do_canonicalize(
-        localeID,
-        localeBuffer,
-        sizeof(localeBuffer),
-        err)) {
-        return -1;
+    if (do_canonicalize(localeID, localeBuffer, sizeof localeBuffer, status)) {
+        _uloc_minimizeSubtags(localeBuffer, sink, status);
     }
-    else {
-        return _uloc_minimizeSubtags(
-                    localeBuffer,
-                    minimizedLocaleID,
-                    minimizedLocaleIDCapacity,
-                    err);
-    }    
 }
 
 // Pairs of (language subtag, + or -) for finding out fast if common languages
@@ -1282,7 +1240,7 @@ uloc_minimizeSubtags(const char*    localeID,
 static const char LANG_DIR_STRING[] =
         "root-en-es-pt-zh-ja-ko-de-fr-it-ar+he+fa+ru-nl-pl-th-tr-";
 
-// Implemented here because this calls uloc_addLikelySubtags().
+// Implemented here because this calls ulocimp_addLikelySubtags().
 U_CAPI UBool U_EXPORT2
 uloc_isRightToLeft(const char *locale) {
     UErrorCode errorCode = U_ZERO_ERROR;
@@ -1309,12 +1267,15 @@ uloc_isRightToLeft(const char *locale) {
         }
         // Otherwise, find the likely script.
         errorCode = U_ZERO_ERROR;
-        char likely[ULOC_FULLNAME_CAPACITY];
-        (void)uloc_addLikelySubtags(locale, likely, UPRV_LENGTHOF(likely), &errorCode);
+        icu::CharString likely;
+        {
+            icu::CharStringByteSink sink(&likely);
+            ulocimp_addLikelySubtags(locale, sink, &errorCode);
+        }
         if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
             return FALSE;
         }
-        scriptLength = uloc_getScript(likely, script, UPRV_LENGTHOF(script), &errorCode);
+        scriptLength = uloc_getScript(likely.data(), script, UPRV_LENGTHOF(script), &errorCode);
         if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
                 scriptLength == 0) {
             return FALSE;
@@ -1365,11 +1326,14 @@ ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion,
             rgLen = 0;
         } else if (rgLen == 0 && inferRegion) {
             // no unicode_region_subtag but inferRegion TRUE, try likely subtags
-            char locBuf[ULOC_FULLNAME_CAPACITY];
             rgStatus = U_ZERO_ERROR;
-            (void)uloc_addLikelySubtags(localeID, locBuf, ULOC_FULLNAME_CAPACITY, &rgStatus);
+            icu::CharString locBuf;
+            {
+                icu::CharStringByteSink sink(&locBuf);
+                ulocimp_addLikelySubtags(localeID, sink, &rgStatus);
+            }
             if (U_SUCCESS(rgStatus)) {
-                rgLen = uloc_getCountry(locBuf, rgBuf, ULOC_RG_BUFLEN, status);
+                rgLen = uloc_getCountry(locBuf.data(), rgBuf, ULOC_RG_BUFLEN, status);
                 if (U_FAILURE(*status)) {
                     rgLen = 0;
                 }
index fd16af5ae529b9466a21787ed76bedfc5bdead89..31d3c44d0bec689a5ff2c706e7e26988dfbdc068 100644 (file)
@@ -145,6 +145,74 @@ U_CAPI int32_t U_EXPORT2
 ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion,
                                      char *region, int32_t regionCapacity, UErrorCode* status);
 
+/**
+ * Add the likely subtags for a provided locale ID, per the algorithm described
+ * in the following CLDR technical report:
+ *
+ *   http://www.unicode.org/reports/tr35/#Likely_Subtags
+ *
+ * If localeID is already in the maximal form, or there is no data available
+ * for maximization, it will be copied to the output sink.  For example,
+ * "und-Zzzz" cannot be maximized, since there is no reasonable maximization.
+ *
+ * Examples:
+ *
+ * "en" maximizes to "en_Latn_US"
+ *
+ * "de" maximizes to "de_Latn_DE"
+ *
+ * "sr" maximizes to "sr_Cyrl_RS"
+ *
+ * "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.)
+ *
+ * "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.)
+ *
+ * @param localeID The locale to maximize
+ * @param sink The output sink receiving the maximized locale
+ * @param err Error information if maximizing the locale failed.  If the length
+ * of localeID, including its null terminator, exceeds the maximum allowed size,
+ * or localeID is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
+ * @internal ICU 64
+ */
+U_STABLE void U_EXPORT2
+ulocimp_addLikelySubtags(const char* localeID,
+                         icu::ByteSink& sink,
+                         UErrorCode* err);
+
+/**
+ * Minimize the subtags for a provided locale ID, per the algorithm described
+ * in the following CLDR technical report:
+ *
+ *   http://www.unicode.org/reports/tr35/#Likely_Subtags
+ *
+ * If localeID is already in the minimal form, or there is no data available
+ * for minimization, it will be copied to the output sink.  Since the
+ * minimization algorithm relies on proper maximization, see the comments
+ * for ulocimp_addLikelySubtags for reasons why there might not be any data.
+ *
+ * Examples:
+ *
+ * "en_Latn_US" minimizes to "en"
+ *
+ * "de_Latn_DE" minimizes to "de"
+ *
+ * "sr_Cyrl_RS" minimizes to "sr"
+ *
+ * "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the
+ * script, and minimizing to "zh" would imply "zh_Hans_CN".)
+ *
+ * @param localeID The locale to minimize
+ * @param sink The output sink receiving the minimized locale
+ * @param err Error information if minimizing the locale failed.  If the length
+ * of localeID, including its null terminator, exceeds the maximum allowed size,
+ * or localeID is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
+ * @internal ICU 64
+ */
+U_STABLE void U_EXPORT2
+ulocimp_minimizeSubtags(const char* localeID,
+                        icu::ByteSink& sink,
+                        UErrorCode* err);
+
 U_CAPI const char * U_EXPORT2
 locale_getKeywordsStart(const char *localeID);
 
index 83b5f7ef168f7e4cf3900df1440febf1e60c80f7..98528c158b4387d2e347d750396516375e89a045 100644 (file)
 #include "unicode/uchar.h"
 #include "unicode/uscript.h"
 #include "unicode/uloc.h"
+#include "bytesinkutil.h"
+#include "charstr.h"
 #include "cmemory.h"
 #include "cstring.h"
+#include "ulocimp.h"
 
 static const UScriptCode JAPANESE[3] = { USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN };
 static const UScriptCode KOREAN[2] = { USCRIPT_HANGUL, USCRIPT_HAN };
@@ -98,7 +101,6 @@ uscript_getCode(const char* nameOrAbbrOrLocale,
                 int32_t capacity,
                 UErrorCode* err){
     UBool triedCode;
-    char likely[ULOC_FULLNAME_CAPACITY];
     UErrorCode internalErrorCode;
     int32_t length;
 
@@ -125,10 +127,13 @@ uscript_getCode(const char* nameOrAbbrOrLocale,
     if(U_FAILURE(*err) || length != 0) {
         return length;
     }
-    (void)uloc_addLikelySubtags(nameOrAbbrOrLocale,
-                                likely, UPRV_LENGTHOF(likely), &internalErrorCode);
+    icu::CharString likely;
+    {
+        icu::CharStringByteSink sink(&likely);
+        ulocimp_addLikelySubtags(nameOrAbbrOrLocale, sink, &internalErrorCode);
+    }
     if(U_SUCCESS(internalErrorCode) && internalErrorCode != U_STRING_NOT_TERMINATED_WARNING) {
-        length = getCodesFromLocale(likely, fillIn, capacity, err);
+        length = getCodesFromLocale(likely.data(), fillIn, capacity, err);
         if(U_FAILURE(*err) || length != 0) {
             return length;
         }
index 63197bd27c033387f12aaf62b9eb07b03ff9b37d..4f451ee099cf6b1315848b2dea659e6638985423 100644 (file)
@@ -3796,18 +3796,16 @@ Calendar::setWeekData(const Locale& desiredLocale, const char *type, UErrorCode&
     // 2). If the locale has a script designation then we ignore it,
     //     then remove it ( i.e. "en_Latn_US" becomes "en_US" )
 
-    char minLocaleID[ULOC_FULLNAME_CAPACITY] = { 0 };
     UErrorCode myStatus = U_ZERO_ERROR;
 
-    uloc_minimizeSubtags(desiredLocale.getName(),minLocaleID,ULOC_FULLNAME_CAPACITY,&myStatus);
-    Locale min = Locale::createFromName(minLocaleID);
+    Locale min(desiredLocale);
+    min.minimizeSubtags(myStatus);
     Locale useLocale;
     if ( uprv_strlen(desiredLocale.getCountry()) == 0 ||
          (uprv_strlen(desiredLocale.getScript()) > 0 && uprv_strlen(min.getScript()) == 0) ) {
-        char maxLocaleID[ULOC_FULLNAME_CAPACITY] = { 0 };
         myStatus = U_ZERO_ERROR;
-        uloc_addLikelySubtags(desiredLocale.getName(),maxLocaleID,ULOC_FULLNAME_CAPACITY,&myStatus);
-        Locale max = Locale::createFromName(maxLocaleID);
+        Locale max(desiredLocale);
+        max.addLikelySubtags(myStatus);
         useLocale = Locale(max.getLanguage(),max.getCountry());
     } else {
         useLocale = desiredLocale;
index e0f19f068afc487213488cb63b45d417741920cc..fcc5977c56d78901e8c3e99c94550baeb386393d 100644 (file)
@@ -615,16 +615,11 @@ U_CFUNC void U_CALLCONV DateTimePatternGenerator::loadAllowedHourFormatsData(UEr
 
 void DateTimePatternGenerator::getAllowedHourFormats(const Locale &locale, UErrorCode &status) {
     if (U_FAILURE(status)) { return; }
-    const char *localeID = locale.getName();
-    char maxLocaleID[ULOC_FULLNAME_CAPACITY];
-    int32_t length = uloc_addLikelySubtags(localeID, maxLocaleID, ULOC_FULLNAME_CAPACITY, &status);
+    Locale maxLocale(locale);
+    maxLocale.addLikelySubtags(status);
     if (U_FAILURE(status)) {
         return;
-    } else if (length == ULOC_FULLNAME_CAPACITY) {  // no room for NUL
-        status = U_BUFFER_OVERFLOW_ERROR;
-        return;
     }
-    Locale maxLocale = Locale(maxLocaleID);
 
     const char *country = maxLocale.getCountry();
     if (*country == '\0') { country = "001"; }