]> granicus.if.org Git - icu/commitdiff
ICU-20158 Pass ByteSink from Locale::forLanguageTag() to uloc_forLanguageTag().
authorFredrik Roubert <roubert@google.com>
Tue, 30 Oct 2018 22:35:03 +0000 (23:35 +0100)
committerFredrik Roubert <fredrik@roubert.name>
Wed, 31 Oct 2018 11:21:15 +0000 (12:21 +0100)
This eliminates the need for a scratch buffer in Locale::forLanguageTag()
and also the need for counting bytes required in uloc_forLanguageTag(),
something that ByteSink will now handle correctly.

icu4c/source/common/locid.cpp
icu4c/source/common/uloc_tag.cpp
icu4c/source/common/ulocimp.h

index 90b4a4bc87b56e7c1e1f28d035dcb7119eedab2c..6d38e4668bf515e8b132895278e29c31b5074c59 100644 (file)
@@ -869,43 +869,16 @@ Locale::forLanguageTag(StringPiece tag, UErrorCode& status)
     // parsing. Therefore the code here explicitly calls uloc_forLanguageTag()
     // and then Locale::init(), instead of just calling the normal constructor.
 
-    // All simple language tags will have the exact same length as ICU locale
-    // ID strings as they have as BCP-47 strings (like "en_US" for "en-US").
     CharString localeID;
-    int32_t resultCapacity = tag.size();
-
-    char* buffer;
-    int32_t parsedLength, reslen;
-
-    for (;;) {
-        buffer = localeID.getAppendBuffer(
-                /*minCapacity=*/resultCapacity,
-                /*desiredCapacityHint=*/resultCapacity,
-                resultCapacity,
-                status);
-
-        if (U_FAILURE(status)) {
-            return result;
-        }
-
-        reslen = ulocimp_forLanguageTag(
+    int32_t parsedLength;
+    {
+        CharStringByteSink sink(&localeID);
+        ulocimp_forLanguageTag(
                 tag.data(),
                 tag.length(),
-                buffer,
-                resultCapacity,
+                sink,
                 &parsedLength,
                 &status);
-
-        if (status != U_BUFFER_OVERFLOW_ERROR) {
-            break;
-        }
-
-        // For all BCP-47 language tags that use extensions, the corresponding
-        // ICU locale ID will be longer but uloc_forLanguageTag() does compute
-        // the exact length needed so this memory reallocation will be done at
-        // most once.
-        resultCapacity = reslen;
-        status = U_ZERO_ERROR;
     }
 
     if (U_FAILURE(status)) {
@@ -917,15 +890,6 @@ Locale::forLanguageTag(StringPiece tag, UErrorCode& status)
         return result;
     }
 
-    localeID.append(buffer, reslen, status);
-    if (status == U_STRING_NOT_TERMINATED_WARNING) {
-        status = U_ZERO_ERROR;  // Terminators provided by CharString.
-    }
-
-    if (U_FAILURE(status)) {
-        return result;
-    }
-
     result.init(localeID.data(), /*canonicalize=*/FALSE);
     if (result.isBogus()) {
         status = U_ILLEGAL_ARGUMENT_ERROR;
index 5632a432bdd43ce0fd64cf42bb8d77d4e5fec9e9..c44a621cfc760bf48b87b898cf6ffded43398dd6 100644 (file)
@@ -1645,9 +1645,8 @@ cleanup:
 }
 
 
-static int32_t
-_appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorCode* status) {
-    int32_t reslen = 0;
+static void
+_appendKeywords(ULanguageTag* langtag, icu::ByteSink& sink, UErrorCode* status) {
     int32_t i, n;
     int32_t len;
     ExtensionListEntry *kwdFirst = NULL;
@@ -1657,7 +1656,7 @@ _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorC
     UBool posixVariant = FALSE;
 
     if (U_FAILURE(*status)) {
-        return 0;
+        return;
     }
 
     /* Determine if variants already exists */
@@ -1714,10 +1713,7 @@ _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorC
 
     if (U_SUCCESS(*status) && posixVariant) {
         len = (int32_t) uprv_strlen(_POSIX);
-        if (reslen < capacity) {
-            uprv_memcpy(appendAt + reslen, _POSIX, uprv_min(len, capacity - reslen));
-        }
-        reslen += len;
+        sink.Append(_POSIX, len);
     }
 
     if (U_SUCCESS(*status) && kwdFirst != NULL) {
@@ -1725,37 +1721,21 @@ _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorC
         UBool firstValue = TRUE;
         kwd = kwdFirst;
         do {
-            if (reslen < capacity) {
-                if (firstValue) {
-                    /* '@' */
-                    *(appendAt + reslen) = LOCALE_EXT_SEP;
-                    firstValue = FALSE;
-                } else {
-                    /* ';' */
-                    *(appendAt + reslen) = LOCALE_KEYWORD_SEP;
-                }
+            if (firstValue) {
+                sink.Append("@", 1);
+                firstValue = FALSE;
+            } else {
+                sink.Append(";", 1);
             }
-            reslen++;
 
             /* key */
             len = (int32_t)uprv_strlen(kwd->key);
-            if (reslen < capacity) {
-                uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity - reslen));
-            }
-            reslen += len;
-
-            /* '=' */
-            if (reslen < capacity) {
-                *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP;
-            }
-            reslen++;
+            sink.Append(kwd->key, len);
+            sink.Append("=", 1);
 
             /* type */
             len = (int32_t)uprv_strlen(kwd->value);
-            if (reslen < capacity) {
-                uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacity - reslen));
-            }
-            reslen += len;
+            sink.Append(kwd->value, len);
 
             kwd = kwd->next;
         } while (kwd);
@@ -1770,10 +1750,8 @@ _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorC
     }
 
     if (U_FAILURE(*status)) {
-        return 0;
+        return;
     }
-
-    return u_terminateChars(appendAt, capacity, reslen, status);
 }
 
 static void
@@ -2638,25 +2616,37 @@ uloc_forLanguageTag(const char* langtag,
                     int32_t localeIDCapacity,
                     int32_t* parsedLength,
                     UErrorCode* status) {
-    return ulocimp_forLanguageTag(
-            langtag,
-            -1,
-            localeID,
-            localeIDCapacity,
-            parsedLength,
-            status);
+    if (U_FAILURE(*status)) {
+        return 0;
+    }
+
+    icu::CheckedArrayByteSink sink(localeID, localeIDCapacity);
+    ulocimp_forLanguageTag(langtag, -1, sink, parsedLength, status);
+
+    int32_t reslen = sink.NumberOfBytesAppended();
+
+    if (U_FAILURE(*status)) {
+        return reslen;
+    }
+
+    if (sink.Overflowed()) {
+        *status = U_BUFFER_OVERFLOW_ERROR;
+    } else {
+        u_terminateChars(localeID, localeIDCapacity, reslen, status);
+    }
+
+    return reslen;
 }
 
 
-U_CAPI int32_t U_EXPORT2
+U_CAPI void U_EXPORT2
 ulocimp_forLanguageTag(const char* langtag,
                        int32_t tagLen,
-                       char* localeID,
-                       int32_t localeIDCapacity,
+                       icu::ByteSink& sink,
                        int32_t* parsedLength,
                        UErrorCode* status) {
     ULanguageTag *lt;
-    int32_t reslen = 0;
+    UBool isEmpty = TRUE;
     const char *subtag, *p;
     int32_t len;
     int32_t i, n;
@@ -2664,7 +2654,7 @@ ulocimp_forLanguageTag(const char* langtag,
 
     lt = ultag_parse(langtag, tagLen, parsedLength, status);
     if (U_FAILURE(*status)) {
-        return 0;
+        return;
     }
 
     /* language */
@@ -2672,10 +2662,8 @@ ulocimp_forLanguageTag(const char* langtag,
     if (uprv_compareInvCharsAsAscii(subtag, LANG_UND) != 0) {
         len = (int32_t)uprv_strlen(subtag);
         if (len > 0) {
-            if (reslen < localeIDCapacity) {
-                uprv_memcpy(localeID, subtag, uprv_min(len, localeIDCapacity - reslen));
-            }
-            reslen += len;
+            sink.Append(subtag, len);
+            isEmpty = FALSE;
         }
     }
 
@@ -2683,41 +2671,27 @@ ulocimp_forLanguageTag(const char* langtag,
     subtag = ultag_getScript(lt);
     len = (int32_t)uprv_strlen(subtag);
     if (len > 0) {
-        if (reslen < localeIDCapacity) {
-            *(localeID + reslen) = LOCALE_SEP;
-        }
-        reslen++;
+        sink.Append("_", 1);
+        isEmpty = FALSE;
 
         /* write out the script in title case */
-        p = subtag;
-        while (*p) {
-            if (reslen < localeIDCapacity) {
-                if (p == subtag) {
-                    *(localeID + reslen) = uprv_toupper(*p);
-                } else {
-                    *(localeID + reslen) = *p;
-                }
-            }
-            reslen++;
-            p++;
-        }
+        char c = uprv_toupper(*subtag);
+        sink.Append(&c, 1);
+        sink.Append(subtag + 1, len - 1);
     }
 
     /* region */
     subtag = ultag_getRegion(lt);
     len = (int32_t)uprv_strlen(subtag);
     if (len > 0) {
-        if (reslen < localeIDCapacity) {
-            *(localeID + reslen) = LOCALE_SEP;
-        }
-        reslen++;
-        /* write out the retion in upper case */
+        sink.Append("_", 1);
+        isEmpty = FALSE;
+
+        /* write out the region in upper case */
         p = subtag;
         while (*p) {
-            if (reslen < localeIDCapacity) {
-                *(localeID + reslen) = uprv_toupper(*p);
-            }
-            reslen++;
+            char c = uprv_toupper(*p);
+            sink.Append(&c, 1);
             p++;
         }
         noRegion = FALSE;
@@ -2727,25 +2701,19 @@ ulocimp_forLanguageTag(const char* langtag,
     n = ultag_getVariantsSize(lt);
     if (n > 0) {
         if (noRegion) {
-            if (reslen < localeIDCapacity) {
-                *(localeID + reslen) = LOCALE_SEP;
-            }
-            reslen++;
+            sink.Append("_", 1);
+            isEmpty = FALSE;
         }
 
         for (i = 0; i < n; i++) {
             subtag = ultag_getVariant(lt, i);
-            if (reslen < localeIDCapacity) {
-                *(localeID + reslen) = LOCALE_SEP;
-            }
-            reslen++;
+            sink.Append("_", 1);
+
             /* write out the variant in upper case */
             p = subtag;
             while (*p) {
-                if (reslen < localeIDCapacity) {
-                    *(localeID + reslen) = uprv_toupper(*p);
-                }
-                reslen++;
+                char c = uprv_toupper(*p);
+                sink.Append(&c, 1);
                 p++;
             }
         }
@@ -2755,19 +2723,12 @@ ulocimp_forLanguageTag(const char* langtag,
     n = ultag_getExtensionsSize(lt);
     subtag = ultag_getPrivateUse(lt);
     if (n > 0 || uprv_strlen(subtag) > 0) {
-        if (reslen == 0 && n > 0) {
+        if (isEmpty && n > 0) {
             /* need a language */
-            if (reslen < localeIDCapacity) {
-                uprv_memcpy(localeID + reslen, LANG_UND, uprv_min(LANG_UND_LEN, localeIDCapacity - reslen));
-            }
-            reslen += LANG_UND_LEN;
+            sink.Append(LANG_UND, LANG_UND_LEN);
         }
-        len = _appendKeywords(lt, localeID + reslen, localeIDCapacity - reslen, status);
-        reslen += len;
+        _appendKeywords(lt, sink, status);
     }
 
     ultag_close(lt);
-    return u_terminateChars(localeID, localeIDCapacity, reslen, status);
 }
-
-
index 0e69a22e7aa609f78ef9656dc24844ec3dc6592d..f268f8995aaa5a6b7b653c56473890ad00bc00d2 100644 (file)
@@ -101,21 +101,18 @@ ulocimp_toLanguageTag(const char* localeID,
  * the first paragraph, so some information might be lost.
  * @param langtag   the input BCP47 language tag.
  * @param tagLen    the length of langtag, or -1 to call uprv_strlen().
- * @param localeID  the output buffer receiving a locale ID for the
+ * @param sink      the output sink receiving a locale ID for the
  *                  specified BCP47 language tag.
- * @param localeIDCapacity  the size of the locale ID output buffer.
  * @param parsedLength  if not NULL, successfully parsed length
  *                      for the input language tag is set.
  * @param err       error information if receiving the locald ID
  *                  failed.
- * @return          the length of the locale ID.
  * @internal ICU 63
  */
-U_CAPI int32_t U_EXPORT2
+U_CAPI void U_EXPORT2
 ulocimp_forLanguageTag(const char* langtag,
                        int32_t tagLen,
-                       char* localeID,
-                       int32_t localeIDCapacity,
+                       icu::ByteSink& sink,
                        int32_t* parsedLength,
                        UErrorCode* err);