ICU-21035 add & use CharString::extract(dest buffer)

author Markus Scherer <markus.icu@gmail.com>

Fri, 21 Aug 2020 19:19:41 +0000 (19:19 +0000)

committer Markus Scherer <markus.icu@gmail.com>

Fri, 21 Aug 2020 23:29:25 +0000 (16:29 -0700)
author Markus Scherer <markus.icu@gmail.com>
Fri, 21 Aug 2020 19:19:41 +0000 (19:19 +0000)
committer Markus Scherer <markus.icu@gmail.com>
Fri, 21 Aug 2020 23:29:25 +0000 (16:29 -0700)
diff --git a/icu4c/source/common/charstr.cpp b/icu4c/source/common/charstr.cpp

index 4878c737fb5944fda08abb919d35ddfcf4776724..318a185b3f1d645cb31b9db10d4b7bb6bfc27040 100644 (file)
--- a/icu4c/source/common/charstr.cpp
+++ b/icu4c/source/common/charstr.cpp
@@ -20,6 +20,7 @@
  #include "cmemory.h"
  #include "cstring.h"
  #include "uinvchar.h"
+#include "ustr_imp.h"
  
  U_NAMESPACE_BEGIN
  
@@ -46,6 +47,19 @@ char *CharString::cloneData(UErrorCode &errorCode) const {
      return p;
  }
  
+int32_t CharString::extract(char *dest, int32_t capacity, UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode)) { return len; }
+    if (capacity < 0 || (capacity > 0 && dest == nullptr)) {
+        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return len;
+    }
+    const char *src = buffer.getAlias();
+    if (0 < len && len <= capacity && src != dest) {
+        uprv_memcpy(dest, src, len);
+    }
+    return u_terminateChars(dest, capacity, len, &errorCode);
+}
+
  CharString &CharString::copyFrom(const CharString &s, UErrorCode &errorCode) {
      if(U_SUCCESS(errorCode) && this!=&s && ensureCapacity(s.len+1, 0, errorCode)) {
          len=s.len;
diff --git a/icu4c/source/common/charstr.h b/icu4c/source/common/charstr.h

index 9400fbb5461f0980b4c248e41bf55f3c9e95f589..6619faac618193bce16f166bba844a148c73b30e 100644 (file)
--- a/icu4c/source/common/charstr.h
+++ b/icu4c/source/common/charstr.h
@@ -87,6 +87,22 @@ public:
       * The caller must uprv_free() the result.
       */
      char *cloneData(UErrorCode &errorCode) const;
+    /**
+     * Copies the contents of the string into dest.
+     * Checks if there is enough space in dest, extracts the entire string if possible,
+     * and NUL-terminates dest if possible.
+     *
+     * If the string fits into dest but cannot be NUL-terminated (length()==capacity),
+     * then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
+     * If the string itself does not fit into dest (length()>capacity),
+     * then the error code is set to U_BUFFER_OVERFLOW_ERROR.
+     *
+     * @param dest Destination string buffer.
+     * @param capacity Size of the dest buffer (number of chars).
+     * @param errorCode ICU error code.
+     * @return length()
+     */
+    int32_t extract(char *dest, int32_t capacity, UErrorCode &errorCode) const;
  
      bool operator==(StringPiece other) const {
          return len == other.length() && (len == 0 || uprv_memcmp(data(), other.data(), len) == 0);
diff --git a/icu4c/source/common/icuplug.cpp b/icu4c/source/common/icuplug.cpp

index c6439cc819a3229f129d2f27bb946bc4d374efbf..303bd4406e2a236bb75f64805c3b166b24db7bd1 100644 (file)
--- a/icu4c/source/common/icuplug.cpp
+++ b/icu4c/source/common/icuplug.cpp
@@ -782,8 +782,8 @@ uplug_init(UErrorCode *status) {
      /* plugin_file is not used for processing - it is only used 
         so that uplug_getPluginFile() works (i.e. icuinfo)
      */
-    uprv_strncpy(plugin_file, pluginFile.data(), sizeof(plugin_file));
-        
+    pluginFile.extract(plugin_file, sizeof(plugin_file), *status);
+
  #if UPLUG_TRACE
      DBG((stderr, "pluginfile= %s len %d/%d\n", plugin_file, (int)strlen(plugin_file), (int)sizeof(plugin_file)));
  #endif
diff --git a/icu4c/source/common/loclikely.cpp b/icu4c/source/common/loclikely.cpp

index ea0d98cd9e350402f6c3c57b37362cbf2e7bacb5..e87b06edcb42af289f3e5951ad90f98402fbfabd 100644 (file)
--- a/icu4c/source/common/loclikely.cpp
+++ b/icu4c/source/common/loclikely.cpp
@@ -464,18 +464,7 @@ parseTagString(
          goto error;
      }
  
-    {
-        icu::CharString result = ulocimp_getLanguage(position, &position, *err);
-        if (U_FAILURE(*err)) {
-            goto error;
-        }
-
-        subtagLength = result.length();
-        if (subtagLength <= *langLength) {
-            uprv_memcpy(lang, result.data(), subtagLength);
-        }
-        u_terminateChars(lang, *langLength, subtagLength, err);
-    }
+    subtagLength = ulocimp_getLanguage(position, &position, *err).extract(lang, *langLength, *err);
  
      /*
       * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING
diff --git a/icu4c/source/common/uloc.cpp b/icu4c/source/common/uloc.cpp

index 94cbaab220e629e37db313e0bac3f4a2b2455f17..45fab01917b6d24e1475744060ede533db1e7fac 100644 (file)
--- a/icu4c/source/common/uloc.cpp
+++ b/icu4c/source/common/uloc.cpp
@@ -50,9 +50,6 @@
  #include "uassert.h"
  #include "charstr.h"
  
-#include <algorithm>
-#include <stdio.h> /* for sprintf */
-
  U_NAMESPACE_USE
  
  /* ### Declarations **************************************************/
@@ -892,7 +889,6 @@ uloc_setKeywordValue(const char* keywordName,
      char* startSearchHere = NULL;
      char* keywordStart = NULL;
      CharString updatedKeysAndValues;
-    int32_t updatedKeysAndValuesLen;
      UBool handledInputKeyAndValue = FALSE;
      char keyValuePrefix = '@';
  
@@ -1072,18 +1068,10 @@ uloc_setKeywordValue(const char* keywordName,
          return bufLen;
      }
  
-    updatedKeysAndValuesLen = updatedKeysAndValues.length();
-    /* needLen = length of the part before '@' + length of updated key-value part including '@' */
-    needLen = (int32_t)(startSearchHere - buffer) + updatedKeysAndValuesLen;
-    if(needLen >= bufferCapacity) {
-        *status = U_BUFFER_OVERFLOW_ERROR;
-        return needLen; /* no change */
-    }
-    if (updatedKeysAndValuesLen > 0) {
-        uprv_strncpy(startSearchHere, updatedKeysAndValues.data(), updatedKeysAndValuesLen);
-    }
-    buffer[needLen]=0;
-    return needLen;
+    // needLen = length of the part before '@'
+    needLen = (int32_t)(startSearchHere - buffer);
+    return needLen + updatedKeysAndValues.extract(
+                         startSearchHere, bufferCapacity - needLen, *status);
  }
  
  /* ### ID parsing implementation **************************************************/
@@ -1232,13 +1220,7 @@ ulocimp_getScript(const char *localeID,
                    char *script, int32_t scriptCapacity,
                    const char **pEnd) {
      ErrorCode status;
-    CharString result = ulocimp_getScript(localeID, pEnd, status);
-    if (status.isFailure()) {
-        return 0;
-    }
-    int32_t reslen = result.length();
-    uprv_memcpy(script, result.data(), std::min(reslen, scriptCapacity));
-    return reslen;
+    return ulocimp_getScript(localeID, pEnd, status).extract(script, scriptCapacity, status);
  }
  
  static CharString
@@ -1281,13 +1263,7 @@ ulocimp_getCountry(const char *localeID,
                     char *country, int32_t countryCapacity,
                     const char **pEnd) {
      ErrorCode status;
-    CharString result = ulocimp_getCountry(localeID, pEnd, status);
-    if (status.isFailure()) {
-        return 0;
-    }
-    int32_t reslen = result.length();
-    uprv_memcpy(country, result.data(), std::min(reslen, countryCapacity));
-    return reslen;
+    return ulocimp_getCountry(localeID, pEnd, status).extract(country, countryCapacity, status);
  }
  
  /**
@@ -1744,16 +1720,7 @@ uloc_getLanguage(const char*    localeID,
          localeID=uloc_getDefault();
      }
  
-    CharString result = ulocimp_getLanguage(localeID, NULL, *err);
-    if (U_FAILURE(*err)) {
-        return 0;
-    }
-
-    int32_t reslen = result.length();
-    if (reslen <= languageCapacity) {
-        uprv_memcpy(language, result.data(), reslen);
-    }
-    return u_terminateChars(language, languageCapacity, reslen, err);
+    return ulocimp_getLanguage(localeID, NULL, *err).extract(language, languageCapacity, *err);
  }
  
  U_CAPI int32_t U_EXPORT2
diff --git a/icu4c/source/common/uloc_tag.cpp b/icu4c/source/common/uloc_tag.cpp

index 5eed02c61148b8a4a672abfd1ec0e68f7ddb8c80..8faac32cb174f7a4194faf1611fa0c0fcf0f529f 100644 (file)
--- a/icu4c/source/common/uloc_tag.cpp
+++ b/icu4c/source/common/uloc_tag.cpp
@@ -1413,7 +1413,7 @@ _appendKeywordsToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool st
                          break;
                      }
  
-                    uprv_strcpy(pExtBuf, bcpValue);
+                    buf.extract(pExtBuf, resultCapacity, tmpStatus);
                      T_CString_toLowerCase(pExtBuf);
  
                      extBuf->append(pExtBuf, bcpValueLen, tmpStatus);
diff --git a/icu4c/source/i18n/rulebasedcollator.cpp b/icu4c/source/i18n/rulebasedcollator.cpp

index 60acf17815a5ee4ba7214d8d02354472fb8966f7..917482d65bb4d651b0880f7b50f3a3d6d0fda184 100644 (file)
--- a/icu4c/source/i18n/rulebasedcollator.cpp
+++ b/icu4c/source/i18n/rulebasedcollator.cpp
@@ -1600,10 +1600,7 @@ RuleBasedCollator::internalGetShortDefinitionString(const char *locale,
      appendSubtag(result, 'Z', subtag, length, errorCode);
  
      if(U_FAILURE(errorCode)) { return 0; }
-    if(result.length() <= capacity) {
-        uprv_memcpy(buffer, result.data(), result.length());
-    }
-    return u_terminateChars(buffer, capacity, result.length(), &errorCode);
+    return result.extract(buffer, capacity, errorCode);
  }
  
  UBool
diff --git a/icu4c/source/i18n/ucol_sit.cpp b/icu4c/source/i18n/ucol_sit.cpp

index 92f332d6d06eb098da18c8331d2dc5b6e7079ad1..4dc81aebcc942d8b837b1a4400297712721c8c51 100644 (file)
--- a/icu4c/source/i18n/ucol_sit.cpp
+++ b/icu4c/source/i18n/ucol_sit.cpp
@@ -372,10 +372,7 @@ int32_t ucol_sit_dumpSpecs(CollatorSpec *s, char *destination, int32_t capacity,
                      }
                      len += s->entries[i].length();
                  } else {
-                    len += s->entries[i].length();
-                    if(len < capacity) {
-                        uprv_strncat(destination,s->entries[i].data(), s->entries[i].length());
-                    }
+                    len += s->entries[i].extract(destination + len, capacity - len, *status);
                  }
              }
          }
diff --git a/icu4c/source/test/cintltst/cloctst.c b/icu4c/source/test/cintltst/cloctst.c

index 29f4c82ec7ee8c50e5bfd957f7cbca2e219106ff..f87e518a59e2bcd676de61ce2e7f67d5079f774f 100644 (file)
--- a/icu4c/source/test/cintltst/cloctst.c
+++ b/icu4c/source/test/cintltst/cloctst.c
@@ -2233,7 +2233,12 @@ static void TestKeywordSetError(void)
          strcpy(buffer,kwSetTestCases[i].l);
          status = U_ZERO_ERROR;
          res = uloc_setKeywordValue(kwSetTestCases[i].k, kwSetTestCases[i].v, buffer, blen, &status);
-        if(status != U_BUFFER_OVERFLOW_ERROR) {
+        if(res == blen) {
+            if(status != U_STRING_NOT_TERMINATED_WARNING) {
+                log_err("expected not terminated warning on buffer %d got %s, len %d (%s + [%s=%s])\n", blen, u_errorName(status), res, kwSetTestCases[i].l, kwSetTestCases[i].k, kwSetTestCases[i].v);
+                return;
+            }
+        } else if(status != U_BUFFER_OVERFLOW_ERROR) {
              log_err("expected buffer overflow on buffer %d got %s, len %d (%s + [%s=%s])\n", blen, u_errorName(status), res, kwSetTestCases[i].l, kwSetTestCases[i].k, kwSetTestCases[i].v);
              return;
          }
diff --git a/icu4c/source/test/intltest/strtest.cpp b/icu4c/source/test/intltest/strtest.cpp

index 1185d31c35248e7e5db4e5ad68ceed2e07e10d7b..54b5a1930086ff5bd626d654cf41ece02ee92195 100644 (file)
--- a/icu4c/source/test/intltest/strtest.cpp
+++ b/icu4c/source/test/intltest/strtest.cpp
@@ -810,6 +810,27 @@ StringTest::TestCharString() {
                  "Long string over 40 characters to trigger heap allocation",
                  s3.data());
      }
+
+    {
+        // extract()
+        errorCode.reset();
+        CharString s("abc", errorCode);
+        char buffer[10];
+
+        s.extract(buffer, 10, errorCode);
+        assertEquals("abc.extract(10) success", U_ZERO_ERROR, errorCode.get());
+        assertEquals("abc.extract(10) output", "abc", buffer);
+
+        strcpy(buffer, "012345");
+        s.extract(buffer, 3, errorCode);
+        assertEquals("abc.extract(3) not terminated",
+                     U_STRING_NOT_TERMINATED_WARNING, errorCode.reset());
+        assertEquals("abc.extract(3) output", "abc345", buffer);
+
+        strcpy(buffer, "012345");
+        s.extract(buffer, 2, errorCode);
+        assertEquals("abc.extract(2) overflow", U_BUFFER_OVERFLOW_ERROR, errorCode.reset());
+    }
  }
  
  void
diff --git a/icu4c/source/tools/toolutil/pkg_genc.cpp b/icu4c/source/tools/toolutil/pkg_genc.cpp

index 825112881807fc91c2c7556682d914eecbb623bc..17347bac5d77e9ab430d51d1ac19e7da6bec451b 100644 (file)
--- a/icu4c/source/tools/toolutil/pkg_genc.cpp
+++ b/icu4c/source/tools/toolutil/pkg_genc.cpp
@@ -738,8 +738,8 @@ getOutFilename(
          exit(U_ILLEGAL_ARGUMENT_ERROR);
      }
  
-    uprv_strcpy(outFilename, outFilenameBuilder.data());
-    uprv_strcpy(entryName, entryNameBuilder.data());
+    outFilenameBuilder.extract(outFilename, outFilenameCapacity, status);
+    entryNameBuilder.extract(entryName, entryNameCapacity, status);
  }
  
  #ifdef CAN_GENERATE_OBJECTS
author	Markus Scherer <markus.icu@gmail.com>
	Fri, 21 Aug 2020 19:19:41 +0000 (19:19 +0000)
committer	Markus Scherer <markus.icu@gmail.com>
	Fri, 21 Aug 2020 23:29:25 +0000 (16:29 -0700)
icu4c/source/common/charstr.cpp		patch \| blob \| history
icu4c/source/common/charstr.h		patch \| blob \| history
icu4c/source/common/icuplug.cpp		patch \| blob \| history
icu4c/source/common/loclikely.cpp		patch \| blob \| history
icu4c/source/common/uloc.cpp		patch \| blob \| history
icu4c/source/common/uloc_tag.cpp		patch \| blob \| history
icu4c/source/i18n/rulebasedcollator.cpp		patch \| blob \| history
icu4c/source/i18n/ucol_sit.cpp		patch \| blob \| history
icu4c/source/test/cintltst/cloctst.c		patch \| blob \| history
icu4c/source/test/intltest/strtest.cpp		patch \| blob \| history
icu4c/source/tools/toolutil/pkg_genc.cpp		patch \| blob \| history