ICU-13597 Fixing safety of toUnicodeString() readonly aliases by moving that behavior...

author Shane Carr <shane@unicode.org>

Wed, 28 Mar 2018 03:42:12 +0000 (03:42 +0000)

committer Shane Carr <shane@unicode.org>

Wed, 28 Mar 2018 03:42:12 +0000 (03:42 +0000)
author Shane Carr <shane@unicode.org>
Wed, 28 Mar 2018 03:42:12 +0000 (03:42 +0000)
committer Shane Carr <shane@unicode.org>
Wed, 28 Mar 2018 03:42:12 +0000 (03:42 +0000)
diff --git a/icu4c/source/common/unicode/utypes.h b/icu4c/source/common/unicode/utypes.h

index dd89f39acb0dd3a91473f0fa9879d74df7e4ff4b..af700d53f8e3a811ee83a4348ee8c9cd162c82e8 100644 (file)
--- a/icu4c/source/common/unicode/utypes.h
+++ b/icu4c/source/common/unicode/utypes.h
@@ -541,13 +541,14 @@ typedef enum UErrorCode {
      U_FORMAT_INEXACT_ERROR,           /**< Cannot format a number exactly and rounding mode is ROUND_UNNECESSARY @stable ICU 4.8 */
  #ifndef U_HIDE_DRAFT_API
      U_NUMBER_ARG_OUTOFBOUNDS_ERROR,   /**< The argument to a NumberFormatter helper method was out of bounds; the bounds are usually 0 to 999. @draft ICU 61 */
+    U_NUMBER_SKELETON_SYNTAX_ERROR,   /**< The number skeleton passed to C++ NumberFormatter or C UNumberFormatter was invalid or contained a syntax error. @draft ICU 62 */
  #endif  // U_HIDE_DRAFT_API
  #ifndef U_HIDE_DEPRECATED_API
      /**
       * One more than the highest normal formatting API error code.
       * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
       */
-    U_FMT_PARSE_ERROR_LIMIT = 0x10113,
+    U_FMT_PARSE_ERROR_LIMIT = 0x10114,
  #endif  // U_HIDE_DEPRECATED_API
  
      /*
diff --git a/icu4c/source/common/utypes.cpp b/icu4c/source/common/utypes.cpp

index 5d6a0504ba682a32461ff20626e82fc2a87f0dd1..7531e465683342739164ff1164d2326417ed899d 100644 (file)
--- a/icu4c/source/common/utypes.cpp
+++ b/icu4c/source/common/utypes.cpp
@@ -126,7 +126,8 @@ _uFmtErrorName[U_FMT_PARSE_ERROR_LIMIT - U_FMT_PARSE_ERROR_START] = {
      "U_DEFAULT_KEYWORD_MISSING",
      "U_DECIMAL_NUMBER_SYNTAX_ERROR",
      "U_FORMAT_INEXACT_ERROR",
-    "U_NUMBER_ARG_OUTOFBOUNDS_ERROR"
+    "U_NUMBER_ARG_OUTOFBOUNDS_ERROR",
+    "U_NUMBER_SKELETON_SYNTAX_ERROR",
  };
  
  static const char * const
diff --git a/icu4c/source/i18n/number_capi.cpp b/icu4c/source/i18n/number_capi.cpp

index 7095fe0fd6fd9a72bec99798903ff53b66632a1f..f40b7dad5cbb35bae9545c577e3172beb7903820 100644 (file)
--- a/icu4c/source/i18n/number_capi.cpp
+++ b/icu4c/source/i18n/number_capi.cpp
@@ -155,7 +155,7 @@ unumf_resultToString(const UFormattedNumber* uresult, UChar* buffer, int32_t buf
          return result->string.length();
      }
  
-    return result->string.toUnicodeString().extract(buffer, bufferCapacity, *ec);
+    return result->string.toTempUnicodeString().extract(buffer, bufferCapacity, *ec);
  }
  
  U_CAPI void U_EXPORT2
diff --git a/icu4c/source/i18n/number_skeletons.cpp b/icu4c/source/i18n/number_skeletons.cpp

index 0958d95c554d79cddf09cc59fef02a7c862ee0cf..444471ea3bdda4fe1c08c6aed4d9d59f27422286 100644 (file)
--- a/icu4c/source/i18n/number_skeletons.cpp
+++ b/icu4c/source/i18n/number_skeletons.cpp
@@ -774,7 +774,9 @@ bool
  blueprint_helpers::parseExponentSignOption(const StringSegment& segment, MacroProps& macros, UErrorCode&) {
      // Get the sign display type out of the CharsTrie data structure.
      UCharsTrie tempStemTrie(kSerializedStemTrie);
-    UStringTrieResult result = tempStemTrie.next(segment.toUnicodeString().getBuffer(), segment.length());
+    UStringTrieResult result = tempStemTrie.next(
+            segment.toTempUnicodeString().getBuffer(),
+            segment.length());
      if (result != USTRINGTRIE_INTERMEDIATE_VALUE && result != USTRINGTRIE_FINAL_VALUE) {
          return false;
      }
@@ -788,6 +790,7 @@ blueprint_helpers::parseExponentSignOption(const StringSegment& segment, MacroPr
  
  void blueprint_helpers::parseCurrencyOption(const StringSegment& segment, MacroProps& macros,
                                              UErrorCode& status) {
+    // Can't use toTempUnicodeString() because getTerminatedBuffer is non-const
      const UChar* currencyCode = segment.toUnicodeString().getTerminatedBuffer();
      UErrorCode localStatus = U_ZERO_ERROR;
      CurrencyUnit currency(currencyCode, localStatus);
@@ -808,7 +811,7 @@ blueprint_helpers::generateCurrencyOption(const CurrencyUnit& currency, UnicodeS
  
  void blueprint_helpers::parseMeasureUnitOption(const StringSegment& segment, MacroProps& macros,
                                                 UErrorCode& status) {
-    UnicodeString stemString = segment.toUnicodeString();
+    const UnicodeString stemString = segment.toTempUnicodeString();
  
      // NOTE: The category (type) of the unit is guaranteed to be a valid subtag (alphanumeric)
      // http://unicode.org/reports/tr35/#Validity_Data
@@ -1043,7 +1046,7 @@ void blueprint_helpers::parseIncrementOption(const StringSegment& segment, Macro
                                               UErrorCode& status) {
      // Need to do char <-> UChar conversion...
      CharString buffer;
-    SKELETON_UCHAR_TO_CHAR(buffer, segment.toUnicodeString(), 0, segment.length(), status);
+    SKELETON_UCHAR_TO_CHAR(buffer, segment.toTempUnicodeString(), 0, segment.length(), status);
  
      // Utilize DecimalQuantity/decNumber to parse this for us.
      DecimalQuantity dq;
@@ -1155,7 +1158,7 @@ void blueprint_helpers::parseNumberingSystemOption(const StringSegment& segment,
                                                     UErrorCode& status) {
      // Need to do char <-> UChar conversion...
      CharString buffer;
-    SKELETON_UCHAR_TO_CHAR(buffer, segment.toUnicodeString(), 0, segment.length(), status);
+    SKELETON_UCHAR_TO_CHAR(buffer, segment.toTempUnicodeString(), 0, segment.length(), status);
  
      NumberingSystem* ns = NumberingSystem::createInstanceByName(buffer.data(), status);
      if (ns == nullptr) {
diff --git a/icu4c/source/i18n/number_skeletons.h b/icu4c/source/i18n/number_skeletons.h

index aeae63f4d9fd6b98bfe7f5b145e03ddfad1ba923..14f1bdbe70952a8d12b54bf7af22274438ba4c32 100644 (file)
--- a/icu4c/source/i18n/number_skeletons.h
+++ b/icu4c/source/i18n/number_skeletons.h
@@ -16,8 +16,6 @@ using icu::numparse::impl::StringSegment;
  U_NAMESPACE_BEGIN namespace number {
  namespace impl {
  
-static constexpr UErrorCode U_NUMBER_SKELETON_SYNTAX_ERROR = U_ILLEGAL_ARGUMENT_ERROR;
-
  // Forward-declaration
  struct SeenMacroProps;
  
diff --git a/icu4c/source/i18n/number_stringbuilder.cpp b/icu4c/source/i18n/number_stringbuilder.cpp

index 2759ed456836cf43c148bcee4b2592e3ba3d2b7c..a1900deab82f9b70a16e00d651d1267ab851c727 100644 (file)
--- a/icu4c/source/i18n/number_stringbuilder.cpp
+++ b/icu4c/source/i18n/number_stringbuilder.cpp
@@ -334,6 +334,10 @@ int32_t NumberStringBuilder::remove(int32_t index, int32_t count) {
  }
  
  UnicodeString NumberStringBuilder::toUnicodeString() const {
+    return UnicodeString(getCharPtr() + fZero, fLength);
+}
+
+const UnicodeString NumberStringBuilder::toTempUnicodeString() const {
      // Readonly-alias constructor:
      return UnicodeString(FALSE, getCharPtr() + fZero, fLength);
  }
diff --git a/icu4c/source/i18n/number_stringbuilder.h b/icu4c/source/i18n/number_stringbuilder.h

index a97cc9ca026ad09a6c026181ec0bd791bcdfbe3d..f92547679cae4508da7399e00056670ec3dcebe8 100644 (file)
--- a/icu4c/source/i18n/number_stringbuilder.h
+++ b/icu4c/source/i18n/number_stringbuilder.h
@@ -84,8 +84,17 @@ class U_I18N_API NumberStringBuilder : public UMemory {
  
      int32_t insert(int32_t index, const NumberStringBuilder &other, UErrorCode &status);
  
+    /**
+     * Gets a "safe" UnicodeString that can be used even after the NumberStringBuilder is destructed.
+     * */
      UnicodeString toUnicodeString() const;
  
+    /**
+     * Gets an "unsafe" UnicodeString that is valid only as long as the NumberStringBuilder is alive and
+     * unchanged. Slightly faster than toUnicodeString().
+     */
+    const UnicodeString toTempUnicodeString() const;
+
      UnicodeString toDebugString() const;
  
      const char16_t *chars() const;
diff --git a/icu4c/source/i18n/number_types.h b/icu4c/source/i18n/number_types.h

index ac0188f49824492956a514b82d772aaebc2367fb..1081919c4cc1814862e96c16e12e4e93c351d4ef 100644 (file)
--- a/icu4c/source/i18n/number_types.h
+++ b/icu4c/source/i18n/number_types.h
@@ -111,7 +111,16 @@ class U_I18N_API CharSequence {
          }
      }
  
+    /**
+     * Gets a "safe" UnicodeString that can be used even after the CharSequence is destructed.
+     * */
      virtual UnicodeString toUnicodeString() const = 0;
+
+    /**
+     * Gets an "unsafe" UnicodeString that is valid only as long as the CharSequence is alive and
+     * unchanged. Slightly faster than toUnicodeString().
+     */
+    virtual const UnicodeString toTempUnicodeString() const = 0;
  };
  
  class U_I18N_API AffixPatternProvider {
diff --git a/icu4c/source/i18n/number_utils.h b/icu4c/source/i18n/number_utils.h

index 60879db085776863aea5ad4ecbc41f3d5709484a..8f74692cc6129837dca3608170691f6bebfaedf1 100644 (file)
--- a/icu4c/source/i18n/number_utils.h
+++ b/icu4c/source/i18n/number_utils.h
@@ -39,12 +39,13 @@ class UnicodeStringCharSequence : public CharSequence {
      }
  
      UnicodeString toUnicodeString() const U_OVERRIDE {
-        // Allocate a UnicodeString of the correct length
-        UnicodeString output(length(), 0, -1);
-        for (int32_t i = 0; i < length(); i++) {
-            output.append(charAt(i));
-        }
-        return output;
+        // Performs a copy:
+        return fStr;
+    }
+
+    const UnicodeString toTempUnicodeString() const U_OVERRIDE {
+        // Readonly alias:
+        return UnicodeString().fastCopyFrom(fStr);
      }
  
    private:
diff --git a/icu4c/source/i18n/numparse_currency.cpp b/icu4c/source/i18n/numparse_currency.cpp

index 61f07acce2f22fc87f27b6e4e5aeb676ecf87986..504df74e369357ee867a22100d387e1646accbfc 100644 (file)
--- a/icu4c/source/i18n/numparse_currency.cpp
+++ b/icu4c/source/i18n/numparse_currency.cpp
@@ -33,9 +33,8 @@ bool CurrencyNamesMatcher::match(StringSegment& segment, ParsedNumber& result, U
          return false;
      }
  
-    // NOTE: This requires a new UnicodeString to be allocated, instead of using the StringSegment.
-    // This should be fixed with #13584.
-    UnicodeString segmentString = segment.toUnicodeString();
+    // NOTE: This call site should be improved with #13584.
+    const UnicodeString segmentString = segment.toTempUnicodeString();
  
      // Try to parse the currency
      ParsePosition ppos(0);
diff --git a/icu4c/source/i18n/numparse_stringsegment.cpp b/icu4c/source/i18n/numparse_stringsegment.cpp

index 0a6e4fd104916ba247c4c25cf934d4171c543b29..6f35deb7e719d655addbc6a23aa5955509860e36 100644 (file)
--- a/icu4c/source/i18n/numparse_stringsegment.cpp
+++ b/icu4c/source/i18n/numparse_stringsegment.cpp
@@ -61,6 +61,10 @@ UChar32 StringSegment::codePointAt(int32_t index) const {
  }
  
  UnicodeString StringSegment::toUnicodeString() const {
+    return UnicodeString(fStr.getBuffer() + fStart, fEnd - fStart);
+}
+
+const UnicodeString StringSegment::toTempUnicodeString() const {
      // Use the readonly-aliasing constructor for efficiency.
      return UnicodeString(FALSE, fStr.getBuffer() + fStart, fEnd - fStart);
  }
@@ -134,7 +138,7 @@ bool StringSegment::codePointsEqual(UChar32 cp1, UChar32 cp2, bool foldCase) {
  }
  
  bool StringSegment::operator==(const UnicodeString& other) const {
-    return toUnicodeString() == other;
+    return toTempUnicodeString() == other;
  }
  
  
diff --git a/icu4c/source/i18n/numparse_types.h b/icu4c/source/i18n/numparse_types.h

index b02bb249ce6f11c3ad8463dc14c564017dcaa2b9..420a5f88c53eea612ea7d9f247659c076778e497 100644 (file)
--- a/icu4c/source/i18n/numparse_types.h
+++ b/icu4c/source/i18n/numparse_types.h
@@ -203,6 +203,8 @@ class StringSegment : public UMemory, public ::icu::number::impl::CharSequence {
  
      UnicodeString toUnicodeString() const override;
  
+    const UnicodeString toTempUnicodeString() const override;
+
      /**
       * Returns the first code point in the string segment, or -1 if the string starts with an invalid
       * code point.
diff --git a/icu4c/source/i18n/unicode/unum.h b/icu4c/source/i18n/unicode/unum.h

index 99864a169bab8af4f5abc0b38f6cf8538ebf6ed5..a8894a0052d52a98f031157e84c902d89ed6c211 100644 (file)
--- a/icu4c/source/i18n/unicode/unum.h
+++ b/icu4c/source/i18n/unicode/unum.h
@@ -29,12 +29,13 @@
  
  /**
   * \file
- * \brief C API: NumberFormat
+ * \brief C API: Compatibility APIs for number formatting.
   *
   * <h2> Number Format C API </h2>
   * 
- * <p><strong>IMPORTANT:</strong> New users with C++ capabilities are
- * strongly encouraged to see if numberformatter.h fits their use case.
+ * <p><strong>IMPORTANT:</strong> New users with are strongly encouraged to
+ * see if unumberformatter.h fits their use case.  Although not deprecated,
+ * this header is provided for backwards compatibility only.
   *
   * Number Format C API  Provides functions for
   * formatting and parsing a number.  Also provides methods for
@@ -399,6 +400,10 @@ typedef enum UNumberFormatFields {
   * number format is opened using the given pattern, which must conform
   * to the syntax described in DecimalFormat or RuleBasedNumberFormat,
   * respectively.
+ *
+ * <p><strong>NOTE::</strong> New users with are strongly encouraged to
+ * use unumf_openWithSkeletonAndLocale instead of unum_open.
+ *
   * @param pattern A pattern specifying the format to use. 
   * This parameter is ignored unless the style is
   * UNUM_PATTERN_DECIMAL or UNUM_PATTERN_RULEBASED.
diff --git a/icu4c/source/test/intltest/numbertest_affixutils.cpp b/icu4c/source/test/intltest/numbertest_affixutils.cpp

index 63c155ca49653ab7c4aaebf9db39fd0f5591e475..1d0dcc710b8aa75f2412d3a594ced0601722ca8c 100644 (file)
--- a/icu4c/source/test/intltest/numbertest_affixutils.cpp
+++ b/icu4c/source/test/intltest/numbertest_affixutils.cpp
@@ -226,6 +226,7 @@ void AffixUtilsTest::testUnescapeWithSymbolProvider() {
          AffixUtils::unescape(UnicodeStringCharSequence(input), sb, 0, provider, status);
          assertSuccess("Spot 1", status);
          assertEquals(input, expected, sb.toUnicodeString());
+        assertEquals(input, expected, sb.toTempUnicodeString());
      }
  
      // Test insertion position
diff --git a/icu4c/source/test/intltest/numbertest_stringsegment.cpp b/icu4c/source/test/intltest/numbertest_stringsegment.cpp

index 665bc7c52b0225dcd4fdc09cb11f0a40521db515..6c11df8f96b717662da4a835147295022a3c6fcf 100644 (file)
--- a/icu4c/source/test/intltest/numbertest_stringsegment.cpp
+++ b/icu4c/source/test/intltest/numbertest_stringsegment.cpp
@@ -50,10 +50,13 @@ void StringSegmentTest::testLength() {
  void StringSegmentTest::testCharAt() {
      StringSegment segment(SAMPLE_STRING, 0);
      assertEquals("Initial", SAMPLE_STRING, segment.toUnicodeString());
+    assertEquals("Initial", SAMPLE_STRING, segment.toTempUnicodeString());
      segment.adjustOffset(3);
      assertEquals("After adjust-offset", UnicodeString(u"radio 📻"), segment.toUnicodeString());
+    assertEquals("After adjust-offset", UnicodeString(u"radio 📻"), segment.toTempUnicodeString());
      segment.setLength(5);
      assertEquals("After adjust-length", UnicodeString(u"radio"), segment.toUnicodeString());
+    assertEquals("After adjust-length", UnicodeString(u"radio"), segment.toTempUnicodeString());
  }
  
  void StringSegmentTest::testGetCodePoint() {
author	Shane Carr <shane@unicode.org>
	Wed, 28 Mar 2018 03:42:12 +0000 (03:42 +0000)
committer	Shane Carr <shane@unicode.org>
	Wed, 28 Mar 2018 03:42:12 +0000 (03:42 +0000)
icu4c/source/common/unicode/utypes.h		patch \| blob \| history
icu4c/source/common/utypes.cpp		patch \| blob \| history
icu4c/source/i18n/number_capi.cpp		patch \| blob \| history
icu4c/source/i18n/number_skeletons.cpp		patch \| blob \| history
icu4c/source/i18n/number_skeletons.h		patch \| blob \| history
icu4c/source/i18n/number_stringbuilder.cpp		patch \| blob \| history
icu4c/source/i18n/number_stringbuilder.h		patch \| blob \| history
icu4c/source/i18n/number_types.h		patch \| blob \| history
icu4c/source/i18n/number_utils.h		patch \| blob \| history
icu4c/source/i18n/numparse_currency.cpp		patch \| blob \| history
icu4c/source/i18n/numparse_stringsegment.cpp		patch \| blob \| history
icu4c/source/i18n/numparse_types.h		patch \| blob \| history
icu4c/source/i18n/unicode/unum.h		patch \| blob \| history
icu4c/source/test/intltest/numbertest_affixutils.cpp		patch \| blob \| history
icu4c/source/test/intltest/numbertest_stringsegment.cpp		patch \| blob \| history