From: Shane Carr Date: Wed, 28 Mar 2018 03:42:12 +0000 (+0000) Subject: ICU-13597 Fixing safety of toUnicodeString() readonly aliases by moving that behavior... X-Git-Tag: release-62-rc~200^2~64 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=2ede84ce4742c60a14a3fd7354f9cee8267f3bae;p=icu ICU-13597 Fixing safety of toUnicodeString() readonly aliases by moving that behavior to a new method, toTempUnicodeString(). X-SVN-Rev: 41164 --- diff --git a/icu4c/source/common/unicode/utypes.h b/icu4c/source/common/unicode/utypes.h index dd89f39acb0..af700d53f8e 100644 --- a/icu4c/source/common/unicode/utypes.h +++ b/icu4c/source/common/unicode/utypes.h @@ -541,13 +541,14 @@ typedef enum UErrorCode { U_FORMAT_INEXACT_ERROR, /**< Cannot format a number exactly and rounding mode is ROUND_UNNECESSARY @stable ICU 4.8 */ #ifndef U_HIDE_DRAFT_API U_NUMBER_ARG_OUTOFBOUNDS_ERROR, /**< The argument to a NumberFormatter helper method was out of bounds; the bounds are usually 0 to 999. @draft ICU 61 */ + U_NUMBER_SKELETON_SYNTAX_ERROR, /**< The number skeleton passed to C++ NumberFormatter or C UNumberFormatter was invalid or contained a syntax error. @draft ICU 62 */ #endif // U_HIDE_DRAFT_API #ifndef U_HIDE_DEPRECATED_API /** * One more than the highest normal formatting API error code. * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. */ - U_FMT_PARSE_ERROR_LIMIT = 0x10113, + U_FMT_PARSE_ERROR_LIMIT = 0x10114, #endif // U_HIDE_DEPRECATED_API /* diff --git a/icu4c/source/common/utypes.cpp b/icu4c/source/common/utypes.cpp index 5d6a0504ba6..7531e465683 100644 --- a/icu4c/source/common/utypes.cpp +++ b/icu4c/source/common/utypes.cpp @@ -126,7 +126,8 @@ _uFmtErrorName[U_FMT_PARSE_ERROR_LIMIT - U_FMT_PARSE_ERROR_START] = { "U_DEFAULT_KEYWORD_MISSING", "U_DECIMAL_NUMBER_SYNTAX_ERROR", "U_FORMAT_INEXACT_ERROR", - "U_NUMBER_ARG_OUTOFBOUNDS_ERROR" + "U_NUMBER_ARG_OUTOFBOUNDS_ERROR", + "U_NUMBER_SKELETON_SYNTAX_ERROR", }; static const char * const diff --git a/icu4c/source/i18n/number_capi.cpp b/icu4c/source/i18n/number_capi.cpp index 7095fe0fd6f..f40b7dad5cb 100644 --- a/icu4c/source/i18n/number_capi.cpp +++ b/icu4c/source/i18n/number_capi.cpp @@ -155,7 +155,7 @@ unumf_resultToString(const UFormattedNumber* uresult, UChar* buffer, int32_t buf return result->string.length(); } - return result->string.toUnicodeString().extract(buffer, bufferCapacity, *ec); + return result->string.toTempUnicodeString().extract(buffer, bufferCapacity, *ec); } U_CAPI void U_EXPORT2 diff --git a/icu4c/source/i18n/number_skeletons.cpp b/icu4c/source/i18n/number_skeletons.cpp index 0958d95c554..444471ea3bd 100644 --- a/icu4c/source/i18n/number_skeletons.cpp +++ b/icu4c/source/i18n/number_skeletons.cpp @@ -774,7 +774,9 @@ bool blueprint_helpers::parseExponentSignOption(const StringSegment& segment, MacroProps& macros, UErrorCode&) { // Get the sign display type out of the CharsTrie data structure. UCharsTrie tempStemTrie(kSerializedStemTrie); - UStringTrieResult result = tempStemTrie.next(segment.toUnicodeString().getBuffer(), segment.length()); + UStringTrieResult result = tempStemTrie.next( + segment.toTempUnicodeString().getBuffer(), + segment.length()); if (result != USTRINGTRIE_INTERMEDIATE_VALUE && result != USTRINGTRIE_FINAL_VALUE) { return false; } @@ -788,6 +790,7 @@ blueprint_helpers::parseExponentSignOption(const StringSegment& segment, MacroPr void blueprint_helpers::parseCurrencyOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status) { + // Can't use toTempUnicodeString() because getTerminatedBuffer is non-const const UChar* currencyCode = segment.toUnicodeString().getTerminatedBuffer(); UErrorCode localStatus = U_ZERO_ERROR; CurrencyUnit currency(currencyCode, localStatus); @@ -808,7 +811,7 @@ blueprint_helpers::generateCurrencyOption(const CurrencyUnit& currency, UnicodeS void blueprint_helpers::parseMeasureUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status) { - UnicodeString stemString = segment.toUnicodeString(); + const UnicodeString stemString = segment.toTempUnicodeString(); // NOTE: The category (type) of the unit is guaranteed to be a valid subtag (alphanumeric) // http://unicode.org/reports/tr35/#Validity_Data @@ -1043,7 +1046,7 @@ void blueprint_helpers::parseIncrementOption(const StringSegment& segment, Macro UErrorCode& status) { // Need to do char <-> UChar conversion... CharString buffer; - SKELETON_UCHAR_TO_CHAR(buffer, segment.toUnicodeString(), 0, segment.length(), status); + SKELETON_UCHAR_TO_CHAR(buffer, segment.toTempUnicodeString(), 0, segment.length(), status); // Utilize DecimalQuantity/decNumber to parse this for us. DecimalQuantity dq; @@ -1155,7 +1158,7 @@ void blueprint_helpers::parseNumberingSystemOption(const StringSegment& segment, UErrorCode& status) { // Need to do char <-> UChar conversion... CharString buffer; - SKELETON_UCHAR_TO_CHAR(buffer, segment.toUnicodeString(), 0, segment.length(), status); + SKELETON_UCHAR_TO_CHAR(buffer, segment.toTempUnicodeString(), 0, segment.length(), status); NumberingSystem* ns = NumberingSystem::createInstanceByName(buffer.data(), status); if (ns == nullptr) { diff --git a/icu4c/source/i18n/number_skeletons.h b/icu4c/source/i18n/number_skeletons.h index aeae63f4d9f..14f1bdbe709 100644 --- a/icu4c/source/i18n/number_skeletons.h +++ b/icu4c/source/i18n/number_skeletons.h @@ -16,8 +16,6 @@ using icu::numparse::impl::StringSegment; U_NAMESPACE_BEGIN namespace number { namespace impl { -static constexpr UErrorCode U_NUMBER_SKELETON_SYNTAX_ERROR = U_ILLEGAL_ARGUMENT_ERROR; - // Forward-declaration struct SeenMacroProps; diff --git a/icu4c/source/i18n/number_stringbuilder.cpp b/icu4c/source/i18n/number_stringbuilder.cpp index 2759ed45683..a1900deab82 100644 --- a/icu4c/source/i18n/number_stringbuilder.cpp +++ b/icu4c/source/i18n/number_stringbuilder.cpp @@ -334,6 +334,10 @@ int32_t NumberStringBuilder::remove(int32_t index, int32_t count) { } UnicodeString NumberStringBuilder::toUnicodeString() const { + return UnicodeString(getCharPtr() + fZero, fLength); +} + +const UnicodeString NumberStringBuilder::toTempUnicodeString() const { // Readonly-alias constructor: return UnicodeString(FALSE, getCharPtr() + fZero, fLength); } diff --git a/icu4c/source/i18n/number_stringbuilder.h b/icu4c/source/i18n/number_stringbuilder.h index a97cc9ca026..f92547679ca 100644 --- a/icu4c/source/i18n/number_stringbuilder.h +++ b/icu4c/source/i18n/number_stringbuilder.h @@ -84,8 +84,17 @@ class U_I18N_API NumberStringBuilder : public UMemory { int32_t insert(int32_t index, const NumberStringBuilder &other, UErrorCode &status); + /** + * Gets a "safe" UnicodeString that can be used even after the NumberStringBuilder is destructed. + * */ UnicodeString toUnicodeString() const; + /** + * Gets an "unsafe" UnicodeString that is valid only as long as the NumberStringBuilder is alive and + * unchanged. Slightly faster than toUnicodeString(). + */ + const UnicodeString toTempUnicodeString() const; + UnicodeString toDebugString() const; const char16_t *chars() const; diff --git a/icu4c/source/i18n/number_types.h b/icu4c/source/i18n/number_types.h index ac0188f4982..1081919c4cc 100644 --- a/icu4c/source/i18n/number_types.h +++ b/icu4c/source/i18n/number_types.h @@ -111,7 +111,16 @@ class U_I18N_API CharSequence { } } + /** + * Gets a "safe" UnicodeString that can be used even after the CharSequence is destructed. + * */ virtual UnicodeString toUnicodeString() const = 0; + + /** + * Gets an "unsafe" UnicodeString that is valid only as long as the CharSequence is alive and + * unchanged. Slightly faster than toUnicodeString(). + */ + virtual const UnicodeString toTempUnicodeString() const = 0; }; class U_I18N_API AffixPatternProvider { diff --git a/icu4c/source/i18n/number_utils.h b/icu4c/source/i18n/number_utils.h index 60879db0857..8f74692cc61 100644 --- a/icu4c/source/i18n/number_utils.h +++ b/icu4c/source/i18n/number_utils.h @@ -39,12 +39,13 @@ class UnicodeStringCharSequence : public CharSequence { } UnicodeString toUnicodeString() const U_OVERRIDE { - // Allocate a UnicodeString of the correct length - UnicodeString output(length(), 0, -1); - for (int32_t i = 0; i < length(); i++) { - output.append(charAt(i)); - } - return output; + // Performs a copy: + return fStr; + } + + const UnicodeString toTempUnicodeString() const U_OVERRIDE { + // Readonly alias: + return UnicodeString().fastCopyFrom(fStr); } private: diff --git a/icu4c/source/i18n/numparse_currency.cpp b/icu4c/source/i18n/numparse_currency.cpp index 61f07acce2f..504df74e369 100644 --- a/icu4c/source/i18n/numparse_currency.cpp +++ b/icu4c/source/i18n/numparse_currency.cpp @@ -33,9 +33,8 @@ bool CurrencyNamesMatcher::match(StringSegment& segment, ParsedNumber& result, U return false; } - // NOTE: This requires a new UnicodeString to be allocated, instead of using the StringSegment. - // This should be fixed with #13584. - UnicodeString segmentString = segment.toUnicodeString(); + // NOTE: This call site should be improved with #13584. + const UnicodeString segmentString = segment.toTempUnicodeString(); // Try to parse the currency ParsePosition ppos(0); diff --git a/icu4c/source/i18n/numparse_stringsegment.cpp b/icu4c/source/i18n/numparse_stringsegment.cpp index 0a6e4fd1049..6f35deb7e71 100644 --- a/icu4c/source/i18n/numparse_stringsegment.cpp +++ b/icu4c/source/i18n/numparse_stringsegment.cpp @@ -61,6 +61,10 @@ UChar32 StringSegment::codePointAt(int32_t index) const { } UnicodeString StringSegment::toUnicodeString() const { + return UnicodeString(fStr.getBuffer() + fStart, fEnd - fStart); +} + +const UnicodeString StringSegment::toTempUnicodeString() const { // Use the readonly-aliasing constructor for efficiency. return UnicodeString(FALSE, fStr.getBuffer() + fStart, fEnd - fStart); } @@ -134,7 +138,7 @@ bool StringSegment::codePointsEqual(UChar32 cp1, UChar32 cp2, bool foldCase) { } bool StringSegment::operator==(const UnicodeString& other) const { - return toUnicodeString() == other; + return toTempUnicodeString() == other; } diff --git a/icu4c/source/i18n/numparse_types.h b/icu4c/source/i18n/numparse_types.h index b02bb249ce6..420a5f88c53 100644 --- a/icu4c/source/i18n/numparse_types.h +++ b/icu4c/source/i18n/numparse_types.h @@ -203,6 +203,8 @@ class StringSegment : public UMemory, public ::icu::number::impl::CharSequence { UnicodeString toUnicodeString() const override; + const UnicodeString toTempUnicodeString() const override; + /** * Returns the first code point in the string segment, or -1 if the string starts with an invalid * code point. diff --git a/icu4c/source/i18n/unicode/unum.h b/icu4c/source/i18n/unicode/unum.h index 99864a169ba..a8894a0052d 100644 --- a/icu4c/source/i18n/unicode/unum.h +++ b/icu4c/source/i18n/unicode/unum.h @@ -29,12 +29,13 @@ /** * \file - * \brief C API: NumberFormat + * \brief C API: Compatibility APIs for number formatting. * *

Number Format C API

* - *

IMPORTANT: New users with C++ capabilities are - * strongly encouraged to see if numberformatter.h fits their use case. + *

IMPORTANT: New users with are strongly encouraged to + * see if unumberformatter.h fits their use case. Although not deprecated, + * this header is provided for backwards compatibility only. * * Number Format C API Provides functions for * formatting and parsing a number. Also provides methods for @@ -399,6 +400,10 @@ typedef enum UNumberFormatFields { * number format is opened using the given pattern, which must conform * to the syntax described in DecimalFormat or RuleBasedNumberFormat, * respectively. + * + *

NOTE:: New users with are strongly encouraged to + * use unumf_openWithSkeletonAndLocale instead of unum_open. + * * @param pattern A pattern specifying the format to use. * This parameter is ignored unless the style is * UNUM_PATTERN_DECIMAL or UNUM_PATTERN_RULEBASED. diff --git a/icu4c/source/test/intltest/numbertest_affixutils.cpp b/icu4c/source/test/intltest/numbertest_affixutils.cpp index 63c155ca496..1d0dcc710b8 100644 --- a/icu4c/source/test/intltest/numbertest_affixutils.cpp +++ b/icu4c/source/test/intltest/numbertest_affixutils.cpp @@ -226,6 +226,7 @@ void AffixUtilsTest::testUnescapeWithSymbolProvider() { AffixUtils::unescape(UnicodeStringCharSequence(input), sb, 0, provider, status); assertSuccess("Spot 1", status); assertEquals(input, expected, sb.toUnicodeString()); + assertEquals(input, expected, sb.toTempUnicodeString()); } // Test insertion position diff --git a/icu4c/source/test/intltest/numbertest_stringsegment.cpp b/icu4c/source/test/intltest/numbertest_stringsegment.cpp index 665bc7c52b0..6c11df8f96b 100644 --- a/icu4c/source/test/intltest/numbertest_stringsegment.cpp +++ b/icu4c/source/test/intltest/numbertest_stringsegment.cpp @@ -50,10 +50,13 @@ void StringSegmentTest::testLength() { void StringSegmentTest::testCharAt() { StringSegment segment(SAMPLE_STRING, 0); assertEquals("Initial", SAMPLE_STRING, segment.toUnicodeString()); + assertEquals("Initial", SAMPLE_STRING, segment.toTempUnicodeString()); segment.adjustOffset(3); assertEquals("After adjust-offset", UnicodeString(u"radio 📻"), segment.toUnicodeString()); + assertEquals("After adjust-offset", UnicodeString(u"radio 📻"), segment.toTempUnicodeString()); segment.setLength(5); assertEquals("After adjust-length", UnicodeString(u"radio"), segment.toUnicodeString()); + assertEquals("After adjust-length", UnicodeString(u"radio"), segment.toTempUnicodeString()); } void StringSegmentTest::testGetCodePoint() {