From afbb37febdd194f2a32289571a65ae24d40b93cf Mon Sep 17 00:00:00 2001 From: Shane Carr Date: Sat, 10 Feb 2018 14:29:26 +0000 Subject: [PATCH] ICU-13574 Checkpoint commit. AffixMatcher is mostly implemented. X-SVN-Rev: 40894 --- icu4c/source/i18n/number_affixutils.cpp | 17 ++ icu4c/source/i18n/number_affixutils.h | 47 ++-- icu4c/source/i18n/number_patternmodifier.cpp | 202 ++++++--------- icu4c/source/i18n/number_patternmodifier.h | 36 +-- icu4c/source/i18n/number_patternstring.cpp | 142 +++++++--- icu4c/source/i18n/number_patternstring.h | 89 ++++--- icu4c/source/i18n/number_types.h | 42 +-- icu4c/source/i18n/numparse_affixes.cpp | 244 +++++++++++++++++- icu4c/source/i18n/numparse_affixes.h | 93 ++++++- icu4c/source/i18n/numparse_symbols.cpp | 2 +- icu4c/source/i18n/numparse_symbols.h | 2 +- icu4c/source/i18n/numparse_types.h | 34 ++- .../source/test/intltest/numbertest_parse.cpp | 12 +- 13 files changed, 664 insertions(+), 298 deletions(-) diff --git a/icu4c/source/i18n/number_affixutils.cpp b/icu4c/source/i18n/number_affixutils.cpp index 072edbb2fa6..2d08414ddd6 100644 --- a/icu4c/source/i18n/number_affixutils.cpp +++ b/icu4c/source/i18n/number_affixutils.cpp @@ -7,6 +7,7 @@ #include "number_affixutils.h" #include "unicode/utf16.h" +#include "unicode/uniset.h" using namespace icu; using namespace icu::number; @@ -239,6 +240,22 @@ UnicodeString AffixUtils::replaceType(const CharSequence &affixPattern, AffixPat return output; } +bool AffixUtils::containsOnlySymbolsAndIgnorables(const CharSequence& affixPattern, + const UnicodeSet& ignorables, UErrorCode& status) { + if (affixPattern.length() == 0) { + return true; + }; + AffixTag tag; + while (hasNext(tag, affixPattern)) { + tag = nextToken(tag, affixPattern, status); + if (U_FAILURE(status)) { return false; } + if (tag.type == TYPE_CODEPOINT && !ignorables.contains(tag.codePoint)) { + return false; + } + } + return true; +} + void AffixUtils::iterateWithConsumer(const CharSequence& affixPattern, TokenConsumer& 
consumer, UErrorCode& status) { if (affixPattern.length() == 0) { diff --git a/icu4c/source/i18n/number_affixutils.h b/icu4c/source/i18n/number_affixutils.h index 665a9d84256..d8b525a6e1f 100644 --- a/icu4c/source/i18n/number_affixutils.h +++ b/icu4c/source/i18n/number_affixutils.h @@ -37,13 +37,14 @@ struct AffixTag { AffixPatternState state; AffixPatternType type; - AffixTag() : offset(0), state(STATE_BASE) {} + AffixTag() + : offset(0), state(STATE_BASE) {} - AffixTag(int32_t offset) : offset(offset) {} + AffixTag(int32_t offset) + : offset(offset) {} AffixTag(int32_t offset, UChar32 codePoint, AffixPatternState state, AffixPatternType type) - : offset(offset), codePoint(codePoint), state(state), type(type) - {} + : offset(offset), codePoint(codePoint), state(state), type(type) {} }; class TokenConsumer { @@ -112,7 +113,7 @@ class U_I18N_API AffixUtils { * @param patternString The original string whose width will be estimated. * @return The length of the unescaped string. */ - static int32_t estimateLength(const CharSequence &patternString, UErrorCode &status); + static int32_t estimateLength(const CharSequence& patternString, UErrorCode& status); /** * Takes a string and escapes (quotes) characters that have special meaning in the affix pattern @@ -123,7 +124,7 @@ class U_I18N_API AffixUtils { * @param input The string to be escaped. * @return The resulting UnicodeString. */ - static UnicodeString escape(const CharSequence &input); + static UnicodeString escape(const CharSequence& input); static Field getFieldForType(AffixPatternType type); @@ -139,9 +140,8 @@ class U_I18N_API AffixUtils { * @param position The index into the NumberStringBuilder to insert the string. * @param provider An object to generate locale symbols. 
*/ - static int32_t - unescape(const CharSequence &affixPattern, NumberStringBuilder &output, int32_t position, - const SymbolProvider &provider, UErrorCode &status); + static int32_t unescape(const CharSequence& affixPattern, NumberStringBuilder& output, + int32_t position, const SymbolProvider& provider, UErrorCode& status); /** * Sames as {@link #unescape}, but only calculates the code point count. More efficient than {@link #unescape} @@ -151,8 +151,8 @@ class U_I18N_API AffixUtils { * @param provider An object to generate locale symbols. * @return The same return value as if you called {@link #unescape}. */ - static int32_t unescapedCodePointCount(const CharSequence &affixPattern, - const SymbolProvider &provider, UErrorCode &status); + static int32_t unescapedCodePointCount(const CharSequence& affixPattern, + const SymbolProvider& provider, UErrorCode& status); /** * Checks whether the given affix pattern contains at least one token of the given type, which is @@ -162,8 +162,7 @@ class U_I18N_API AffixUtils { * @param type The token type. * @return true if the affix pattern contains the given token type; false otherwise. */ - static bool - containsType(const CharSequence &affixPattern, AffixPatternType type, UErrorCode &status); + static bool containsType(const CharSequence& affixPattern, AffixPatternType type, UErrorCode& status); /** * Checks whether the specified affix pattern has any unquoted currency symbols ("¤"). @@ -171,7 +170,7 @@ class U_I18N_API AffixUtils { * @param affixPattern The string to check for currency symbols. * @return true if the literal has at least one unquoted currency symbol; false otherwise. */ - static bool hasCurrencySymbols(const CharSequence &affixPattern, UErrorCode &status); + static bool hasCurrencySymbols(const CharSequence& affixPattern, UErrorCode& status); /** * Replaces all occurrences of tokens with the given type with the given replacement char. 
@@ -181,9 +180,15 @@ class U_I18N_API AffixUtils { * @param replacementChar The char to substitute in place of chars of the given token type. * @return A string containing the new affix pattern. */ - static UnicodeString - replaceType(const CharSequence &affixPattern, AffixPatternType type, char16_t replacementChar, - UErrorCode &status); + static UnicodeString replaceType(const CharSequence& affixPattern, AffixPatternType type, + char16_t replacementChar, UErrorCode& status); + + /** + * Returns whether the given affix pattern contains only symbols and ignorables as defined by the + * given ignorables set. + */ + static bool containsOnlySymbolsAndIgnorables(const CharSequence& affixPattern, + const UnicodeSet& ignorables, UErrorCode& status); /** * Iterates over the affix pattern, calling the TokenConsumer for each token. @@ -201,7 +206,7 @@ class U_I18N_API AffixUtils { * (never negative), or -1 if there were no more tokens in the affix pattern. * @see #hasNext */ - static AffixTag nextToken(AffixTag tag, const CharSequence &patternString, UErrorCode &status); + static AffixTag nextToken(AffixTag tag, const CharSequence& patternString, UErrorCode& status); /** * Returns whether the affix pattern string has any more tokens to be retrieved from a call to @@ -211,7 +216,7 @@ class U_I18N_API AffixUtils { * @param string The affix pattern. * @return true if there are more tokens to consume; false otherwise. */ - static bool hasNext(const AffixTag &tag, const CharSequence &string); + static bool hasNext(const AffixTag& tag, const CharSequence& string); private: /** @@ -219,8 +224,8 @@ class U_I18N_API AffixUtils { * The order of the arguments is consistent with Java, but the order of the stored * fields is not necessarily the same. 
*/ - static inline AffixTag - makeTag(int32_t offset, AffixPatternType type, AffixPatternState state, UChar32 cp) { + static inline AffixTag makeTag(int32_t offset, AffixPatternType type, AffixPatternState state, + UChar32 cp) { return {offset, cp, state, type}; } }; diff --git a/icu4c/source/i18n/number_patternmodifier.cpp b/icu4c/source/i18n/number_patternmodifier.cpp index e182104c911..b77f559a267 100644 --- a/icu4c/source/i18n/number_patternmodifier.cpp +++ b/icu4c/source/i18n/number_patternmodifier.cpp @@ -15,9 +15,10 @@ using namespace icu; using namespace icu::number; using namespace icu::number::impl; -MutablePatternModifier::MutablePatternModifier(bool isStrong) : fStrong(isStrong) {} +MutablePatternModifier::MutablePatternModifier(bool isStrong) + : fStrong(isStrong) {} -void MutablePatternModifier::setPatternInfo(const AffixPatternProvider *patternInfo) { +void MutablePatternModifier::setPatternInfo(const AffixPatternProvider* patternInfo) { this->patternInfo = patternInfo; } @@ -26,12 +27,11 @@ void MutablePatternModifier::setPatternAttributes(UNumberSignDisplay signDisplay this->perMilleReplacesPercent = perMille; } -void -MutablePatternModifier::setSymbols(const DecimalFormatSymbols *symbols, const CurrencyUnit &currency, - const UNumberUnitWidth unitWidth, const PluralRules *rules) { +void MutablePatternModifier::setSymbols(const DecimalFormatSymbols* symbols, const CurrencyUnit& currency, + const UNumberUnitWidth unitWidth, const PluralRules* rules) { U_ASSERT((rules != nullptr) == needsPlurals()); this->symbols = symbols; - uprv_memcpy(static_cast<char16_t *>(this->currencyCode), + uprv_memcpy(static_cast<char16_t*>(this->currencyCode), currency.getISOCurrency(), sizeof(char16_t) * 4); this->unitWidth = unitWidth; @@ -49,12 +49,12 @@ bool MutablePatternModifier::needsPlurals() const { // Silently ignore any error codes.
} -ImmutablePatternModifier *MutablePatternModifier::createImmutable(UErrorCode &status) { +ImmutablePatternModifier* MutablePatternModifier::createImmutable(UErrorCode& status) { return createImmutableAndChain(nullptr, status); } -ImmutablePatternModifier * -MutablePatternModifier::createImmutableAndChain(const MicroPropsGenerator *parent, UErrorCode &status) { +ImmutablePatternModifier* +MutablePatternModifier::createImmutableAndChain(const MicroPropsGenerator* parent, UErrorCode& status) { // TODO: Move StandardPlural VALUES to standardplural.h static const StandardPlural::Form STANDARD_PLURAL_VALUES[] = { @@ -89,11 +89,11 @@ MutablePatternModifier::createImmutableAndChain(const MicroPropsGenerator *paren } else { // Faster path when plural keyword is not needed. setNumberProperties(1, StandardPlural::Form::COUNT); - Modifier *positive = createConstantModifier(status); + Modifier* positive = createConstantModifier(status); setNumberProperties(0, StandardPlural::Form::COUNT); - Modifier *zero = createConstantModifier(status); + Modifier* zero = createConstantModifier(status); setNumberProperties(-1, StandardPlural::Form::COUNT); - Modifier *negative = createConstantModifier(status); + Modifier* negative = createConstantModifier(status); pm->adoptPositiveNegativeModifiers(positive, zero, negative); if (U_FAILURE(status)) { delete pm; @@ -103,29 +103,30 @@ MutablePatternModifier::createImmutableAndChain(const MicroPropsGenerator *paren } } -ConstantMultiFieldModifier *MutablePatternModifier::createConstantModifier(UErrorCode &status) { +ConstantMultiFieldModifier* MutablePatternModifier::createConstantModifier(UErrorCode& status) { NumberStringBuilder a; NumberStringBuilder b; insertPrefix(a, 0, status); insertSuffix(b, 0, status); if (patternInfo->hasCurrencySign()) { - return new CurrencySpacingEnabledModifier(a, b, !patternInfo->hasBody(), fStrong, *symbols, status); + return new CurrencySpacingEnabledModifier( + a, b, !patternInfo->hasBody(), fStrong, *symbols, 
status); } else { return new ConstantMultiFieldModifier(a, b, !patternInfo->hasBody(), fStrong); } } -ImmutablePatternModifier::ImmutablePatternModifier(ParameterizedModifier *pm, const PluralRules *rules, - const MicroPropsGenerator *parent) +ImmutablePatternModifier::ImmutablePatternModifier(ParameterizedModifier* pm, const PluralRules* rules, + const MicroPropsGenerator* parent) : pm(pm), rules(rules), parent(parent) {} -void ImmutablePatternModifier::processQuantity(DecimalQuantity &quantity, MicroProps &micros, - UErrorCode &status) const { +void ImmutablePatternModifier::processQuantity(DecimalQuantity& quantity, MicroProps& micros, + UErrorCode& status) const { parent->processQuantity(quantity, micros, status); applyToMicros(micros, quantity); } -void ImmutablePatternModifier::applyToMicros(MicroProps &micros, DecimalQuantity &quantity) const { +void ImmutablePatternModifier::applyToMicros(MicroProps& micros, DecimalQuantity& quantity) const { if (rules == nullptr) { micros.modMiddle = pm->getModifier(quantity.signum()); } else { @@ -138,17 +139,17 @@ void ImmutablePatternModifier::applyToMicros(MicroProps &micros, DecimalQuantity } /** Used by the unsafe code path. */ -MicroPropsGenerator &MutablePatternModifier::addToChain(const MicroPropsGenerator *parent) { +MicroPropsGenerator& MutablePatternModifier::addToChain(const MicroPropsGenerator* parent) { this->parent = parent; return *this; } -void MutablePatternModifier::processQuantity(DecimalQuantity &fq, MicroProps &micros, - UErrorCode &status) const { +void MutablePatternModifier::processQuantity(DecimalQuantity& fq, MicroProps& micros, + UErrorCode& status) const { parent->processQuantity(fq, micros, status); // The unsafe code path performs self-mutation, so we need a const_cast. // This method needs to be const because it overrides a const method in the parent class. - auto nonConstThis = const_cast<MutablePatternModifier *>(this); + auto nonConstThis = const_cast<MutablePatternModifier*>(this); if (needsPlurals()) { // TODO: Fix this. Avoid the copy.
DecimalQuantity copy(fq); @@ -160,20 +161,24 @@ void MutablePatternModifier::processQuantity(DecimalQuantity &fq, MicroProps &mi micros.modMiddle = this; } -int32_t MutablePatternModifier::apply(NumberStringBuilder &output, int32_t leftIndex, int32_t rightIndex, - UErrorCode &status) const { +int32_t MutablePatternModifier::apply(NumberStringBuilder& output, int32_t leftIndex, int32_t rightIndex, + UErrorCode& status) const { // The unsafe code path performs self-mutation, so we need a const_cast. // This method needs to be const because it overrides a const method in the parent class. - auto nonConstThis = const_cast<MutablePatternModifier *>(this); + auto nonConstThis = const_cast<MutablePatternModifier*>(this); int32_t prefixLen = nonConstThis->insertPrefix(output, leftIndex, status); int32_t suffixLen = nonConstThis->insertSuffix(output, rightIndex + prefixLen, status); // If the pattern had no decimal stem body (like #,##0.00), overwrite the value. int32_t overwriteLen = 0; if (!patternInfo->hasBody()) { overwriteLen = output.splice( - leftIndex + prefixLen, rightIndex + prefixLen, - UnicodeString(), 0, 0, UNUM_FIELD_COUNT, - status); + leftIndex + prefixLen, + rightIndex + prefixLen, + UnicodeString(), + 0, + 0, + UNUM_FIELD_COUNT, + status); } CurrencySpacingEnabledModifier::applyCurrencySpacing( output, @@ -186,30 +191,36 @@ int32_t MutablePatternModifier::apply(NumberStringBuilder &output, int32_t leftI return prefixLen + overwriteLen + suffixLen; } -int32_t MutablePatternModifier::getPrefixLength(UErrorCode &status) const { +int32_t MutablePatternModifier::getPrefixLength(UErrorCode& status) const { // The unsafe code path performs self-mutation, so we need a const_cast. // This method needs to be const because it overrides a const method in the parent class. - auto nonConstThis = const_cast<MutablePatternModifier *>(this); + auto nonConstThis = const_cast<MutablePatternModifier*>(this); // Enter and exit CharSequence Mode to get the length.
- nonConstThis->enterCharSequenceMode(true); - int result = AffixUtils::unescapedCodePointCount(*this, *this, status); // prefix length - nonConstThis->exitCharSequenceMode(); + nonConstThis->prepareAffix(true); + int result = AffixUtils::unescapedCodePointCount( + UnicodeStringCharSequence(currentAffix), + *this, + status); // prefix length return result; } -int32_t MutablePatternModifier::getCodePointCount(UErrorCode &status) const { +int32_t MutablePatternModifier::getCodePointCount(UErrorCode& status) const { // The unsafe code path performs self-mutation, so we need a const_cast. // This method needs to be const because it overrides a const method in the parent class. - auto nonConstThis = const_cast<MutablePatternModifier *>(this); - - // Enter and exit CharSequence Mode to get the length. - nonConstThis->enterCharSequenceMode(true); - int result = AffixUtils::unescapedCodePointCount(*this, *this, status); // prefix length - nonConstThis->exitCharSequenceMode(); - nonConstThis->enterCharSequenceMode(false); - result += AffixUtils::unescapedCodePointCount(*this, *this, status); // suffix length - nonConstThis->exitCharSequenceMode(); + auto nonConstThis = const_cast<MutablePatternModifier*>(this); + + // Render the affixes to get the length + nonConstThis->prepareAffix(true); + int result = AffixUtils::unescapedCodePointCount( + UnicodeStringCharSequence(currentAffix), + *this, + status); // prefix length + nonConstThis->prepareAffix(false); + result += AffixUtils::unescapedCodePointCount( + UnicodeStringCharSequence(currentAffix), + *this, + status); // suffix length return result; } @@ -217,20 +228,26 @@ bool MutablePatternModifier::isStrong() const { return fStrong; } -int32_t MutablePatternModifier::insertPrefix(NumberStringBuilder &sb, int position, UErrorCode &status) { - enterCharSequenceMode(true); - int length = AffixUtils::unescape(*this, sb, position, *this, status); - exitCharSequenceMode(); +int32_t MutablePatternModifier::insertPrefix(NumberStringBuilder& sb, int position, UErrorCode& status) { + 
prepareAffix(true); + int length = AffixUtils::unescape( + UnicodeStringCharSequence(currentAffix), sb, position, *this, status); return length; } -int32_t MutablePatternModifier::insertSuffix(NumberStringBuilder &sb, int position, UErrorCode &status) { - enterCharSequenceMode(false); - int length = AffixUtils::unescape(*this, sb, position, *this, status); - exitCharSequenceMode(); +int32_t MutablePatternModifier::insertSuffix(NumberStringBuilder& sb, int position, UErrorCode& status) { + prepareAffix(false); + int length = AffixUtils::unescape( + UnicodeStringCharSequence(currentAffix), sb, position, *this, status); return length; } +/** This method contains the heart of the logic for rendering LDML affix strings. */ +void MutablePatternModifier::prepareAffix(bool isPrefix) { + PatternStringUtils::patternInfoToStringBuilder( + *patternInfo, isPrefix, signum, signDisplay, plural, perMilleReplacesPercent, currentAffix); +} + UnicodeString MutablePatternModifier::getSymbol(AffixPatternType type) const { switch (type) { case AffixPatternType::TYPE_MINUS_SIGN: @@ -249,12 +266,12 @@ UnicodeString MutablePatternModifier::getSymbol(AffixPatternType type) const { return UnicodeString(); } else { UCurrNameStyle selector = (unitWidth == UNumberUnitWidth::UNUM_UNIT_WIDTH_NARROW) - ? UCurrNameStyle::UCURR_NARROW_SYMBOL_NAME - : UCurrNameStyle::UCURR_SYMBOL_NAME; + ? 
UCurrNameStyle::UCURR_NARROW_SYMBOL_NAME + : UCurrNameStyle::UCURR_SYMBOL_NAME; UErrorCode status = U_ZERO_ERROR; UBool isChoiceFormat = FALSE; int32_t symbolLen = 0; - const char16_t *symbol = ucurr_getName( + const char16_t* symbol = ucurr_getName( currencyCode, symbols->getLocale().getName(), selector, @@ -274,7 +291,7 @@ UnicodeString MutablePatternModifier::getSymbol(AffixPatternType type) const { UErrorCode status = U_ZERO_ERROR; UBool isChoiceFormat = FALSE; int32_t symbolLen = 0; - const char16_t *symbol = ucurr_getPluralName( + const char16_t* symbol = ucurr_getPluralName( currencyCode, symbols->getLocale().getName(), &isChoiceFormat, @@ -293,79 +310,6 @@ UnicodeString MutablePatternModifier::getSymbol(AffixPatternType type) const { } } -/** This method contains the heart of the logic for rendering LDML affix strings. */ -void MutablePatternModifier::enterCharSequenceMode(bool isPrefix) { - U_ASSERT(!inCharSequenceMode); - inCharSequenceMode = true; - - // Should the output render '+' where '-' would normally appear in the pattern? - plusReplacesMinusSign = signum != -1 - && (signDisplay == UNUM_SIGN_ALWAYS - || signDisplay == UNUM_SIGN_ACCOUNTING_ALWAYS - || (signum == 1 - && (signDisplay == UNUM_SIGN_EXCEPT_ZERO - || signDisplay == UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO))) - && patternInfo->positiveHasPlusSign() == false; - - // Should we use the affix from the negative subpattern? (If not, we will use the positive subpattern.) - bool useNegativeAffixPattern = patternInfo->hasNegativeSubpattern() && ( - signum == -1 || (patternInfo->negativeHasMinusSign() && plusReplacesMinusSign)); - - // Resolve the flags for the affix pattern. 
- fFlags = 0; - if (useNegativeAffixPattern) { - fFlags |= AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN; - } - if (isPrefix) { - fFlags |= AffixPatternProvider::AFFIX_PREFIX; - } - if (plural != StandardPlural::Form::COUNT) { - U_ASSERT(plural == (AffixPatternProvider::AFFIX_PLURAL_MASK & plural)); - fFlags |= plural; - } - - // Should we prepend a sign to the pattern? - if (!isPrefix || useNegativeAffixPattern) { - prependSign = false; - } else if (signum == -1) { - prependSign = signDisplay != UNUM_SIGN_NEVER; - } else { - prependSign = plusReplacesMinusSign; - } - - // Finally, compute the length of the affix pattern. - fLength = patternInfo->length(fFlags) + (prependSign ? 1 : 0); -} - -void MutablePatternModifier::exitCharSequenceMode() { - U_ASSERT(inCharSequenceMode); - inCharSequenceMode = false; -} - -int32_t MutablePatternModifier::length() const { - U_ASSERT(inCharSequenceMode); - return fLength; -} - -char16_t MutablePatternModifier::charAt(int32_t index) const { - U_ASSERT(inCharSequenceMode); - char16_t candidate; - if (prependSign && index == 0) { - candidate = u'-'; - } else if (prependSign) { - candidate = patternInfo->charAt(fFlags, index - 1); - } else { - candidate = patternInfo->charAt(fFlags, index); - } - if (plusReplacesMinusSign && candidate == u'-') { - return u'+'; - } - if (perMilleReplacesPercent && candidate == u'%') { - return u'‰'; - } - return candidate; -} - UnicodeString MutablePatternModifier::toUnicodeString() const { // Never called by AffixUtils U_ASSERT(false); diff --git a/icu4c/source/i18n/number_patternmodifier.h b/icu4c/source/i18n/number_patternmodifier.h index 9c8b95f7764..ddce46337ee 100644 --- a/icu4c/source/i18n/number_patternmodifier.h +++ b/icu4c/source/i18n/number_patternmodifier.h @@ -35,20 +35,21 @@ class MutablePatternModifier; // Exported as U_I18N_API because it is needed for the unit test PatternModifierTest class U_I18N_API ImmutablePatternModifier : public MicroPropsGenerator, public UMemory { public: - 
~ImmutablePatternModifier() U_OVERRIDE = default; - void processQuantity(DecimalQuantity &, MicroProps &micros, UErrorCode &status) const U_OVERRIDE; + void processQuantity(DecimalQuantity&, MicroProps& micros, UErrorCode& status) const U_OVERRIDE; - void applyToMicros(MicroProps &micros, DecimalQuantity &quantity) const; + void applyToMicros(MicroProps& micros, DecimalQuantity& quantity) const; private: - ImmutablePatternModifier(ParameterizedModifier *pm, const PluralRules *rules, const MicroPropsGenerator *parent); + ImmutablePatternModifier(ParameterizedModifier* pm, const PluralRules* rules, + const MicroPropsGenerator* parent); const LocalPointer<ParameterizedModifier> pm; - const PluralRules *rules; - const MicroPropsGenerator *parent; + const PluralRules* rules; + const MicroPropsGenerator* parent; - friend class MutablePatternModifier; + friend class MutablePatternModifier; }; /** @@ -74,7 +75,6 @@ class U_I18N_API MutablePatternModifier : public MicroPropsGenerator, public Modifier, public SymbolProvider, - public CharSequence, public UMemory { public: @@ -187,13 +187,7 @@ class U_I18N_API MutablePatternModifier */ UnicodeString getSymbol(AffixPatternType type) const U_OVERRIDE; - int32_t length() const U_OVERRIDE; - - char16_t charAt(int32_t index) const U_OVERRIDE; - - // Use default implementation of codePointAt - - UnicodeString toUnicodeString() const U_OVERRIDE; + UnicodeString toUnicodeString() const; private: // Modifier details (initialized in constructor) @@ -217,12 +211,8 @@ class U_I18N_API MutablePatternModifier // QuantityChain details (initialized in addToChain) const MicroPropsGenerator *parent; - // Transient CharSequence fields (initialized in enterCharSequenceMode) - bool inCharSequenceMode = false; - int32_t fFlags; - int32_t fLength; - bool prependSign; - bool plusReplacesMinusSign; + // Transient fields for rendering + UnicodeString currentAffix; /** * Uses the current properties to create a single {@link 
ConstantMultiFieldModifier} with currency spacing support @@ -244,9 +234,7 @@ class U_I18N_API MutablePatternModifier int32_t insertSuffix(NumberStringBuilder &sb, int position, UErrorCode &status); - void enterCharSequenceMode(bool isPrefix); - - void exitCharSequenceMode(); + void prepareAffix(bool isPrefix); }; diff --git a/icu4c/source/i18n/number_patternstring.cpp b/icu4c/source/i18n/number_patternstring.cpp index 20178824b0e..68f55001bb7 100644 --- a/icu4c/source/i18n/number_patternstring.cpp +++ b/icu4c/source/i18n/number_patternstring.cpp @@ -14,25 +14,27 @@ using namespace icu; using namespace icu::number; using namespace icu::number::impl; -void PatternParser::parseToPatternInfo(const UnicodeString& patternString, ParsedPatternInfo& patternInfo, UErrorCode &status) { +void PatternParser::parseToPatternInfo(const UnicodeString& patternString, ParsedPatternInfo& patternInfo, + UErrorCode& status) { patternInfo.consumePattern(patternString, status); } DecimalFormatProperties PatternParser::parseToProperties(const UnicodeString& pattern, IgnoreRounding ignoreRounding, - UErrorCode &status) { + UErrorCode& status) { DecimalFormatProperties properties; parseToExistingPropertiesImpl(pattern, properties, ignoreRounding, status); return properties; } -void PatternParser::parseToExistingProperties(const UnicodeString& pattern, DecimalFormatProperties& properties, - IgnoreRounding ignoreRounding, UErrorCode &status) { +void +PatternParser::parseToExistingProperties(const UnicodeString& pattern, DecimalFormatProperties& properties, + IgnoreRounding ignoreRounding, UErrorCode& status) { parseToExistingPropertiesImpl(pattern, properties, ignoreRounding, status); } char16_t ParsedPatternInfo::charAt(int32_t flags, int32_t index) const { - const Endpoints &endpoints = getEndpoints(flags); + const Endpoints& endpoints = getEndpoints(flags); if (index < 0 || index >= endpoints.end - endpoints.start) { U_ASSERT(false); } @@ -43,12 +45,12 @@ int32_t 
ParsedPatternInfo::length(int32_t flags) const { return getLengthFromEndpoints(getEndpoints(flags)); } -int32_t ParsedPatternInfo::getLengthFromEndpoints(const Endpoints &endpoints) { +int32_t ParsedPatternInfo::getLengthFromEndpoints(const Endpoints& endpoints) { return endpoints.end - endpoints.start; } UnicodeString ParsedPatternInfo::getString(int32_t flags) const { - const Endpoints &endpoints = getEndpoints(flags); + const Endpoints& endpoints = getEndpoints(flags); if (endpoints.start == endpoints.end) { return UnicodeString(); } @@ -56,7 +58,7 @@ UnicodeString ParsedPatternInfo::getString(int32_t flags) const { return UnicodeString(pattern, endpoints.start, endpoints.end - endpoints.start); } -const Endpoints &ParsedPatternInfo::getEndpoints(int32_t flags) const { +const Endpoints& ParsedPatternInfo::getEndpoints(int32_t flags) const { bool prefix = (flags & AFFIX_PREFIX) != 0; bool isNegative = (flags & AFFIX_NEGATIVE_SUBPATTERN) != 0; bool padding = (flags & AFFIX_PADDING) != 0; @@ -91,7 +93,7 @@ bool ParsedPatternInfo::hasCurrencySign() const { return positive.hasCurrencySign || (fHasNegativeSubpattern && negative.hasCurrencySign); } -bool ParsedPatternInfo::containsSymbolType(AffixPatternType type, UErrorCode &status) const { +bool ParsedPatternInfo::containsSymbolType(AffixPatternType type, UErrorCode& status) const { return AffixUtils::containsType(UnicodeStringCharSequence(pattern), type, status); } @@ -117,7 +119,7 @@ UChar32 ParsedPatternInfo::ParserState::next() { return codePoint; } -void ParsedPatternInfo::consumePattern(const UnicodeString& patternString, UErrorCode &status) { +void ParsedPatternInfo::consumePattern(const UnicodeString& patternString, UErrorCode& status) { if (U_FAILURE(status)) { return; } this->pattern = patternString; @@ -141,7 +143,7 @@ void ParsedPatternInfo::consumePattern(const UnicodeString& patternString, UErro } } -void ParsedPatternInfo::consumeSubpattern(UErrorCode &status) { +void 
ParsedPatternInfo::consumeSubpattern(UErrorCode& status) { // subpattern := literals? number exponent? literals? consumePadding(PadPosition::UNUM_PAD_BEFORE_PREFIX, status); if (U_FAILURE(status)) { return; } @@ -161,7 +163,7 @@ void ParsedPatternInfo::consumeSubpattern(UErrorCode &status) { if (U_FAILURE(status)) { return; } } -void ParsedPatternInfo::consumePadding(PadPosition paddingLocation, UErrorCode &status) { +void ParsedPatternInfo::consumePadding(PadPosition paddingLocation, UErrorCode& status) { if (state.peek() != u'*') { return; } @@ -177,7 +179,7 @@ void ParsedPatternInfo::consumePadding(PadPosition paddingLocation, UErrorCode & currentSubpattern->paddingEndpoints.end = state.offset; } -void ParsedPatternInfo::consumeAffix(Endpoints &endpoints, UErrorCode &status) { +void ParsedPatternInfo::consumeAffix(Endpoints& endpoints, UErrorCode& status) { // literals := { literal } endpoints.start = state.offset; while (true) { @@ -233,7 +235,7 @@ void ParsedPatternInfo::consumeAffix(Endpoints &endpoints, UErrorCode &status) { endpoints.end = state.offset; } -void ParsedPatternInfo::consumeLiteral(UErrorCode &status) { +void ParsedPatternInfo::consumeLiteral(UErrorCode& status) { if (state.peek() == -1) { state.toParseException(u"Expected unquoted literal but found EOL"); status = U_PATTERN_SYNTAX_ERROR; @@ -256,7 +258,7 @@ void ParsedPatternInfo::consumeLiteral(UErrorCode &status) { } } -void ParsedPatternInfo::consumeFormat(UErrorCode &status) { +void ParsedPatternInfo::consumeFormat(UErrorCode& status) { consumeIntegerFormat(status); if (U_FAILURE(status)) { return; } if (state.peek() == u'.') { @@ -268,9 +270,9 @@ void ParsedPatternInfo::consumeFormat(UErrorCode &status) { } } -void ParsedPatternInfo::consumeIntegerFormat(UErrorCode &status) { +void ParsedPatternInfo::consumeIntegerFormat(UErrorCode& status) { // Convenience reference: - ParsedSubpatternInfo &result = *currentSubpattern; + ParsedSubpatternInfo& result = *currentSubpattern; while (true) { 
switch (state.peek()) { @@ -359,9 +361,9 @@ void ParsedPatternInfo::consumeIntegerFormat(UErrorCode &status) { } } -void ParsedPatternInfo::consumeFractionFormat(UErrorCode &status) { +void ParsedPatternInfo::consumeFractionFormat(UErrorCode& status) { // Convenience reference: - ParsedSubpatternInfo &result = *currentSubpattern; + ParsedSubpatternInfo& result = *currentSubpattern; int32_t zeroCounter = 0; while (true) { @@ -407,9 +409,9 @@ void ParsedPatternInfo::consumeFractionFormat(UErrorCode &status) { } } -void ParsedPatternInfo::consumeExponent(UErrorCode &status) { +void ParsedPatternInfo::consumeExponent(UErrorCode& status) { // Convenience reference: - ParsedSubpatternInfo &result = *currentSubpattern; + ParsedSubpatternInfo& result = *currentSubpattern; if (state.peek() != u'E') { return; @@ -437,9 +439,9 @@ void ParsedPatternInfo::consumeExponent(UErrorCode &status) { /// END RECURSIVE DESCENT PARSER IMPLEMENTATION /// /////////////////////////////////////////////////// -void -PatternParser::parseToExistingPropertiesImpl(const UnicodeString& pattern, DecimalFormatProperties &properties, - IgnoreRounding ignoreRounding, UErrorCode &status) { +void PatternParser::parseToExistingPropertiesImpl(const UnicodeString& pattern, + DecimalFormatProperties& properties, + IgnoreRounding ignoreRounding, UErrorCode& status) { if (pattern.length() == 0) { // Backwards compatibility requires that we reset to the default values. // TODO: Only overwrite the properties that "saveToProperties" normally touches? 
@@ -453,13 +455,13 @@ PatternParser::parseToExistingPropertiesImpl(const UnicodeString& pattern, Decim patternInfoToProperties(properties, patternInfo, ignoreRounding, status); } -void PatternParser::patternInfoToProperties(DecimalFormatProperties &properties, - ParsedPatternInfo& patternInfo, - IgnoreRounding _ignoreRounding, UErrorCode &status) { +void +PatternParser::patternInfoToProperties(DecimalFormatProperties& properties, ParsedPatternInfo& patternInfo, + IgnoreRounding _ignoreRounding, UErrorCode& status) { // Translate from PatternParseResult to Properties. // Note that most data from "negative" is ignored per the specification of DecimalFormat. - const ParsedSubpatternInfo &positive = patternInfo.positive; + const ParsedSubpatternInfo& positive = patternInfo.positive; bool ignoreRounding; if (_ignoreRounding == IGNORE_ROUNDING_NEVER) { @@ -508,8 +510,7 @@ void PatternParser::patternInfoToProperties(DecimalFormatProperties &properties, properties.maximumFractionDigits = -1; properties.roundingIncrement = 0.0; properties.minimumSignificantDigits = positive.integerAtSigns; - properties.maximumSignificantDigits = - positive.integerAtSigns + positive.integerTrailingHashSigns; + properties.maximumSignificantDigits = positive.integerAtSigns + positive.integerTrailingHashSigns; } else if (!positive.rounding.isZero()) { if (!ignoreRounding) { properties.minimumFractionDigits = minFrac; @@ -570,9 +571,9 @@ void PatternParser::patternInfoToProperties(DecimalFormatProperties &properties, // Padding settings if (!positive.paddingLocation.isNull()) { // The width of the positive prefix and suffix templates are included in the padding - int paddingWidth = - positive.widthExceptAffixes + AffixUtils::estimateLength(UnicodeStringCharSequence(posPrefix), status) + - AffixUtils::estimateLength(UnicodeStringCharSequence(posSuffix), status); + int paddingWidth = positive.widthExceptAffixes + + AffixUtils::estimateLength(UnicodeStringCharSequence(posPrefix), status) + + 
AffixUtils::estimateLength(UnicodeStringCharSequence(posSuffix), status); properties.formatWidth = paddingWidth; UnicodeString rawPaddingString = patternInfo.getString(AffixPatternProvider::AFFIX_PADDING); if (rawPaddingString.length() == 1) { @@ -622,8 +623,8 @@ void PatternParser::patternInfoToProperties(DecimalFormatProperties &properties, /// End PatternStringParser.java; begin PatternStringUtils.java /// /////////////////////////////////////////////////////////////////// -UnicodeString PatternStringUtils::propertiesToPatternString(const DecimalFormatProperties &properties, - UErrorCode &status) { +UnicodeString PatternStringUtils::propertiesToPatternString(const DecimalFormatProperties& properties, + UErrorCode& status) { UnicodeString sb; // Convenience references @@ -632,7 +633,7 @@ UnicodeString PatternStringUtils::propertiesToPatternString(const DecimalFormatP int groupingSize = uprv_min(properties.secondaryGroupingSize, dosMax); int firstGroupingSize = uprv_min(properties.groupingSize, dosMax); int paddingWidth = uprv_min(properties.formatWidth, dosMax); - NullableValue paddingLocation = properties.padPosition; + NullableValue paddingLocation = properties.padPosition; UnicodeString paddingString = properties.padString; int minInt = uprv_max(uprv_min(properties.minimumIntegerDigits, dosMax), 0); int maxInt = uprv_min(properties.maximumIntegerDigits, dosMax); @@ -809,8 +810,8 @@ UnicodeString PatternStringUtils::propertiesToPatternString(const DecimalFormatP } int PatternStringUtils::escapePaddingString(UnicodeString input, UnicodeString& output, int startIndex, - UErrorCode &status) { - (void)status; + UErrorCode& status) { + (void) status; if (input.length() == 0) { input.setTo(kFallbackPaddingString, -1); } @@ -840,4 +841,69 @@ int PatternStringUtils::escapePaddingString(UnicodeString input, UnicodeString& return output.length() - startLength; } +void PatternStringUtils::patternInfoToStringBuilder(const AffixPatternProvider& patternInfo, bool isPrefix, + 
int8_t signum, UNumberSignDisplay signDisplay, + StandardPlural::Form plural, + bool perMilleReplacesPercent, UnicodeString& output) { + + // Should the output render '+' where '-' would normally appear in the pattern? + bool plusReplacesMinusSign = signum != -1 && ( + signDisplay == UNUM_SIGN_ALWAYS || signDisplay == UNUM_SIGN_ACCOUNTING_ALWAYS || ( + signum == 1 && ( + signDisplay == UNUM_SIGN_EXCEPT_ZERO || + signDisplay == UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO))) && + patternInfo.positiveHasPlusSign() == false; + + // Should we use the affix from the negative subpattern? (If not, we will use the positive + // subpattern.) + bool useNegativeAffixPattern = patternInfo.hasNegativeSubpattern() && ( + signum == -1 || (patternInfo.negativeHasMinusSign() && plusReplacesMinusSign)); + + // Resolve the flags for the affix pattern. + int flags = 0; + if (useNegativeAffixPattern) { + flags |= AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN; + } + if (isPrefix) { + flags |= AffixPatternProvider::AFFIX_PREFIX; + } + if (plural != StandardPlural::Form::COUNT) { + U_ASSERT(plural == (AffixPatternProvider::AFFIX_PLURAL_MASK & plural)); + flags |= plural; + } + + // Should we prepend a sign to the pattern? + bool prependSign; + if (!isPrefix || useNegativeAffixPattern) { + prependSign = false; + } else if (signum == -1) { + prependSign = signDisplay != UNUM_SIGN_NEVER; + } else { + prependSign = plusReplacesMinusSign; + } + + // Compute the length of the affix pattern. + int length = patternInfo.length(flags) + (prependSign ? 1 : 0); + + // Finally, set the result into the StringBuilder. 
+ output.remove(); + for (int index = 0; index < length; index++) { + char16_t candidate; + if (prependSign && index == 0) { + candidate = u'-'; + } else if (prependSign) { + candidate = patternInfo.charAt(flags, index - 1); + } else { + candidate = patternInfo.charAt(flags, index); + } + if (plusReplacesMinusSign && candidate == u'-') { + candidate = u'+'; + } + if (perMilleReplacesPercent && candidate == u'%') { + candidate = u'‰'; + } + output.append(candidate); + } +} + #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/i18n/number_patternstring.h b/icu4c/source/i18n/number_patternstring.h index ec44290d663..0a343f63782 100644 --- a/icu4c/source/i18n/number_patternstring.h +++ b/icu4c/source/i18n/number_patternstring.h @@ -62,17 +62,18 @@ struct U_I18N_API ParsedPatternInfo : public AffixPatternProvider, public UMemor ParsedSubpatternInfo positive; ParsedSubpatternInfo negative; - ParsedPatternInfo() : state(this->pattern), currentSubpattern(nullptr) {} + ParsedPatternInfo() + : state(this->pattern), currentSubpattern(nullptr) {} ~ParsedPatternInfo() U_OVERRIDE = default; - static int32_t getLengthFromEndpoints(const Endpoints &endpoints); + static int32_t getLengthFromEndpoints(const Endpoints& endpoints); char16_t charAt(int32_t flags, int32_t index) const U_OVERRIDE; int32_t length(int32_t flags) const U_OVERRIDE; - UnicodeString getString(int32_t flags) const; + UnicodeString getString(int32_t flags) const U_OVERRIDE; bool positiveHasPlusSign() const U_OVERRIDE; @@ -82,16 +83,17 @@ struct U_I18N_API ParsedPatternInfo : public AffixPatternProvider, public UMemor bool hasCurrencySign() const U_OVERRIDE; - bool containsSymbolType(AffixPatternType type, UErrorCode &status) const U_OVERRIDE; + bool containsSymbolType(AffixPatternType type, UErrorCode& status) const U_OVERRIDE; bool hasBody() const U_OVERRIDE; private: struct U_I18N_API ParserState { - const UnicodeString &pattern; // reference to the parent + const UnicodeString& pattern; // 
reference to the parent int32_t offset = 0; - explicit ParserState(const UnicodeString &_pattern) : pattern(_pattern) {}; + explicit ParserState(const UnicodeString& _pattern) + : pattern(_pattern) {}; UChar32 peek(); @@ -99,41 +101,40 @@ struct U_I18N_API ParsedPatternInfo : public AffixPatternProvider, public UMemor // TODO: We don't currently do anything with the message string. // This method is here as a shell for Java compatibility. - inline void toParseException(const char16_t *message) { (void)message; } - } - state; + inline void toParseException(const char16_t* message) { (void) message; } + } state; // NOTE: In Java, these are written as pure functions. // In C++, they're written as methods. // The behavior is the same. // Mutable transient pointer: - ParsedSubpatternInfo *currentSubpattern; + ParsedSubpatternInfo* currentSubpattern; // In Java, "negative == null" tells us whether or not we had a negative subpattern. // In C++, we need to remember in another boolean. bool fHasNegativeSubpattern = false; - const Endpoints &getEndpoints(int32_t flags) const; + const Endpoints& getEndpoints(int32_t flags) const; /** Run the recursive descent parser. 
*/ - void consumePattern(const UnicodeString &patternString, UErrorCode &status); + void consumePattern(const UnicodeString& patternString, UErrorCode& status); - void consumeSubpattern(UErrorCode &status); + void consumeSubpattern(UErrorCode& status); - void consumePadding(PadPosition paddingLocation, UErrorCode &status); + void consumePadding(PadPosition paddingLocation, UErrorCode& status); - void consumeAffix(Endpoints &endpoints, UErrorCode &status); + void consumeAffix(Endpoints& endpoints, UErrorCode& status); - void consumeLiteral(UErrorCode &status); + void consumeLiteral(UErrorCode& status); - void consumeFormat(UErrorCode &status); + void consumeFormat(UErrorCode& status); - void consumeIntegerFormat(UErrorCode &status); + void consumeIntegerFormat(UErrorCode& status); - void consumeFractionFormat(UErrorCode &status); + void consumeFractionFormat(UErrorCode& status); - void consumeExponent(UErrorCode &status); + void consumeExponent(UErrorCode& status); friend class PatternParser; }; @@ -153,8 +154,8 @@ class U_I18N_API PatternParser { * The LDML decimal format pattern (Excel-style pattern) to parse. * @return The results of the parse. */ - static void - parseToPatternInfo(const UnicodeString& patternString, ParsedPatternInfo &patternInfo, UErrorCode &status); + static void parseToPatternInfo(const UnicodeString& patternString, ParsedPatternInfo& patternInfo, + UErrorCode& status); enum IgnoreRounding { IGNORE_ROUNDING_NEVER = 0, IGNORE_ROUNDING_IF_CURRENCY = 1, IGNORE_ROUNDING_ALWAYS = 2 @@ -173,8 +174,8 @@ class U_I18N_API PatternParser { * @throws IllegalArgumentException * If there is a syntax error in the pattern string. */ - static DecimalFormatProperties - parseToProperties(const UnicodeString& pattern, IgnoreRounding ignoreRounding, UErrorCode &status); + static DecimalFormatProperties parseToProperties(const UnicodeString& pattern, + IgnoreRounding ignoreRounding, UErrorCode& status); /** * Parses a pattern string into an existing property bag. 
All properties that can be encoded into a pattern string @@ -190,18 +191,19 @@ class U_I18N_API PatternParser { * @throws IllegalArgumentException * If there was a syntax error in the pattern string. */ - static void parseToExistingProperties(const UnicodeString& pattern, DecimalFormatProperties& properties, - IgnoreRounding ignoreRounding, UErrorCode &status); + static void parseToExistingProperties(const UnicodeString& pattern, + DecimalFormatProperties& properties, + IgnoreRounding ignoreRounding, UErrorCode& status); private: - static void - parseToExistingPropertiesImpl(const UnicodeString& pattern, DecimalFormatProperties &properties, - IgnoreRounding ignoreRounding, UErrorCode &status); + static void parseToExistingPropertiesImpl(const UnicodeString& pattern, + DecimalFormatProperties& properties, + IgnoreRounding ignoreRounding, UErrorCode& status); /** Finalizes the temporary data stored in the ParsedPatternInfo to the Properties. */ - static void - patternInfoToProperties(DecimalFormatProperties &properties, ParsedPatternInfo& patternInfo, - IgnoreRounding _ignoreRounding, UErrorCode &status); + static void patternInfoToProperties(DecimalFormatProperties& properties, + ParsedPatternInfo& patternInfo, IgnoreRounding _ignoreRounding, + UErrorCode& status); }; class U_I18N_API PatternStringUtils { @@ -217,8 +219,8 @@ class U_I18N_API PatternStringUtils { * The property bag to serialize. * @return A pattern string approximately serializing the property bag. */ - static UnicodeString - propertiesToPatternString(const DecimalFormatProperties &properties, UErrorCode &status); + static UnicodeString propertiesToPatternString(const DecimalFormatProperties& properties, + UErrorCode& status); /** @@ -248,14 +250,23 @@ class U_I18N_API PatternStringUtils { * notation. * @return The pattern expressed in the other notation. 
*/ - static UnicodeString - convertLocalized(UnicodeString input, DecimalFormatSymbols symbols, bool toLocalized, - UErrorCode &status); + static UnicodeString convertLocalized(UnicodeString input, DecimalFormatSymbols symbols, + bool toLocalized, UErrorCode& status); + + /** + * This method contains the heart of the logic for rendering LDML affix strings. It handles + * sign-always-shown resolution, whether to use the positive or negative subpattern, permille + * substitution, and plural forms for CurrencyPluralInfo. + */ + static void patternInfoToStringBuilder(const AffixPatternProvider& patternInfo, bool isPrefix, + int8_t signum, UNumberSignDisplay signDisplay, + StandardPlural::Form plural, bool perMilleReplacesPercent, + UnicodeString& output); private: /** @return The number of chars inserted. */ - static int - escapePaddingString(UnicodeString input, UnicodeString &output, int startIndex, UErrorCode &status); + static int escapePaddingString(UnicodeString input, UnicodeString& output, int startIndex, + UErrorCode& status); }; } // namespace impl diff --git a/icu4c/source/i18n/number_types.h b/icu4c/source/i18n/number_types.h index c01765e2cea..3e833125386 100644 --- a/icu4c/source/i18n/number_types.h +++ b/icu4c/source/i18n/number_types.h @@ -16,8 +16,7 @@ #include "uassert.h" #include "unicode/platform.h" -U_NAMESPACE_BEGIN -namespace number { +U_NAMESPACE_BEGIN namespace number { namespace impl { // Typedef several enums for brevity and for easier comparison to Java. @@ -87,15 +86,14 @@ enum AffixPatternType { }; enum CompactType { - TYPE_DECIMAL, - TYPE_CURRENCY + TYPE_DECIMAL, TYPE_CURRENCY }; // TODO: Should this be moved somewhere else, maybe where other ICU classes can use it? 
// Exported as U_I18N_API because it is a base class for other exported types class U_I18N_API CharSequence { -public: + public: virtual ~CharSequence() = default; virtual int32_t length() const = 0; @@ -123,12 +121,20 @@ class U_I18N_API AffixPatternProvider { static const int32_t AFFIX_NEGATIVE_SUBPATTERN = 0x200; static const int32_t AFFIX_PADDING = 0x400; + // Convenience compound flags + static const int32_t AFFIX_POS_PREFIX = AFFIX_PREFIX; + static const int32_t AFFIX_POS_SUFFIX = 0; + static const int32_t AFFIX_NEG_PREFIX = AFFIX_PREFIX | AFFIX_NEGATIVE_SUBPATTERN; + static const int32_t AFFIX_NEG_SUFFIX = AFFIX_NEGATIVE_SUBPATTERN; + virtual ~AffixPatternProvider() = default; virtual char16_t charAt(int flags, int i) const = 0; virtual int length(int flags) const = 0; + virtual UnicodeString getString(int flags) const = 0; + virtual bool hasCurrencySign() const = 0; virtual bool positiveHasPlusSign() const = 0; @@ -137,7 +143,7 @@ class U_I18N_API AffixPatternProvider { virtual bool negativeHasMinusSign() const = 0; - virtual bool containsSymbolType(AffixPatternType, UErrorCode &) const = 0; + virtual bool containsSymbolType(AffixPatternType, UErrorCode&) const = 0; /** * True if the pattern has a number placeholder like "0" or "#,##0.00"; false if the pattern does not @@ -173,8 +179,8 @@ class U_I18N_API Modifier { * formatted. * @return The number of characters (UTF-16 code units) that were added to the string builder. */ - virtual int32_t - apply(NumberStringBuilder &output, int leftIndex, int rightIndex, UErrorCode &status) const = 0; + virtual int32_t apply(NumberStringBuilder& output, int leftIndex, int rightIndex, + UErrorCode& status) const = 0; /** * Gets the length of the prefix. This information can be used in combination with {@link #apply} to extract the @@ -187,7 +193,7 @@ class U_I18N_API Modifier { /** * Returns the number of code points in the modifier, prefix plus suffix. 
*/ - virtual int32_t getCodePointCount(UErrorCode &status) const = 0; + virtual int32_t getCodePointCount(UErrorCode& status) const = 0; /** * Whether this modifier is strong. If a modifier is strong, it should always be applied immediately and not allowed @@ -230,7 +236,8 @@ class U_I18N_API MicroPropsGenerator { * The MicroProps instance to populate. * @return A MicroProps instance resolved for the quantity. */ - virtual void processQuantity(DecimalQuantity& quantity, MicroProps& micros, UErrorCode& status) const = 0; + virtual void processQuantity(DecimalQuantity& quantity, MicroProps& micros, + UErrorCode& status) const = 0; }; /** @@ -255,24 +262,25 @@ class MultiplierProducer { template<typename T> class U_I18N_API NullableValue { public: - NullableValue() : fNull(true) {} + NullableValue() + : fNull(true) {} - NullableValue(const NullableValue &other) = default; + NullableValue(const NullableValue& other) = default; - explicit NullableValue(const T &other) { + explicit NullableValue(const T& other) { fValue = other; fNull = false; } - NullableValue &operator=(const NullableValue &other) = default; + NullableValue& operator=(const NullableValue& other) = default; - NullableValue &operator=(const T &other) { + NullableValue& operator=(const T& other) { fValue = other; fNull = false; return *this; } - bool operator==(const NullableValue &other) const { + bool operator==(const NullableValue& other) const { // "fValue == other.fValue" returns UBool, not bool (causes compiler warnings) return fNull ? other.fNull : (other.fNull ? 
false : static_cast<bool>(fValue == other.fValue)); } @@ -286,7 +294,7 @@ class U_I18N_API NullableValue { return fNull; } - T get(UErrorCode &status) const { + T get(UErrorCode& status) const { if (fNull) { status = U_UNDEFINED_VARIABLE; } diff --git a/icu4c/source/i18n/numparse_affixes.cpp b/icu4c/source/i18n/numparse_affixes.cpp index 66ce2bef8ca..a164f0b70e4 100644 --- a/icu4c/source/i18n/numparse_affixes.cpp +++ b/icu4c/source/i18n/numparse_affixes.cpp @@ -87,10 +87,10 @@ AffixPatternMatcher AffixPatternMatcherBuilder::build() { AffixTokenMatcherWarehouse::AffixTokenMatcherWarehouse(const UChar* currencyCode, - const UnicodeString& currency1, - const UnicodeString& currency2, - const DecimalFormatSymbols& dfs, - IgnorablesMatcher* ignorables, const Locale& locale) + const UnicodeString* currency1, + const UnicodeString* currency2, + const DecimalFormatSymbols* dfs, + IgnorablesMatcher* ignorables, const Locale* locale) : currency1(currency1), currency2(currency2), dfs(dfs), @@ -109,23 +109,23 @@ AffixTokenMatcherWarehouse::~AffixTokenMatcherWarehouse() { } NumberParseMatcher& AffixTokenMatcherWarehouse::minusSign() { - return fMinusSign = {dfs, true}; + return fMinusSign = {*dfs, true}; } NumberParseMatcher& AffixTokenMatcherWarehouse::plusSign() { - return fPlusSign = {dfs, true}; + return fPlusSign = {*dfs, true}; } NumberParseMatcher& AffixTokenMatcherWarehouse::percent() { - return fPercent = {dfs}; + return fPercent = {*dfs}; } NumberParseMatcher& AffixTokenMatcherWarehouse::permille() { - return fPermille = {dfs}; + return fPermille = {*dfs}; } NumberParseMatcher& AffixTokenMatcherWarehouse::currency(UErrorCode& status) { - return fCurrency = {{locale, status}, {currencyCode, currency1, currency2}}; + return fCurrency = {{*locale, status}, {currencyCode, *currency1, *currency2}}; } NumberParseMatcher& AffixTokenMatcherWarehouse::nextCodePointMatcher(UChar32 cp) { @@ -193,8 +193,232 @@ AffixPatternMatcher AffixPatternMatcher::fromAffixPattern(const 
UnicodeString& a AffixPatternMatcher::AffixPatternMatcher(MatcherArray& matchers, int32_t matchersLen, const UnicodeString& pattern) - : ArraySeriesMatcher(matchers, matchersLen), fPattern(pattern) { + : ArraySeriesMatcher(matchers, matchersLen), fPattern(pattern) {} + +UnicodeString AffixPatternMatcher::getPattern() const { + return fPattern.toAliasedUnicodeString(); +} + +bool AffixPatternMatcher::operator==(const AffixPatternMatcher& other) const { + return fPattern == other.fPattern; +} + + +AffixMatcherWarehouse::AffixMatcherWarehouse(const AffixPatternProvider& patternInfo, + NumberParserImpl& output, + AffixTokenMatcherWarehouse& warehouse, + const IgnorablesMatcher& ignorables, parse_flags_t parseFlags, + UErrorCode& status) + : fAffixTokenMatcherWarehouse(std::move(warehouse)) { + if (!isInteresting(patternInfo, ignorables, parseFlags, status)) { + return; + } + + // The affixes have interesting characters, or we are in strict mode. + // Use initial capacity of 6, the highest possible number of AffixMatchers. + UnicodeString sb; + bool includeUnpaired = 0 != (parseFlags & PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES); + UNumberSignDisplay signDisplay = (0 != (parseFlags & PARSE_FLAG_PLUS_SIGN_ALLOWED)) ? UNUM_SIGN_ALWAYS + : UNUM_SIGN_NEVER; + + int32_t numAffixMatchers = 0; + int32_t numAffixPatternMatchers = 0; + + AffixPatternMatcher* posPrefix = nullptr; + AffixPatternMatcher* posSuffix = nullptr; + + // Pre-process the affix strings to resolve LDML rules like sign display. + for (int8_t signum = 1; signum >= -1; signum--) { + // Generate Prefix + bool hasPrefix = false; + PatternStringUtils::patternInfoToStringBuilder( + patternInfo, true, signum, signDisplay, StandardPlural::OTHER, false, sb); + fAffixPatternMatchers[numAffixPatternMatchers] = AffixPatternMatcher::fromAffixPattern( + sb, warehouse, parseFlags, &hasPrefix, status); + AffixPatternMatcher* prefix = hasPrefix ? 
&fAffixPatternMatchers[numAffixPatternMatchers++] + : nullptr; + + // Generate Suffix + bool hasSuffix = false; + PatternStringUtils::patternInfoToStringBuilder( + patternInfo, false, signum, signDisplay, StandardPlural::OTHER, false, sb); + fAffixPatternMatchers[numAffixPatternMatchers] = AffixPatternMatcher::fromAffixPattern( + sb, warehouse, parseFlags, &hasSuffix, status); + AffixPatternMatcher* suffix = hasSuffix ? &fAffixPatternMatchers[numAffixPatternMatchers++] + : nullptr; + + if (signum == 1) { + posPrefix = prefix; + posSuffix = suffix; + } else if (equals(prefix, posPrefix) && equals(suffix, posSuffix)) { + // Skip adding these matchers (we already have equivalents) + continue; + } + + // Flags for setting in the ParsedNumber + int flags = (signum == -1) ? FLAG_NEGATIVE : 0; + + // Note: it is indeed possible for posPrefix and posSuffix to both be null. + // We still need to add that matcher for strict mode to work. + fAffixMatchers[numAffixMatchers++] = {prefix, suffix, flags}; + if (includeUnpaired && prefix != nullptr && suffix != nullptr) { + // The following if statements are designed to prevent adding two identical matchers. + if (signum == 1 || equals(prefix, posPrefix)) { + fAffixMatchers[numAffixMatchers++] = {prefix, nullptr, flags}; + } + if (signum == 1 || equals(suffix, posSuffix)) { + fAffixMatchers[numAffixMatchers++] = {nullptr, suffix, flags}; + } + } + } + + // Put the AffixMatchers in order, and then add them to the output. 
+ // TODO +// Collections.sort(matchers, COMPARATOR); +// output.addMatchers(matchers); +} + +bool AffixMatcherWarehouse::isInteresting(const AffixPatternProvider& patternInfo, + const IgnorablesMatcher& ignorables, parse_flags_t parseFlags, + UErrorCode& status) { + UnicodeStringCharSequence posPrefixString(patternInfo.getString(AffixPatternProvider::AFFIX_POS_PREFIX)); + UnicodeStringCharSequence posSuffixString(patternInfo.getString(AffixPatternProvider::AFFIX_POS_SUFFIX)); + UnicodeStringCharSequence negPrefixString(UnicodeString(u"")); + UnicodeStringCharSequence negSuffixString(UnicodeString(u"")); + if (patternInfo.hasNegativeSubpattern()) { + negPrefixString = UnicodeStringCharSequence(patternInfo.getString(AffixPatternProvider::AFFIX_NEG_PREFIX)); + negSuffixString = UnicodeStringCharSequence(patternInfo.getString(AffixPatternProvider::AFFIX_NEG_SUFFIX)); + } + + if (0 == (parseFlags & PARSE_FLAG_USE_FULL_AFFIXES) && + AffixUtils::containsOnlySymbolsAndIgnorables(posPrefixString, *ignorables.getSet(), status) && + AffixUtils::containsOnlySymbolsAndIgnorables(posSuffixString, *ignorables.getSet(), status) && + AffixUtils::containsOnlySymbolsAndIgnorables(negPrefixString, *ignorables.getSet(), status) && + AffixUtils::containsOnlySymbolsAndIgnorables(negSuffixString, *ignorables.getSet(), status) + // HACK: Plus and minus sign are a special case: we accept them trailing only if they are + // trailing in the pattern string. + && !AffixUtils::containsType(posSuffixString, TYPE_PLUS_SIGN, status) && + !AffixUtils::containsType(posSuffixString, TYPE_MINUS_SIGN, status) && + !AffixUtils::containsType(negSuffixString, TYPE_PLUS_SIGN, status) && + !AffixUtils::containsType(negSuffixString, TYPE_MINUS_SIGN, status)) { + // The affixes contain only symbols and ignorables. + // No need to generate affix matchers. 
+ return false; + } + return true; +} + +bool AffixMatcherWarehouse::equals(const AffixPatternMatcher* lhs, const AffixPatternMatcher* rhs) { + if (lhs == nullptr && rhs == nullptr) { + return true; + } + if (lhs == nullptr || rhs == nullptr) { + return false; + } + return *lhs == *rhs; +} + + +AffixMatcher::AffixMatcher(AffixPatternMatcher* prefix, AffixPatternMatcher* suffix, result_flags_t flags) + : fPrefix(prefix), fSuffix(suffix), fFlags(flags) {} + +bool AffixMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const { + if (!result.seenNumber()) { + // Prefix + // Do not match if: + // 1. We have already seen a prefix (result.prefix != null) + // 2. The prefix in this AffixMatcher is empty (prefix == null) + if (!result.prefix.isBogus() || fPrefix == nullptr) { + return false; + } + + // Attempt to match the prefix. + int initialOffset = segment.getOffset(); + bool maybeMore = fPrefix->match(segment, result, status); + if (initialOffset != segment.getOffset()) { + result.prefix = fPrefix->getPattern(); + } + return maybeMore; + + } else { + // Suffix + // Do not match if: + // 1. We have already seen a suffix (result.suffix != null) + // 2. The suffix in this AffixMatcher is empty (suffix == null) + // 3. The matched prefix does not equal this AffixMatcher's prefix + if (!result.suffix.isBogus() || fSuffix == nullptr || !matched(fPrefix, result.prefix)) { + return false; + } + + // Attempt to match the suffix. 
+ int initialOffset = segment.getOffset(); + bool maybeMore = fSuffix->match(segment, result, status); + if (initialOffset != segment.getOffset()) { + result.suffix = fSuffix->getPattern(); + } + return maybeMore; + } +} + +const UnicodeSet& AffixMatcher::getLeadCodePoints() { + if (fLocalLeadCodePoints.isNull()) { + auto* leadCodePoints = new UnicodeSet(); + if (fPrefix != nullptr) { + leadCodePoints->addAll(fPrefix->getLeadCodePoints()); + } + if (fSuffix != nullptr) { + leadCodePoints->addAll(fSuffix->getLeadCodePoints()); + } + leadCodePoints->freeze(); + fLocalLeadCodePoints.adoptInstead(leadCodePoints); + } + return *fLocalLeadCodePoints; +} + +void AffixMatcher::postProcess(ParsedNumber& result) const { + // Check to see if our affix is the one that was matched. If so, set the flags in the result. + if (matched(fPrefix, result.prefix) && matched(fSuffix, result.suffix)) { + // Fill in the result prefix and suffix with non-null values (empty string). + // Used by strict mode to determine whether an entire affix pair was matched. 
+ if (result.prefix.isBogus()) { + result.prefix = UnicodeString(); + } + if (result.suffix.isBogus()) { + result.suffix = UnicodeString(); + } + result.flags |= fFlags; + } +} + +bool AffixMatcher::matched(const AffixPatternMatcher* affix, const UnicodeString& patternString) { + return (affix == nullptr && patternString.isBogus()) || + (affix != nullptr && affix->getPattern() == patternString); } #endif /* #if !UCONFIG_NO_FORMATTING */ + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/icu4c/source/i18n/numparse_affixes.h b/icu4c/source/i18n/numparse_affixes.h index fa11ab78d75..69c68227de5 100644 --- a/icu4c/source/i18n/numparse_affixes.h +++ b/icu4c/source/i18n/numparse_affixes.h @@ -19,6 +19,9 @@ namespace impl { class AffixPatternMatcherBuilder; class AffixPatternMatcher; +using ::icu::number::impl::AffixPatternProvider; +using ::icu::number::impl::TokenConsumer; + class CodePointMatcher : public NumberParseMatcher, public UMemory { public: @@ -51,9 +54,13 @@ class AffixTokenMatcherWarehouse { static constexpr int32_t CODE_POINT_BATCH_SIZE = 10; // Number of entries per heap allocation public: - AffixTokenMatcherWarehouse(const UChar* currencyCode, const UnicodeString& currency1, - const UnicodeString& currency2, const DecimalFormatSymbols& dfs, - IgnorablesMatcher* ignorables, const Locale& locale); + AffixTokenMatcherWarehouse() = default; // WARNING: Leaves the object in an unusable state + + AffixTokenMatcherWarehouse(const UChar* currencyCode, const UnicodeString* currency1, + const UnicodeString* currency2, const DecimalFormatSymbols* dfs, + IgnorablesMatcher* ignorables, const Locale* locale); + + AffixTokenMatcherWarehouse(AffixTokenMatcherWarehouse&& src) = default; ~AffixTokenMatcherWarehouse(); @@ -70,12 +77,13 @@ class AffixTokenMatcherWarehouse { NumberParseMatcher& nextCodePointMatcher(UChar32 cp); private: + // NOTE: The following fields may be unsafe to access after construction is done! 
UChar currencyCode[4]; - const UnicodeString& currency1; - const UnicodeString& currency2; - const DecimalFormatSymbols& dfs; + const UnicodeString* currency1; + const UnicodeString* currency2; + const DecimalFormatSymbols* dfs; IgnorablesMatcher* ignorables; - const Locale locale; + const Locale* locale; // NOTE: These are default-constructed and should not be used until initialized. MinusSignMatcher fMinusSign; @@ -94,7 +102,7 @@ class AffixTokenMatcherWarehouse { }; -class AffixPatternMatcherBuilder : public ::icu::number::impl::TokenConsumer { +class AffixPatternMatcherBuilder : public TokenConsumer { public: AffixPatternMatcherBuilder(const UnicodeString& pattern, AffixTokenMatcherWarehouse& warehouse, IgnorablesMatcher* ignorables); @@ -119,15 +127,19 @@ class AffixPatternMatcherBuilder : public ::icu::number::impl::TokenConsumer { class AffixPatternMatcher : public ArraySeriesMatcher { public: + AffixPatternMatcher() = default; // WARNING: Leaves the object in an unusable state + static AffixPatternMatcher fromAffixPattern(const UnicodeString& affixPattern, AffixTokenMatcherWarehouse& warehouse, parse_flags_t parseFlags, bool* success, UErrorCode& status); - private: - UnicodeString fPattern; + UnicodeString getPattern() const; - AffixPatternMatcher() = default; // WARNING: Leaves the object in an unusable state + bool operator==(const AffixPatternMatcher& other) const; + + private: + CompactUnicodeString<4> fPattern; AffixPatternMatcher(MatcherArray& matchers, int32_t matchersLen, const UnicodeString& pattern); @@ -135,6 +147,65 @@ class AffixPatternMatcher : public ArraySeriesMatcher { }; +class AffixMatcher : public NumberParseMatcher, public UMemory { + public: + AffixMatcher() = default; // WARNING: Leaves the object in an unusable state + + AffixMatcher(AffixPatternMatcher* prefix, AffixPatternMatcher* suffix, result_flags_t flags); + + // static void createMatchers() is the constructor for AffixMatcherWarehouse in C++ + + bool match(StringSegment& 
segment, ParsedNumber& result, UErrorCode& status) const override; + + void postProcess(ParsedNumber& result) const override; + + const UnicodeSet& getLeadCodePoints() override; + + private: + AffixPatternMatcher* fPrefix; + AffixPatternMatcher* fSuffix; + result_flags_t fFlags; + + /** + * Helper method to return whether the given AffixPatternMatcher equals the given pattern string. + * Either both arguments must be null or the pattern string inside the AffixPatternMatcher must equal + * the given pattern string. + */ + static bool matched(const AffixPatternMatcher* affix, const UnicodeString& patternString); +}; + + +/** + * A C++-only class to retain ownership of the AffixMatchers needed for parsing. + */ +class AffixMatcherWarehouse { + public: + AffixMatcherWarehouse() = default; // WARNING: Leaves the object in an unusable state + + // in Java, this is AffixMatcher#createMatchers() + AffixMatcherWarehouse(const AffixPatternProvider& patternInfo, NumberParserImpl& output, + AffixTokenMatcherWarehouse& warehouse, const IgnorablesMatcher& ignorables, + parse_flags_t parseFlags, UErrorCode& status); + + private: + // 9 is the limit: positive, zero, and negative, each with prefix, suffix, and prefix+suffix + AffixMatcher fAffixMatchers[9]; + // 6 is the limit: positive, zero, and negative, a prefix and a suffix for each + AffixPatternMatcher fAffixPatternMatchers[6]; + // Store all the tokens used by the AffixPatternMatchers + AffixTokenMatcherWarehouse fAffixTokenMatcherWarehouse; + + static bool isInteresting(const AffixPatternProvider& patternInfo, const IgnorablesMatcher& ignorables, + parse_flags_t parseFlags, UErrorCode& status); + + /** + * Helper method to return whether (1) both lhs and rhs are null/invalid, or (2) if they are both + * valid, whether they are equal according to operator==. 
Similar to Java Objects.equals() + */ + static bool equals(const AffixPatternMatcher* lhs, const AffixPatternMatcher* rhs); +}; + + } // namespace impl } // namespace numparse U_NAMESPACE_END diff --git a/icu4c/source/i18n/numparse_symbols.cpp b/icu4c/source/i18n/numparse_symbols.cpp index 6654bea7de0..3ba12a68df7 100644 --- a/icu4c/source/i18n/numparse_symbols.cpp +++ b/icu4c/source/i18n/numparse_symbols.cpp @@ -23,7 +23,7 @@ SymbolMatcher::SymbolMatcher(const UnicodeString& symbolString, unisets::Key key } } -const UnicodeSet* SymbolMatcher::getSet() { +const UnicodeSet* SymbolMatcher::getSet() const { return fUniSet; } diff --git a/icu4c/source/i18n/numparse_symbols.h b/icu4c/source/i18n/numparse_symbols.h index c06724fbe72..cf5b8d86680 100644 --- a/icu4c/source/i18n/numparse_symbols.h +++ b/icu4c/source/i18n/numparse_symbols.h @@ -24,7 +24,7 @@ class SymbolMatcher : public NumberParseMatcher, public UMemory { public: SymbolMatcher() = default; // WARNING: Leaves the object in an unusable state - const UnicodeSet* getSet(); + const UnicodeSet* getSet() const; bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override; diff --git a/icu4c/source/i18n/numparse_types.h b/icu4c/source/i18n/numparse_types.h index 76aa75e0fcb..8a92dc93fbd 100644 --- a/icu4c/source/i18n/numparse_types.h +++ b/icu4c/source/i18n/numparse_types.h @@ -48,11 +48,35 @@ enum ParseFlags { PARSE_FLAG_PLUS_SIGN_ALLOWED = 0x0400, }; -//template -//struct MaybeNeedsAdoption { -// T* ptr; -// bool needsAdoption; -//}; + +// TODO: Is this class worthwhile? 
+template<int32_t stackCapacity> +class CompactUnicodeString { + public: + CompactUnicodeString() { + static_assert(stackCapacity > 0, "cannot have zero space on stack"); + fBuffer[0] = 0; + } + + CompactUnicodeString(const UnicodeString& text) + : fBuffer(text.length() + 1) { + memcpy(fBuffer.getAlias(), text.getBuffer(), sizeof(UChar) * text.length()); + fBuffer[text.length()] = 0; + } + + inline UnicodeString toAliasedUnicodeString() const { + return UnicodeString(TRUE, fBuffer.getAlias(), -1); + } + + bool operator==(const CompactUnicodeString& other) const { + // Use the alias-only constructor and then call UnicodeString operator== + return toAliasedUnicodeString() == other.toAliasedUnicodeString(); + } + + private: + MaybeStackArray<UChar, stackCapacity> fBuffer; +}; + /** * Struct-like class to hold the results of a parsing routine. diff --git a/icu4c/source/test/intltest/numbertest_parse.cpp b/icu4c/source/test/intltest/numbertest_parse.cpp index 9cb8dd12d4a..4fc4da370d7 100644 --- a/icu4c/source/test/intltest/numbertest_parse.cpp +++ b/icu4c/source/test/intltest/numbertest_parse.cpp @@ -215,8 +215,12 @@ void NumberParserTest::testSeriesMatcher() { void NumberParserTest::testCurrencyAnyMatcher() { IcuTestErrorCode status(*this, "testCurrencyAnyMatcher"); + UnicodeString currency1(u"IU$"); + UnicodeString currency2(u"ICU"); + DecimalFormatSymbols symbols("en", status); IgnorablesMatcher ignorables(unisets::DEFAULT_IGNORABLES); - AffixTokenMatcherWarehouse warehouse(u"ICU", u"IU$", u"ICU", {"en",status}, &ignorables, "en"); + Locale locale("en"); + AffixTokenMatcherWarehouse warehouse(u"ICU", &currency1, &currency2, &symbols, &ignorables, &locale); NumberParseMatcher& matcher = warehouse.currency(status); static const struct TestCase{ @@ -248,8 +252,12 @@ void NumberParserTest::testCurrencyAnyMatcher() { void NumberParserTest::testAffixPatternMatcher() { IcuTestErrorCode status(*this, "testAffixPatternMatcher"); + UnicodeString currency1(u"foo"); + UnicodeString currency2(u"bar"); + DecimalFormatSymbols 
symbols("en", status); IgnorablesMatcher ignorables(unisets::DEFAULT_IGNORABLES); - AffixTokenMatcherWarehouse warehouse(u"EUR", u"foo", u"bar", {"en", status}, &ignorables, "en"); + Locale locale("en"); + AffixTokenMatcherWarehouse warehouse(u"EUR", &currency1, &currency2, &symbols, &ignorables, &locale); static const struct TestCase { bool exactMatch; -- 2.40.0