#include "number_affixutils.h"
#include "unicode/utf16.h"
+#include "unicode/uniset.h"
using namespace icu;
using namespace icu::number;
return output;
}
+/**
+ * Scans the tokens of an affix pattern and reports whether every token of type TYPE_CODEPOINT
+ * carries a code point that is a member of the given ignorables set. Tokens of any other type
+ * never cause a rejection.
+ *
+ * @param affixPattern The affix pattern to scan. A pattern of length zero yields true.
+ * @param ignorables The set of code points that are tolerated in TYPE_CODEPOINT tokens.
+ * @param status Handed to nextToken(); if it indicates a failure after a token is read, the
+ *               scan stops and false is returned.
+ * @return true if no TYPE_CODEPOINT token outside of the ignorables set was encountered;
+ *         false otherwise (including on failure).
+ */
+bool AffixUtils::containsOnlySymbolsAndIgnorables(const CharSequence& affixPattern,
+                                                  const UnicodeSet& ignorables, UErrorCode& status) {
+    if (affixPattern.length() == 0) {
+        return true;
+    }
+    AffixTag tag;
+    while (hasNext(tag, affixPattern)) {
+        tag = nextToken(tag, affixPattern, status);
+        if (U_FAILURE(status)) { return false; }
+        // Only TYPE_CODEPOINT tokens are checked against the set; all other types pass.
+        if (tag.type == TYPE_CODEPOINT && !ignorables.contains(tag.codePoint)) {
+            return false;
+        }
+    }
+    return true;
+}
+
void AffixUtils::iterateWithConsumer(const CharSequence& affixPattern, TokenConsumer& consumer,
UErrorCode& status) {
if (affixPattern.length() == 0) {
AffixPatternState state;
AffixPatternType type;
- AffixTag() : offset(0), state(STATE_BASE) {}
+ AffixTag()
+ : offset(0), state(STATE_BASE) {}
- AffixTag(int32_t offset) : offset(offset) {}
+ AffixTag(int32_t offset)
+ : offset(offset) {}
AffixTag(int32_t offset, UChar32 codePoint, AffixPatternState state, AffixPatternType type)
- : offset(offset), codePoint(codePoint), state(state), type(type)
- {}
+ : offset(offset), codePoint(codePoint), state(state), type(type) {}
};
class TokenConsumer {
* @param patternString The original string whose width will be estimated.
* @return The length of the unescaped string.
*/
- static int32_t estimateLength(const CharSequence &patternString, UErrorCode &status);
+ static int32_t estimateLength(const CharSequence& patternString, UErrorCode& status);
/**
* Takes a string and escapes (quotes) characters that have special meaning in the affix pattern
* @param input The string to be escaped.
* @return The resulting UnicodeString.
*/
- static UnicodeString escape(const CharSequence &input);
+ static UnicodeString escape(const CharSequence& input);
static Field getFieldForType(AffixPatternType type);
* @param position The index into the NumberStringBuilder to insert the string.
* @param provider An object to generate locale symbols.
*/
- static int32_t
- unescape(const CharSequence &affixPattern, NumberStringBuilder &output, int32_t position,
- const SymbolProvider &provider, UErrorCode &status);
+ static int32_t unescape(const CharSequence& affixPattern, NumberStringBuilder& output,
+ int32_t position, const SymbolProvider& provider, UErrorCode& status);
/**
* Same as {@link #unescape}, but only calculates the code point count. More efficient than {@link #unescape}
* @param provider An object to generate locale symbols.
* @return The same return value as if you called {@link #unescape}.
*/
- static int32_t unescapedCodePointCount(const CharSequence &affixPattern,
- const SymbolProvider &provider, UErrorCode &status);
+ static int32_t unescapedCodePointCount(const CharSequence& affixPattern,
+ const SymbolProvider& provider, UErrorCode& status);
/**
* Checks whether the given affix pattern contains at least one token of the given type, which is
* @param type The token type.
* @return true if the affix pattern contains the given token type; false otherwise.
*/
- static bool
- containsType(const CharSequence &affixPattern, AffixPatternType type, UErrorCode &status);
+ static bool containsType(const CharSequence& affixPattern, AffixPatternType type, UErrorCode& status);
/**
* Checks whether the specified affix pattern has any unquoted currency symbols ("¤").
* @param affixPattern The string to check for currency symbols.
* @return true if the literal has at least one unquoted currency symbol; false otherwise.
*/
- static bool hasCurrencySymbols(const CharSequence &affixPattern, UErrorCode &status);
+ static bool hasCurrencySymbols(const CharSequence& affixPattern, UErrorCode& status);
/**
* Replaces all occurrences of tokens with the given type with the given replacement char.
* @param replacementChar The char to substitute in place of chars of the given token type.
* @return A string containing the new affix pattern.
*/
- static UnicodeString
- replaceType(const CharSequence &affixPattern, AffixPatternType type, char16_t replacementChar,
- UErrorCode &status);
+ static UnicodeString replaceType(const CharSequence& affixPattern, AffixPatternType type,
+ char16_t replacementChar, UErrorCode& status);
+
+ /**
+ * Returns whether the given affix pattern contains only symbols and ignorables as defined by the
+ * given ignorables set.
+ *
+ * @param affixPattern The affix pattern to check. An empty pattern is accepted.
+ * @param ignorables The set of code points that may appear as TYPE_CODEPOINT tokens.
+ * @param status Set by the tokenizer if the pattern cannot be read; false is returned in that case.
+ * @return true if every TYPE_CODEPOINT token in the pattern is contained in the ignorables set;
+ * false otherwise.
+ */
+ static bool containsOnlySymbolsAndIgnorables(const CharSequence& affixPattern,
+ const UnicodeSet& ignorables, UErrorCode& status);
/**
* Iterates over the affix pattern, calling the TokenConsumer for each token.
* (never negative), or -1 if there were no more tokens in the affix pattern.
* @see #hasNext
*/
- static AffixTag nextToken(AffixTag tag, const CharSequence &patternString, UErrorCode &status);
+ static AffixTag nextToken(AffixTag tag, const CharSequence& patternString, UErrorCode& status);
/**
* Returns whether the affix pattern string has any more tokens to be retrieved from a call to
* @param string The affix pattern.
* @return true if there are more tokens to consume; false otherwise.
*/
- static bool hasNext(const AffixTag &tag, const CharSequence &string);
+ static bool hasNext(const AffixTag& tag, const CharSequence& string);
private:
/**
* The order of the arguments is consistent with Java, but the order of the stored
* fields is not necessarily the same.
*/
- static inline AffixTag
- makeTag(int32_t offset, AffixPatternType type, AffixPatternState state, UChar32 cp) {
+ static inline AffixTag makeTag(int32_t offset, AffixPatternType type, AffixPatternState state,
+ UChar32 cp) {
return {offset, cp, state, type};
}
};
using namespace icu::number;
using namespace icu::number::impl;
-MutablePatternModifier::MutablePatternModifier(bool isStrong) : fStrong(isStrong) {}
+MutablePatternModifier::MutablePatternModifier(bool isStrong)
+ : fStrong(isStrong) {}
-void MutablePatternModifier::setPatternInfo(const AffixPatternProvider *patternInfo) {
+void MutablePatternModifier::setPatternInfo(const AffixPatternProvider* patternInfo) {
this->patternInfo = patternInfo;
}
this->perMilleReplacesPercent = perMille;
}
-void
-MutablePatternModifier::setSymbols(const DecimalFormatSymbols *symbols, const CurrencyUnit ¤cy,
- const UNumberUnitWidth unitWidth, const PluralRules *rules) {
+void MutablePatternModifier::setSymbols(const DecimalFormatSymbols* symbols, const CurrencyUnit& currency,
+ const UNumberUnitWidth unitWidth, const PluralRules* rules) {
U_ASSERT((rules != nullptr) == needsPlurals());
this->symbols = symbols;
- uprv_memcpy(static_cast<char16_t *>(this->currencyCode),
+ uprv_memcpy(static_cast<char16_t*>(this->currencyCode),
currency.getISOCurrency(),
sizeof(char16_t) * 4);
this->unitWidth = unitWidth;
// Silently ignore any error codes.
}
-ImmutablePatternModifier *MutablePatternModifier::createImmutable(UErrorCode &status) {
+ImmutablePatternModifier* MutablePatternModifier::createImmutable(UErrorCode& status) {
return createImmutableAndChain(nullptr, status);
}
-ImmutablePatternModifier *
-MutablePatternModifier::createImmutableAndChain(const MicroPropsGenerator *parent, UErrorCode &status) {
+ImmutablePatternModifier*
+MutablePatternModifier::createImmutableAndChain(const MicroPropsGenerator* parent, UErrorCode& status) {
// TODO: Move StandardPlural VALUES to standardplural.h
static const StandardPlural::Form STANDARD_PLURAL_VALUES[] = {
} else {
// Faster path when plural keyword is not needed.
setNumberProperties(1, StandardPlural::Form::COUNT);
- Modifier *positive = createConstantModifier(status);
+ Modifier* positive = createConstantModifier(status);
setNumberProperties(0, StandardPlural::Form::COUNT);
- Modifier *zero = createConstantModifier(status);
+ Modifier* zero = createConstantModifier(status);
setNumberProperties(-1, StandardPlural::Form::COUNT);
- Modifier *negative = createConstantModifier(status);
+ Modifier* negative = createConstantModifier(status);
pm->adoptPositiveNegativeModifiers(positive, zero, negative);
if (U_FAILURE(status)) {
delete pm;
}
}
-ConstantMultiFieldModifier *MutablePatternModifier::createConstantModifier(UErrorCode &status) {
+ConstantMultiFieldModifier* MutablePatternModifier::createConstantModifier(UErrorCode& status) {
NumberStringBuilder a;
NumberStringBuilder b;
insertPrefix(a, 0, status);
insertSuffix(b, 0, status);
if (patternInfo->hasCurrencySign()) {
- return new CurrencySpacingEnabledModifier(a, b, !patternInfo->hasBody(), fStrong, *symbols, status);
+ return new CurrencySpacingEnabledModifier(
+ a, b, !patternInfo->hasBody(), fStrong, *symbols, status);
} else {
return new ConstantMultiFieldModifier(a, b, !patternInfo->hasBody(), fStrong);
}
}
-ImmutablePatternModifier::ImmutablePatternModifier(ParameterizedModifier *pm, const PluralRules *rules,
- const MicroPropsGenerator *parent)
+ImmutablePatternModifier::ImmutablePatternModifier(ParameterizedModifier* pm, const PluralRules* rules,
+ const MicroPropsGenerator* parent)
: pm(pm), rules(rules), parent(parent) {}
-void ImmutablePatternModifier::processQuantity(DecimalQuantity &quantity, MicroProps µs,
- UErrorCode &status) const {
+void ImmutablePatternModifier::processQuantity(DecimalQuantity& quantity, MicroProps& micros,
+ UErrorCode& status) const {
parent->processQuantity(quantity, micros, status);
applyToMicros(micros, quantity);
}
-void ImmutablePatternModifier::applyToMicros(MicroProps µs, DecimalQuantity &quantity) const {
+void ImmutablePatternModifier::applyToMicros(MicroProps& micros, DecimalQuantity& quantity) const {
if (rules == nullptr) {
micros.modMiddle = pm->getModifier(quantity.signum());
} else {
}
/** Used by the unsafe code path. */
-MicroPropsGenerator &MutablePatternModifier::addToChain(const MicroPropsGenerator *parent) {
+MicroPropsGenerator& MutablePatternModifier::addToChain(const MicroPropsGenerator* parent) {
this->parent = parent;
return *this;
}
-void MutablePatternModifier::processQuantity(DecimalQuantity &fq, MicroProps µs,
- UErrorCode &status) const {
+void MutablePatternModifier::processQuantity(DecimalQuantity& fq, MicroProps& micros,
+ UErrorCode& status) const {
parent->processQuantity(fq, micros, status);
// The unsafe code path performs self-mutation, so we need a const_cast.
// This method needs to be const because it overrides a const method in the parent class.
- auto nonConstThis = const_cast<MutablePatternModifier *>(this);
+ auto nonConstThis = const_cast<MutablePatternModifier*>(this);
if (needsPlurals()) {
// TODO: Fix this. Avoid the copy.
DecimalQuantity copy(fq);
micros.modMiddle = this;
}
-int32_t MutablePatternModifier::apply(NumberStringBuilder &output, int32_t leftIndex, int32_t rightIndex,
- UErrorCode &status) const {
+int32_t MutablePatternModifier::apply(NumberStringBuilder& output, int32_t leftIndex, int32_t rightIndex,
+ UErrorCode& status) const {
// The unsafe code path performs self-mutation, so we need a const_cast.
// This method needs to be const because it overrides a const method in the parent class.
- auto nonConstThis = const_cast<MutablePatternModifier *>(this);
+ auto nonConstThis = const_cast<MutablePatternModifier*>(this);
int32_t prefixLen = nonConstThis->insertPrefix(output, leftIndex, status);
int32_t suffixLen = nonConstThis->insertSuffix(output, rightIndex + prefixLen, status);
// If the pattern had no decimal stem body (like #,##0.00), overwrite the value.
int32_t overwriteLen = 0;
if (!patternInfo->hasBody()) {
overwriteLen = output.splice(
- leftIndex + prefixLen, rightIndex + prefixLen,
- UnicodeString(), 0, 0, UNUM_FIELD_COUNT,
- status);
+ leftIndex + prefixLen,
+ rightIndex + prefixLen,
+ UnicodeString(),
+ 0,
+ 0,
+ UNUM_FIELD_COUNT,
+ status);
}
CurrencySpacingEnabledModifier::applyCurrencySpacing(
output,
return prefixLen + overwriteLen + suffixLen;
}
-int32_t MutablePatternModifier::getPrefixLength(UErrorCode &status) const {
+int32_t MutablePatternModifier::getPrefixLength(UErrorCode& status) const {
// The unsafe code path performs self-mutation, so we need a const_cast.
// This method needs to be const because it overrides a const method in the parent class.
- auto nonConstThis = const_cast<MutablePatternModifier *>(this);
+ auto nonConstThis = const_cast<MutablePatternModifier*>(this);
// Enter and exit CharSequence Mode to get the length.
- nonConstThis->enterCharSequenceMode(true);
- int result = AffixUtils::unescapedCodePointCount(*this, *this, status); // prefix length
- nonConstThis->exitCharSequenceMode();
+ nonConstThis->prepareAffix(true);
+ int result = AffixUtils::unescapedCodePointCount(
+ UnicodeStringCharSequence(currentAffix),
+ *this,
+ status); // prefix length
return result;
}
-int32_t MutablePatternModifier::getCodePointCount(UErrorCode &status) const {
+int32_t MutablePatternModifier::getCodePointCount(UErrorCode& status) const {
// The unsafe code path performs self-mutation, so we need a const_cast.
// This method needs to be const because it overrides a const method in the parent class.
- auto nonConstThis = const_cast<MutablePatternModifier *>(this);
-
- // Enter and exit CharSequence Mode to get the length.
- nonConstThis->enterCharSequenceMode(true);
- int result = AffixUtils::unescapedCodePointCount(*this, *this, status); // prefix length
- nonConstThis->exitCharSequenceMode();
- nonConstThis->enterCharSequenceMode(false);
- result += AffixUtils::unescapedCodePointCount(*this, *this, status); // suffix length
- nonConstThis->exitCharSequenceMode();
+ auto nonConstThis = const_cast<MutablePatternModifier*>(this);
+
+ // Render the affixes to get the length
+ nonConstThis->prepareAffix(true);
+ int result = AffixUtils::unescapedCodePointCount(
+ UnicodeStringCharSequence(currentAffix),
+ *this,
+ status); // prefix length
+ nonConstThis->prepareAffix(false);
+ result += AffixUtils::unescapedCodePointCount(
+ UnicodeStringCharSequence(currentAffix),
+ *this,
+ status); // suffix length
return result;
}
return fStrong;
}
-int32_t MutablePatternModifier::insertPrefix(NumberStringBuilder &sb, int position, UErrorCode &status) {
- enterCharSequenceMode(true);
- int length = AffixUtils::unescape(*this, sb, position, *this, status);
- exitCharSequenceMode();
+int32_t MutablePatternModifier::insertPrefix(NumberStringBuilder& sb, int position, UErrorCode& status) {
+ prepareAffix(true);
+ int length = AffixUtils::unescape(
+ UnicodeStringCharSequence(currentAffix), sb, position, *this, status);
return length;
}
-int32_t MutablePatternModifier::insertSuffix(NumberStringBuilder &sb, int position, UErrorCode &status) {
- enterCharSequenceMode(false);
- int length = AffixUtils::unescape(*this, sb, position, *this, status);
- exitCharSequenceMode();
+int32_t MutablePatternModifier::insertSuffix(NumberStringBuilder& sb, int position, UErrorCode& status) {
+ prepareAffix(false);
+ int length = AffixUtils::unescape(
+ UnicodeStringCharSequence(currentAffix), sb, position, *this, status);
return length;
}
+/**
+ * Renders either the prefix or the suffix of the current pattern into the transient
+ * currentAffix string. The work is delegated to PatternStringUtils::patternInfoToStringBuilder,
+ * which receives this modifier's current signum, signDisplay, plural and
+ * perMilleReplacesPercent settings.
+ *
+ * @param isPrefix true to render the prefix; false to render the suffix.
+ */
+void MutablePatternModifier::prepareAffix(bool isPrefix) {
+    const AffixPatternProvider& provider = *patternInfo;
+    PatternStringUtils::patternInfoToStringBuilder(provider,
+                                                   isPrefix,
+                                                   signum,
+                                                   signDisplay,
+                                                   plural,
+                                                   perMilleReplacesPercent,
+                                                   currentAffix);
+}
+
UnicodeString MutablePatternModifier::getSymbol(AffixPatternType type) const {
switch (type) {
case AffixPatternType::TYPE_MINUS_SIGN:
return UnicodeString();
} else {
UCurrNameStyle selector = (unitWidth == UNumberUnitWidth::UNUM_UNIT_WIDTH_NARROW)
- ? UCurrNameStyle::UCURR_NARROW_SYMBOL_NAME
- : UCurrNameStyle::UCURR_SYMBOL_NAME;
+ ? UCurrNameStyle::UCURR_NARROW_SYMBOL_NAME
+ : UCurrNameStyle::UCURR_SYMBOL_NAME;
UErrorCode status = U_ZERO_ERROR;
UBool isChoiceFormat = FALSE;
int32_t symbolLen = 0;
- const char16_t *symbol = ucurr_getName(
+ const char16_t* symbol = ucurr_getName(
currencyCode,
symbols->getLocale().getName(),
selector,
UErrorCode status = U_ZERO_ERROR;
UBool isChoiceFormat = FALSE;
int32_t symbolLen = 0;
- const char16_t *symbol = ucurr_getPluralName(
+ const char16_t* symbol = ucurr_getPluralName(
currencyCode,
symbols->getLocale().getName(),
&isChoiceFormat,
}
}
-/** This method contains the heart of the logic for rendering LDML affix strings. */
-void MutablePatternModifier::enterCharSequenceMode(bool isPrefix) {
- U_ASSERT(!inCharSequenceMode);
- inCharSequenceMode = true;
-
- // Should the output render '+' where '-' would normally appear in the pattern?
- plusReplacesMinusSign = signum != -1
- && (signDisplay == UNUM_SIGN_ALWAYS
- || signDisplay == UNUM_SIGN_ACCOUNTING_ALWAYS
- || (signum == 1
- && (signDisplay == UNUM_SIGN_EXCEPT_ZERO
- || signDisplay == UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO)))
- && patternInfo->positiveHasPlusSign() == false;
-
- // Should we use the affix from the negative subpattern? (If not, we will use the positive subpattern.)
- bool useNegativeAffixPattern = patternInfo->hasNegativeSubpattern() && (
- signum == -1 || (patternInfo->negativeHasMinusSign() && plusReplacesMinusSign));
-
- // Resolve the flags for the affix pattern.
- fFlags = 0;
- if (useNegativeAffixPattern) {
- fFlags |= AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN;
- }
- if (isPrefix) {
- fFlags |= AffixPatternProvider::AFFIX_PREFIX;
- }
- if (plural != StandardPlural::Form::COUNT) {
- U_ASSERT(plural == (AffixPatternProvider::AFFIX_PLURAL_MASK & plural));
- fFlags |= plural;
- }
-
- // Should we prepend a sign to the pattern?
- if (!isPrefix || useNegativeAffixPattern) {
- prependSign = false;
- } else if (signum == -1) {
- prependSign = signDisplay != UNUM_SIGN_NEVER;
- } else {
- prependSign = plusReplacesMinusSign;
- }
-
- // Finally, compute the length of the affix pattern.
- fLength = patternInfo->length(fFlags) + (prependSign ? 1 : 0);
-}
-
-void MutablePatternModifier::exitCharSequenceMode() {
- U_ASSERT(inCharSequenceMode);
- inCharSequenceMode = false;
-}
-
-int32_t MutablePatternModifier::length() const {
- U_ASSERT(inCharSequenceMode);
- return fLength;
-}
-
-char16_t MutablePatternModifier::charAt(int32_t index) const {
- U_ASSERT(inCharSequenceMode);
- char16_t candidate;
- if (prependSign && index == 0) {
- candidate = u'-';
- } else if (prependSign) {
- candidate = patternInfo->charAt(fFlags, index - 1);
- } else {
- candidate = patternInfo->charAt(fFlags, index);
- }
- if (plusReplacesMinusSign && candidate == u'-') {
- return u'+';
- }
- if (perMilleReplacesPercent && candidate == u'%') {
- return u'โฐ';
- }
- return candidate;
-}
-
UnicodeString MutablePatternModifier::toUnicodeString() const {
// Never called by AffixUtils
U_ASSERT(false);
// Exported as U_I18N_API because it is needed for the unit test PatternModifierTest
class U_I18N_API ImmutablePatternModifier : public MicroPropsGenerator, public UMemory {
public:
- ~ImmutablePatternModifier() U_OVERRIDE = default;
+ ~ImmutablePatternModifier() U_OVERRIDE = default;
- void processQuantity(DecimalQuantity &, MicroProps µs, UErrorCode &status) const U_OVERRIDE;
+ void processQuantity(DecimalQuantity&, MicroProps& micros, UErrorCode& status) const U_OVERRIDE;
- void applyToMicros(MicroProps µs, DecimalQuantity &quantity) const;
+ void applyToMicros(MicroProps& micros, DecimalQuantity& quantity) const;
private:
- ImmutablePatternModifier(ParameterizedModifier *pm, const PluralRules *rules, const MicroPropsGenerator *parent);
+ ImmutablePatternModifier(ParameterizedModifier* pm, const PluralRules* rules,
+ const MicroPropsGenerator* parent);
const LocalPointer<ParameterizedModifier> pm;
- const PluralRules *rules;
- const MicroPropsGenerator *parent;
+ const PluralRules* rules;
+ const MicroPropsGenerator* parent;
- friend class MutablePatternModifier;
+ friend class MutablePatternModifier;
};
/**
: public MicroPropsGenerator,
public Modifier,
public SymbolProvider,
- public CharSequence,
public UMemory {
public:
*/
UnicodeString getSymbol(AffixPatternType type) const U_OVERRIDE;
- int32_t length() const U_OVERRIDE;
-
- char16_t charAt(int32_t index) const U_OVERRIDE;
-
- // Use default implementation of codePointAt
-
- UnicodeString toUnicodeString() const U_OVERRIDE;
+ UnicodeString toUnicodeString() const;
private:
// Modifier details (initialized in constructor)
// QuantityChain details (initialized in addToChain)
const MicroPropsGenerator *parent;
- // Transient CharSequence fields (initialized in enterCharSequenceMode)
- bool inCharSequenceMode = false;
- int32_t fFlags;
- int32_t fLength;
- bool prependSign;
- bool plusReplacesMinusSign;
+ // Transient fields for rendering
+ UnicodeString currentAffix;
/**
* Uses the current properties to create a single {@link ConstantMultiFieldModifier} with currency spacing support
int32_t insertSuffix(NumberStringBuilder &sb, int position, UErrorCode &status);
- void enterCharSequenceMode(bool isPrefix);
-
- void exitCharSequenceMode();
+ void prepareAffix(bool isPrefix);
};
using namespace icu::number;
using namespace icu::number::impl;
-void PatternParser::parseToPatternInfo(const UnicodeString& patternString, ParsedPatternInfo& patternInfo, UErrorCode &status) {
+void PatternParser::parseToPatternInfo(const UnicodeString& patternString, ParsedPatternInfo& patternInfo,
+ UErrorCode& status) {
patternInfo.consumePattern(patternString, status);
}
DecimalFormatProperties
PatternParser::parseToProperties(const UnicodeString& pattern, IgnoreRounding ignoreRounding,
- UErrorCode &status) {
+ UErrorCode& status) {
DecimalFormatProperties properties;
parseToExistingPropertiesImpl(pattern, properties, ignoreRounding, status);
return properties;
}
-void PatternParser::parseToExistingProperties(const UnicodeString& pattern, DecimalFormatProperties& properties,
- IgnoreRounding ignoreRounding, UErrorCode &status) {
+void
+PatternParser::parseToExistingProperties(const UnicodeString& pattern, DecimalFormatProperties& properties,
+ IgnoreRounding ignoreRounding, UErrorCode& status) {
parseToExistingPropertiesImpl(pattern, properties, ignoreRounding, status);
}
char16_t ParsedPatternInfo::charAt(int32_t flags, int32_t index) const {
- const Endpoints &endpoints = getEndpoints(flags);
+ const Endpoints& endpoints = getEndpoints(flags);
if (index < 0 || index >= endpoints.end - endpoints.start) {
U_ASSERT(false);
}
return getLengthFromEndpoints(getEndpoints(flags));
}
-int32_t ParsedPatternInfo::getLengthFromEndpoints(const Endpoints &endpoints) {
+int32_t ParsedPatternInfo::getLengthFromEndpoints(const Endpoints& endpoints) {
return endpoints.end - endpoints.start;
}
UnicodeString ParsedPatternInfo::getString(int32_t flags) const {
- const Endpoints &endpoints = getEndpoints(flags);
+ const Endpoints& endpoints = getEndpoints(flags);
if (endpoints.start == endpoints.end) {
return UnicodeString();
}
return UnicodeString(pattern, endpoints.start, endpoints.end - endpoints.start);
}
-const Endpoints &ParsedPatternInfo::getEndpoints(int32_t flags) const {
+const Endpoints& ParsedPatternInfo::getEndpoints(int32_t flags) const {
bool prefix = (flags & AFFIX_PREFIX) != 0;
bool isNegative = (flags & AFFIX_NEGATIVE_SUBPATTERN) != 0;
bool padding = (flags & AFFIX_PADDING) != 0;
return positive.hasCurrencySign || (fHasNegativeSubpattern && negative.hasCurrencySign);
}
-bool ParsedPatternInfo::containsSymbolType(AffixPatternType type, UErrorCode &status) const {
+bool ParsedPatternInfo::containsSymbolType(AffixPatternType type, UErrorCode& status) const {
return AffixUtils::containsType(UnicodeStringCharSequence(pattern), type, status);
}
return codePoint;
}
-void ParsedPatternInfo::consumePattern(const UnicodeString& patternString, UErrorCode &status) {
+void ParsedPatternInfo::consumePattern(const UnicodeString& patternString, UErrorCode& status) {
if (U_FAILURE(status)) { return; }
this->pattern = patternString;
}
}
-void ParsedPatternInfo::consumeSubpattern(UErrorCode &status) {
+void ParsedPatternInfo::consumeSubpattern(UErrorCode& status) {
// subpattern := literals? number exponent? literals?
consumePadding(PadPosition::UNUM_PAD_BEFORE_PREFIX, status);
if (U_FAILURE(status)) { return; }
if (U_FAILURE(status)) { return; }
}
-void ParsedPatternInfo::consumePadding(PadPosition paddingLocation, UErrorCode &status) {
+void ParsedPatternInfo::consumePadding(PadPosition paddingLocation, UErrorCode& status) {
if (state.peek() != u'*') {
return;
}
currentSubpattern->paddingEndpoints.end = state.offset;
}
-void ParsedPatternInfo::consumeAffix(Endpoints &endpoints, UErrorCode &status) {
+void ParsedPatternInfo::consumeAffix(Endpoints& endpoints, UErrorCode& status) {
// literals := { literal }
endpoints.start = state.offset;
while (true) {
endpoints.end = state.offset;
}
-void ParsedPatternInfo::consumeLiteral(UErrorCode &status) {
+void ParsedPatternInfo::consumeLiteral(UErrorCode& status) {
if (state.peek() == -1) {
state.toParseException(u"Expected unquoted literal but found EOL");
status = U_PATTERN_SYNTAX_ERROR;
}
}
-void ParsedPatternInfo::consumeFormat(UErrorCode &status) {
+void ParsedPatternInfo::consumeFormat(UErrorCode& status) {
consumeIntegerFormat(status);
if (U_FAILURE(status)) { return; }
if (state.peek() == u'.') {
}
}
-void ParsedPatternInfo::consumeIntegerFormat(UErrorCode &status) {
+void ParsedPatternInfo::consumeIntegerFormat(UErrorCode& status) {
// Convenience reference:
- ParsedSubpatternInfo &result = *currentSubpattern;
+ ParsedSubpatternInfo& result = *currentSubpattern;
while (true) {
switch (state.peek()) {
}
}
-void ParsedPatternInfo::consumeFractionFormat(UErrorCode &status) {
+void ParsedPatternInfo::consumeFractionFormat(UErrorCode& status) {
// Convenience reference:
- ParsedSubpatternInfo &result = *currentSubpattern;
+ ParsedSubpatternInfo& result = *currentSubpattern;
int32_t zeroCounter = 0;
while (true) {
}
}
-void ParsedPatternInfo::consumeExponent(UErrorCode &status) {
+void ParsedPatternInfo::consumeExponent(UErrorCode& status) {
// Convenience reference:
- ParsedSubpatternInfo &result = *currentSubpattern;
+ ParsedSubpatternInfo& result = *currentSubpattern;
if (state.peek() != u'E') {
return;
/// END RECURSIVE DESCENT PARSER IMPLEMENTATION ///
///////////////////////////////////////////////////
-void
-PatternParser::parseToExistingPropertiesImpl(const UnicodeString& pattern, DecimalFormatProperties &properties,
- IgnoreRounding ignoreRounding, UErrorCode &status) {
+void PatternParser::parseToExistingPropertiesImpl(const UnicodeString& pattern,
+ DecimalFormatProperties& properties,
+ IgnoreRounding ignoreRounding, UErrorCode& status) {
if (pattern.length() == 0) {
// Backwards compatibility requires that we reset to the default values.
// TODO: Only overwrite the properties that "saveToProperties" normally touches?
patternInfoToProperties(properties, patternInfo, ignoreRounding, status);
}
-void PatternParser::patternInfoToProperties(DecimalFormatProperties &properties,
- ParsedPatternInfo& patternInfo,
- IgnoreRounding _ignoreRounding, UErrorCode &status) {
+void
+PatternParser::patternInfoToProperties(DecimalFormatProperties& properties, ParsedPatternInfo& patternInfo,
+ IgnoreRounding _ignoreRounding, UErrorCode& status) {
// Translate from PatternParseResult to Properties.
// Note that most data from "negative" is ignored per the specification of DecimalFormat.
- const ParsedSubpatternInfo &positive = patternInfo.positive;
+ const ParsedSubpatternInfo& positive = patternInfo.positive;
bool ignoreRounding;
if (_ignoreRounding == IGNORE_ROUNDING_NEVER) {
properties.maximumFractionDigits = -1;
properties.roundingIncrement = 0.0;
properties.minimumSignificantDigits = positive.integerAtSigns;
- properties.maximumSignificantDigits =
- positive.integerAtSigns + positive.integerTrailingHashSigns;
+ properties.maximumSignificantDigits = positive.integerAtSigns + positive.integerTrailingHashSigns;
} else if (!positive.rounding.isZero()) {
if (!ignoreRounding) {
properties.minimumFractionDigits = minFrac;
// Padding settings
if (!positive.paddingLocation.isNull()) {
// The width of the positive prefix and suffix templates are included in the padding
- int paddingWidth =
- positive.widthExceptAffixes + AffixUtils::estimateLength(UnicodeStringCharSequence(posPrefix), status) +
- AffixUtils::estimateLength(UnicodeStringCharSequence(posSuffix), status);
+ int paddingWidth = positive.widthExceptAffixes +
+ AffixUtils::estimateLength(UnicodeStringCharSequence(posPrefix), status) +
+ AffixUtils::estimateLength(UnicodeStringCharSequence(posSuffix), status);
properties.formatWidth = paddingWidth;
UnicodeString rawPaddingString = patternInfo.getString(AffixPatternProvider::AFFIX_PADDING);
if (rawPaddingString.length() == 1) {
/// End PatternStringParser.java; begin PatternStringUtils.java ///
///////////////////////////////////////////////////////////////////
-UnicodeString PatternStringUtils::propertiesToPatternString(const DecimalFormatProperties &properties,
- UErrorCode &status) {
+UnicodeString PatternStringUtils::propertiesToPatternString(const DecimalFormatProperties& properties,
+ UErrorCode& status) {
UnicodeString sb;
// Convenience references
int groupingSize = uprv_min(properties.secondaryGroupingSize, dosMax);
int firstGroupingSize = uprv_min(properties.groupingSize, dosMax);
int paddingWidth = uprv_min(properties.formatWidth, dosMax);
- NullableValue<PadPosition> paddingLocation = properties.padPosition;
+ NullableValue <PadPosition> paddingLocation = properties.padPosition;
UnicodeString paddingString = properties.padString;
int minInt = uprv_max(uprv_min(properties.minimumIntegerDigits, dosMax), 0);
int maxInt = uprv_min(properties.maximumIntegerDigits, dosMax);
}
int PatternStringUtils::escapePaddingString(UnicodeString input, UnicodeString& output, int startIndex,
- UErrorCode &status) {
- (void)status;
+ UErrorCode& status) {
+ (void) status;
if (input.length() == 0) {
input.setTo(kFallbackPaddingString, -1);
}
return output.length() - startLength;
}
+/**
+ * Renders one affix (prefix or suffix) of a pattern into a string, applying the sign-display
+ * and per-mille substitutions. Any previous contents of the output string are discarded.
+ *
+ * @param patternInfo The provider from which the affix characters are read.
+ * @param isPrefix true to render the prefix; false to render the suffix.
+ * @param signum The sign of the number: -1, 0, or 1.
+ * @param signDisplay The sign display strategy; decides whether '+' replaces '-' and whether a
+ *                    sign is prepended.
+ * @param plural The plural form to OR into the affix flags, or StandardPlural::Form::COUNT
+ *               to add no plural flag.
+ * @param perMilleReplacesPercent If true, each '%' read from the pattern is emitted as
+ *                                U+2030 PER MILLE SIGN.
+ * @param output Receives the rendered affix pattern.
+ */
+void PatternStringUtils::patternInfoToStringBuilder(const AffixPatternProvider& patternInfo, bool isPrefix,
+                                                    int8_t signum, UNumberSignDisplay signDisplay,
+                                                    StandardPlural::Form plural,
+                                                    bool perMilleReplacesPercent, UnicodeString& output) {
+
+    // Should the output render '+' where '-' would normally appear in the pattern?
+    bool plusReplacesMinusSign = signum != -1 && (
+            signDisplay == UNUM_SIGN_ALWAYS || signDisplay == UNUM_SIGN_ACCOUNTING_ALWAYS || (
+                    signum == 1 && (
+                            signDisplay == UNUM_SIGN_EXCEPT_ZERO ||
+                            signDisplay == UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO))) &&
+                                 patternInfo.positiveHasPlusSign() == false;
+
+    // Should we use the affix from the negative subpattern? (If not, we will use the positive
+    // subpattern.)
+    bool useNegativeAffixPattern = patternInfo.hasNegativeSubpattern() && (
+            signum == -1 || (patternInfo.negativeHasMinusSign() && plusReplacesMinusSign));
+
+    // Resolve the flags for the affix pattern.
+    int flags = 0;
+    if (useNegativeAffixPattern) {
+        flags |= AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN;
+    }
+    if (isPrefix) {
+        flags |= AffixPatternProvider::AFFIX_PREFIX;
+    }
+    if (plural != StandardPlural::Form::COUNT) {
+        U_ASSERT(plural == (AffixPatternProvider::AFFIX_PLURAL_MASK & plural));
+        flags |= plural;
+    }
+
+    // Should we prepend a sign to the pattern?
+    bool prependSign;
+    if (!isPrefix || useNegativeAffixPattern) {
+        prependSign = false;
+    } else if (signum == -1) {
+        prependSign = signDisplay != UNUM_SIGN_NEVER;
+    } else {
+        prependSign = plusReplacesMinusSign;
+    }
+
+    // Compute the length of the affix pattern.
+    int length = patternInfo.length(flags) + (prependSign ? 1 : 0);
+
+    // Finally, set the result into the StringBuilder.
+    output.remove();
+    for (int index = 0; index < length; index++) {
+        char16_t candidate;
+        if (prependSign && index == 0) {
+            // The prepended sign is always emitted as '-' here; the substitution below turns it
+            // into '+' when plusReplacesMinusSign is set.
+            candidate = u'-';
+        } else if (prependSign) {
+            // Index 0 is occupied by the prepended sign, so pattern characters are shifted by one.
+            candidate = patternInfo.charAt(flags, index - 1);
+        } else {
+            candidate = patternInfo.charAt(flags, index);
+        }
+        if (plusReplacesMinusSign && candidate == u'-') {
+            candidate = u'+';
+        }
+        if (perMilleReplacesPercent && candidate == u'%') {
+            // U+2030 PER MILLE SIGN, written as an escape so the literal does not depend on the
+            // encoding of the source file.
+            candidate = u'\u2030';
+        }
+        output.append(candidate);
+    }
+}
+
#endif /* #if !UCONFIG_NO_FORMATTING */
ParsedSubpatternInfo positive;
ParsedSubpatternInfo negative;
- ParsedPatternInfo() : state(this->pattern), currentSubpattern(nullptr) {}
+ ParsedPatternInfo()
+ : state(this->pattern), currentSubpattern(nullptr) {}
~ParsedPatternInfo() U_OVERRIDE = default;
- static int32_t getLengthFromEndpoints(const Endpoints &endpoints);
+ static int32_t getLengthFromEndpoints(const Endpoints& endpoints);
char16_t charAt(int32_t flags, int32_t index) const U_OVERRIDE;
int32_t length(int32_t flags) const U_OVERRIDE;
- UnicodeString getString(int32_t flags) const;
+ UnicodeString getString(int32_t flags) const U_OVERRIDE;
bool positiveHasPlusSign() const U_OVERRIDE;
bool hasCurrencySign() const U_OVERRIDE;
- bool containsSymbolType(AffixPatternType type, UErrorCode &status) const U_OVERRIDE;
+ bool containsSymbolType(AffixPatternType type, UErrorCode& status) const U_OVERRIDE;
bool hasBody() const U_OVERRIDE;
private:
struct U_I18N_API ParserState {
- const UnicodeString &pattern; // reference to the parent
+ const UnicodeString& pattern; // reference to the parent
int32_t offset = 0;
- explicit ParserState(const UnicodeString &_pattern) : pattern(_pattern) {};
+ explicit ParserState(const UnicodeString& _pattern)
+ : pattern(_pattern) {};
UChar32 peek();
// TODO: We don't currently do anything with the message string.
// This method is here as a shell for Java compatibility.
- inline void toParseException(const char16_t *message) { (void)message; }
- }
- state;
+ inline void toParseException(const char16_t* message) { (void) message; }
+ } state;
// NOTE: In Java, these are written as pure functions.
// In C++, they're written as methods.
// The behavior is the same.
// Mutable transient pointer:
- ParsedSubpatternInfo *currentSubpattern;
+ ParsedSubpatternInfo* currentSubpattern;
// In Java, "negative == null" tells us whether or not we had a negative subpattern.
// In C++, we need to remember in another boolean.
bool fHasNegativeSubpattern = false;
- const Endpoints &getEndpoints(int32_t flags) const;
+ const Endpoints& getEndpoints(int32_t flags) const;
/** Run the recursive descent parser. */
- void consumePattern(const UnicodeString &patternString, UErrorCode &status);
+ void consumePattern(const UnicodeString& patternString, UErrorCode& status);
- void consumeSubpattern(UErrorCode &status);
+ void consumeSubpattern(UErrorCode& status);
- void consumePadding(PadPosition paddingLocation, UErrorCode &status);
+ void consumePadding(PadPosition paddingLocation, UErrorCode& status);
- void consumeAffix(Endpoints &endpoints, UErrorCode &status);
+ void consumeAffix(Endpoints& endpoints, UErrorCode& status);
- void consumeLiteral(UErrorCode &status);
+ void consumeLiteral(UErrorCode& status);
- void consumeFormat(UErrorCode &status);
+ void consumeFormat(UErrorCode& status);
- void consumeIntegerFormat(UErrorCode &status);
+ void consumeIntegerFormat(UErrorCode& status);
- void consumeFractionFormat(UErrorCode &status);
+ void consumeFractionFormat(UErrorCode& status);
- void consumeExponent(UErrorCode &status);
+ void consumeExponent(UErrorCode& status);
friend class PatternParser;
};
* The LDML decimal format pattern (Excel-style pattern) to parse.
* @return The results of the parse.
*/
- static void
- parseToPatternInfo(const UnicodeString& patternString, ParsedPatternInfo &patternInfo, UErrorCode &status);
+ static void parseToPatternInfo(const UnicodeString& patternString, ParsedPatternInfo& patternInfo,
+ UErrorCode& status);
enum IgnoreRounding {
IGNORE_ROUNDING_NEVER = 0, IGNORE_ROUNDING_IF_CURRENCY = 1, IGNORE_ROUNDING_ALWAYS = 2
* @throws IllegalArgumentException
* If there is a syntax error in the pattern string.
*/
- static DecimalFormatProperties
- parseToProperties(const UnicodeString& pattern, IgnoreRounding ignoreRounding, UErrorCode &status);
+ static DecimalFormatProperties parseToProperties(const UnicodeString& pattern,
+ IgnoreRounding ignoreRounding, UErrorCode& status);
/**
* Parses a pattern string into an existing property bag. All properties that can be encoded into a pattern string
* @throws IllegalArgumentException
* If there was a syntax error in the pattern string.
*/
- static void parseToExistingProperties(const UnicodeString& pattern, DecimalFormatProperties& properties,
- IgnoreRounding ignoreRounding, UErrorCode &status);
+ static void parseToExistingProperties(const UnicodeString& pattern,
+ DecimalFormatProperties& properties,
+ IgnoreRounding ignoreRounding, UErrorCode& status);
private:
- static void
- parseToExistingPropertiesImpl(const UnicodeString& pattern, DecimalFormatProperties &properties,
- IgnoreRounding ignoreRounding, UErrorCode &status);
+ static void parseToExistingPropertiesImpl(const UnicodeString& pattern,
+ DecimalFormatProperties& properties,
+ IgnoreRounding ignoreRounding, UErrorCode& status);
/** Finalizes the temporary data stored in the ParsedPatternInfo to the Properties. */
- static void
- patternInfoToProperties(DecimalFormatProperties &properties, ParsedPatternInfo& patternInfo,
- IgnoreRounding _ignoreRounding, UErrorCode &status);
+ static void patternInfoToProperties(DecimalFormatProperties& properties,
+ ParsedPatternInfo& patternInfo, IgnoreRounding _ignoreRounding,
+ UErrorCode& status);
};
class U_I18N_API PatternStringUtils {
* The property bag to serialize.
* @return A pattern string approximately serializing the property bag.
*/
- static UnicodeString
- propertiesToPatternString(const DecimalFormatProperties &properties, UErrorCode &status);
+ static UnicodeString propertiesToPatternString(const DecimalFormatProperties& properties,
+ UErrorCode& status);
/**
* notation.
* @return The pattern expressed in the other notation.
*/
- static UnicodeString
- convertLocalized(UnicodeString input, DecimalFormatSymbols symbols, bool toLocalized,
- UErrorCode &status);
+ static UnicodeString convertLocalized(UnicodeString input, DecimalFormatSymbols symbols,
+ bool toLocalized, UErrorCode& status);
+
+ /**
+ * This method contains the heart of the logic for rendering LDML affix strings. It handles
+ * sign-always-shown resolution, whether to use the positive or negative subpattern, permille
+ * substitution, and plural forms for CurrencyPluralInfo.
+ */
+ static void patternInfoToStringBuilder(const AffixPatternProvider& patternInfo, bool isPrefix,
+ int8_t signum, UNumberSignDisplay signDisplay,
+ StandardPlural::Form plural, bool perMilleReplacesPercent,
+ UnicodeString& output);
private:
/** @return The number of chars inserted. */
- static int
- escapePaddingString(UnicodeString input, UnicodeString &output, int startIndex, UErrorCode &status);
+ static int escapePaddingString(UnicodeString input, UnicodeString& output, int startIndex,
+ UErrorCode& status);
};
} // namespace impl
#include "uassert.h"
#include "unicode/platform.h"
-U_NAMESPACE_BEGIN
-namespace number {
+U_NAMESPACE_BEGIN namespace number {
namespace impl {
// Typedef several enums for brevity and for easier comparison to Java.
};
enum CompactType {
- TYPE_DECIMAL,
- TYPE_CURRENCY
+ TYPE_DECIMAL, TYPE_CURRENCY
};
// TODO: Should this be moved somewhere else, maybe where other ICU classes can use it?
// Exported as U_I18N_API because it is a base class for other exported types
class U_I18N_API CharSequence {
-public:
+ public:
virtual ~CharSequence() = default;
virtual int32_t length() const = 0;
static const int32_t AFFIX_NEGATIVE_SUBPATTERN = 0x200;
static const int32_t AFFIX_PADDING = 0x400;
+ // Convenience compound flags
+ static const int32_t AFFIX_POS_PREFIX = AFFIX_PREFIX;
+ static const int32_t AFFIX_POS_SUFFIX = 0;
+ static const int32_t AFFIX_NEG_PREFIX = AFFIX_PREFIX | AFFIX_NEGATIVE_SUBPATTERN;
+ static const int32_t AFFIX_NEG_SUFFIX = AFFIX_NEGATIVE_SUBPATTERN;
+
virtual ~AffixPatternProvider() = default;
virtual char16_t charAt(int flags, int i) const = 0;
virtual int length(int flags) const = 0;
+ virtual UnicodeString getString(int flags) const = 0;
+
virtual bool hasCurrencySign() const = 0;
virtual bool positiveHasPlusSign() const = 0;
virtual bool negativeHasMinusSign() const = 0;
- virtual bool containsSymbolType(AffixPatternType, UErrorCode &) const = 0;
+ virtual bool containsSymbolType(AffixPatternType, UErrorCode&) const = 0;
/**
* True if the pattern has a number placeholder like "0" or "#,##0.00"; false if the pattern does not
* formatted.
* @return The number of characters (UTF-16 code units) that were added to the string builder.
*/
- virtual int32_t
- apply(NumberStringBuilder &output, int leftIndex, int rightIndex, UErrorCode &status) const = 0;
+ virtual int32_t apply(NumberStringBuilder& output, int leftIndex, int rightIndex,
+ UErrorCode& status) const = 0;
/**
* Gets the length of the prefix. This information can be used in combination with {@link #apply} to extract the
/**
* Returns the number of code points in the modifier, prefix plus suffix.
*/
- virtual int32_t getCodePointCount(UErrorCode &status) const = 0;
+ virtual int32_t getCodePointCount(UErrorCode& status) const = 0;
/**
* Whether this modifier is strong. If a modifier is strong, it should always be applied immediately and not allowed
* The MicroProps instance to populate.
* @return A MicroProps instance resolved for the quantity.
*/
- virtual void processQuantity(DecimalQuantity& quantity, MicroProps& micros, UErrorCode& status) const = 0;
+ virtual void processQuantity(DecimalQuantity& quantity, MicroProps& micros,
+ UErrorCode& status) const = 0;
};
/**
template<typename T>
class U_I18N_API NullableValue {
public:
- NullableValue() : fNull(true) {}
+ NullableValue()
+ : fNull(true) {}
- NullableValue(const NullableValue<T> &other) = default;
+ NullableValue(const NullableValue<T>& other) = default;
- explicit NullableValue(const T &other) {
+ explicit NullableValue(const T& other) {
fValue = other;
fNull = false;
}
- NullableValue<T> &operator=(const NullableValue<T> &other) = default;
+ NullableValue<T>& operator=(const NullableValue<T>& other) = default;
- NullableValue<T> &operator=(const T &other) {
+ NullableValue<T>& operator=(const T& other) {
fValue = other;
fNull = false;
return *this;
}
- bool operator==(const NullableValue &other) const {
+ bool operator==(const NullableValue& other) const {
// "fValue == other.fValue" returns UBool, not bool (causes compiler warnings)
return fNull ? other.fNull : (other.fNull ? false : static_cast<bool>(fValue == other.fValue));
}
return fNull;
}
- T get(UErrorCode &status) const {
+ T get(UErrorCode& status) const {
if (fNull) {
status = U_UNDEFINED_VARIABLE;
}
AffixTokenMatcherWarehouse::AffixTokenMatcherWarehouse(const UChar* currencyCode,
- const UnicodeString& currency1,
- const UnicodeString& currency2,
- const DecimalFormatSymbols& dfs,
- IgnorablesMatcher* ignorables, const Locale& locale)
+ const UnicodeString* currency1,
+ const UnicodeString* currency2,
+ const DecimalFormatSymbols* dfs,
+ IgnorablesMatcher* ignorables, const Locale* locale)
: currency1(currency1),
currency2(currency2),
dfs(dfs),
}
NumberParseMatcher& AffixTokenMatcherWarehouse::minusSign() {
- return fMinusSign = {dfs, true};
+ return fMinusSign = {*dfs, true};
}
NumberParseMatcher& AffixTokenMatcherWarehouse::plusSign() {
- return fPlusSign = {dfs, true};
+ return fPlusSign = {*dfs, true};
}
NumberParseMatcher& AffixTokenMatcherWarehouse::percent() {
- return fPercent = {dfs};
+ return fPercent = {*dfs};
}
NumberParseMatcher& AffixTokenMatcherWarehouse::permille() {
- return fPermille = {dfs};
+ return fPermille = {*dfs};
}
NumberParseMatcher& AffixTokenMatcherWarehouse::currency(UErrorCode& status) {
- return fCurrency = {{locale, status}, {currencyCode, currency1, currency2}};
+ return fCurrency = {{*locale, status}, {currencyCode, *currency1, *currency2}};
}
NumberParseMatcher& AffixTokenMatcherWarehouse::nextCodePointMatcher(UChar32 cp) {
AffixPatternMatcher::AffixPatternMatcher(MatcherArray& matchers, int32_t matchersLen,
const UnicodeString& pattern)
- : ArraySeriesMatcher(matchers, matchersLen), fPattern(pattern) {
+ : ArraySeriesMatcher(matchers, matchersLen), fPattern(pattern) {}
+
+UnicodeString AffixPatternMatcher::getPattern() const { // Returns the affix pattern string held in fPattern.
+ return fPattern.toAliasedUnicodeString(); // aliases fPattern's buffer instead of copying it
+}
+
+bool AffixPatternMatcher::operator==(const AffixPatternMatcher& other) const { // Equality is decided by the pattern string alone.
+ return fPattern == other.fPattern; // the matcher arrays themselves are not compared
+}
+
+
+/**
+ * Builds the AffixMatchers needed to parse the affixes described by patternInfo.
+ * In Java, this logic lives in AffixMatcher#createMatchers().
+ *
+ * @param patternInfo The affix pattern provider whose prefixes and suffixes should be matched.
+ * @param output      The parser that is meant to receive the matchers. It is not used yet; see the
+ *                    TODO at the end of this constructor.
+ * @param warehouse   Token matcher storage. It is moved into fAffixTokenMatcherWarehouse, so the
+ *                    caller's instance is left in a moved-from state.
+ * @param ignorables  Matcher whose UnicodeSet defines the ignorable code points.
+ * @param parseFlags  Bit set of PARSE_FLAG_* values.
+ * @param status      ICU error code, forwarded to the helpers that build the matchers.
+ */
+AffixMatcherWarehouse::AffixMatcherWarehouse(const AffixPatternProvider& patternInfo,
+                                             NumberParserImpl& output,
+                                             AffixTokenMatcherWarehouse& warehouse,
+                                             const IgnorablesMatcher& ignorables, parse_flags_t parseFlags,
+                                             UErrorCode& status)
+        : fAffixTokenMatcherWarehouse(std::move(warehouse)) {
+    if (!isInteresting(patternInfo, ignorables, parseFlags, status)) {
+        return;
+    }
+
+    // The affixes have interesting characters, or we are in strict mode.
+    // The matchers are written into the fixed-size member arrays: at most two AffixPatternMatchers
+    // and three AffixMatchers are produced per signum.
+    UnicodeString sb;
+    bool includeUnpaired = 0 != (parseFlags & PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES);
+    UNumberSignDisplay signDisplay = (0 != (parseFlags & PARSE_FLAG_PLUS_SIGN_ALLOWED)) ? UNUM_SIGN_ALWAYS
+                                                                                        : UNUM_SIGN_NEVER;
+
+    int32_t numAffixMatchers = 0;
+    int32_t numAffixPatternMatchers = 0;
+
+    AffixPatternMatcher* posPrefix = nullptr;
+    AffixPatternMatcher* posSuffix = nullptr;
+
+    // Pre-process the affix strings to resolve LDML rules like sign display.
+    for (int8_t signum = 1; signum >= -1; signum--) {
+        // Generate Prefix
+        // NOTE: The token matchers must be created in the warehouse owned by this object. The
+        // "warehouse" parameter was moved from in the initializer list and must not be used here.
+        bool hasPrefix = false;
+        PatternStringUtils::patternInfoToStringBuilder(
+                patternInfo, true, signum, signDisplay, StandardPlural::OTHER, false, sb);
+        fAffixPatternMatchers[numAffixPatternMatchers] = AffixPatternMatcher::fromAffixPattern(
+                sb, fAffixTokenMatcherWarehouse, parseFlags, &hasPrefix, status);
+        AffixPatternMatcher* prefix = hasPrefix ? &fAffixPatternMatchers[numAffixPatternMatchers++]
+                                                : nullptr;
+
+        // Generate Suffix
+        bool hasSuffix = false;
+        PatternStringUtils::patternInfoToStringBuilder(
+                patternInfo, false, signum, signDisplay, StandardPlural::OTHER, false, sb);
+        fAffixPatternMatchers[numAffixPatternMatchers] = AffixPatternMatcher::fromAffixPattern(
+                sb, fAffixTokenMatcherWarehouse, parseFlags, &hasSuffix, status);
+        AffixPatternMatcher* suffix = hasSuffix ? &fAffixPatternMatchers[numAffixPatternMatchers++]
+                                                : nullptr;
+
+        if (signum == 1) {
+            posPrefix = prefix;
+            posSuffix = suffix;
+        } else if (equals(prefix, posPrefix) && equals(suffix, posSuffix)) {
+            // Skip adding these matchers (we already have equivalents)
+            continue;
+        }
+
+        // Flags for setting in the ParsedNumber
+        int flags = (signum == -1) ? FLAG_NEGATIVE : 0;
+
+        // Note: it is indeed possible for posPrefix and posSuffix to both be null.
+        // We still need to add that matcher for strict mode to work.
+        fAffixMatchers[numAffixMatchers++] = {prefix, suffix, flags};
+        if (includeUnpaired && prefix != nullptr && suffix != nullptr) {
+            // The following if statements are designed to prevent adding two identical matchers:
+            // an unpaired matcher is added only for the positive form, or when the affix differs
+            // from the positive one (whose unpaired matcher has already been added).
+            if (signum == 1 || !equals(prefix, posPrefix)) {
+                fAffixMatchers[numAffixMatchers++] = {prefix, nullptr, flags};
+            }
+            if (signum == 1 || !equals(suffix, posSuffix)) {
+                fAffixMatchers[numAffixMatchers++] = {nullptr, suffix, flags};
+            }
+        }
+    }
+
+    // Put the AffixMatchers in order, and then add them to the output.
+    // TODO
+//    Collections.sort(matchers, COMPARATOR);
+//    output.addMatchers(matchers);
+}
+
+/**
+ * Decides whether affix matchers have to be generated for the given pattern.
+ *
+ * Returns false only when PARSE_FLAG_USE_FULL_AFFIXES is not set, all four affix strings contain
+ * nothing but symbols and ignorables, and neither suffix contains a plus or minus sign token.
+ * In every other case the affixes are "interesting" and true is returned.
+ */
+bool AffixMatcherWarehouse::isInteresting(const AffixPatternProvider& patternInfo,
+                                          const IgnorablesMatcher& ignorables, parse_flags_t parseFlags,
+                                          UErrorCode& status) {
+    UnicodeStringCharSequence positivePrefix(patternInfo.getString(AffixPatternProvider::AFFIX_POS_PREFIX));
+    UnicodeStringCharSequence positiveSuffix(patternInfo.getString(AffixPatternProvider::AFFIX_POS_SUFFIX));
+    // The negative affixes stay empty unless the pattern has an explicit negative subpattern.
+    UnicodeStringCharSequence negativePrefix(UnicodeString(u""));
+    UnicodeStringCharSequence negativeSuffix(UnicodeString(u""));
+    if (patternInfo.hasNegativeSubpattern()) {
+        negativePrefix = UnicodeStringCharSequence(
+                patternInfo.getString(AffixPatternProvider::AFFIX_NEG_PREFIX));
+        negativeSuffix = UnicodeStringCharSequence(
+                patternInfo.getString(AffixPatternProvider::AFFIX_NEG_SUFFIX));
+    }
+
+    // Full affixes were requested: matchers are always needed.
+    if (0 != (parseFlags & PARSE_FLAG_USE_FULL_AFFIXES)) {
+        return true;
+    }
+
+    // Any affix with a code point that is neither a symbol nor an ignorable is interesting.
+    const UnicodeSet& ignorableSet = *ignorables.getSet();
+    const bool onlySymbolsAndIgnorables =
+            AffixUtils::containsOnlySymbolsAndIgnorables(positivePrefix, ignorableSet, status) &&
+            AffixUtils::containsOnlySymbolsAndIgnorables(positiveSuffix, ignorableSet, status) &&
+            AffixUtils::containsOnlySymbolsAndIgnorables(negativePrefix, ignorableSet, status) &&
+            AffixUtils::containsOnlySymbolsAndIgnorables(negativeSuffix, ignorableSet, status);
+    if (!onlySymbolsAndIgnorables) {
+        return true;
+    }
+
+    // HACK: Plus and minus sign are a special case: we accept them trailing only if they are
+    // trailing in the pattern string.
+    // When none is present, the affixes contain only symbols and ignorables and there is no need
+    // to generate affix matchers.
+    return AffixUtils::containsType(positiveSuffix, TYPE_PLUS_SIGN, status) ||
+           AffixUtils::containsType(positiveSuffix, TYPE_MINUS_SIGN, status) ||
+           AffixUtils::containsType(negativeSuffix, TYPE_PLUS_SIGN, status) ||
+           AffixUtils::containsType(negativeSuffix, TYPE_MINUS_SIGN, status);
+}
+
+/**
+ * Null-safe comparison of two AffixPatternMatcher pointers: two null pointers are equal, a null
+ * and a non-null pointer are not, and two non-null pointers are compared with operator==.
+ */
+bool AffixMatcherWarehouse::equals(const AffixPatternMatcher* lhs, const AffixPatternMatcher* rhs) {
+    if (lhs == nullptr || rhs == nullptr) {
+        // At least one side is missing: equal only when both are.
+        return lhs == rhs;
+    }
+    return *lhs == *rhs;
+}
+
+
+AffixMatcher::AffixMatcher(AffixPatternMatcher* prefix, AffixPatternMatcher* suffix, result_flags_t flags) // prefix/suffix may be nullptr (no affix on that side)
+ : fPrefix(prefix), fSuffix(suffix), fFlags(flags) {} // stores the pointers as given; flags are applied later in postProcess
+
+/**
+ * Tries to match this matcher's prefix (before a number has been seen) or suffix (after a number
+ * has been seen) at the current position of the segment. When the segment offset advances, the
+ * matched affix pattern is recorded in result.prefix or result.suffix.
+ *
+ * @return The value returned by the underlying affix pattern matcher, or false when no match was
+ *         attempted.
+ */
+bool AffixMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
+    if (result.seenNumber()) {
+        // Suffix
+        // Do not match if:
+        // 1. We have already seen a suffix (result.suffix is not bogus)
+        // 2. The suffix in this AffixMatcher is empty (fSuffix == nullptr)
+        // 3. The matched prefix does not equal this AffixMatcher's prefix
+        if (!result.suffix.isBogus() || fSuffix == nullptr || !matched(fPrefix, result.prefix)) {
+            return false;
+        }
+
+        // Attempt to match the suffix.
+        const int offsetBefore = segment.getOffset();
+        const bool maybeMore = fSuffix->match(segment, result, status);
+        if (segment.getOffset() != offsetBefore) {
+            result.suffix = fSuffix->getPattern();
+        }
+        return maybeMore;
+    }
+
+    // Prefix
+    // Do not match if:
+    // 1. We have already seen a prefix (result.prefix is not bogus)
+    // 2. The prefix in this AffixMatcher is empty (fPrefix == nullptr)
+    if (!result.prefix.isBogus() || fPrefix == nullptr) {
+        return false;
+    }
+
+    // Attempt to match the prefix.
+    const int offsetBefore = segment.getOffset();
+    const bool maybeMore = fPrefix->match(segment, result, status);
+    if (segment.getOffset() != offsetBefore) {
+        result.prefix = fPrefix->getPattern();
+    }
+    return maybeMore;
+}
+
+/**
+ * Returns the union of the lead code points of the prefix and suffix matchers. The set is built
+ * and frozen on the first call and stored in fLocalLeadCodePoints for later calls.
+ */
+const UnicodeSet& AffixMatcher::getLeadCodePoints() {
+    if (!fLocalLeadCodePoints.isNull()) {
+        // Already computed on an earlier call.
+        return *fLocalLeadCodePoints;
+    }
+
+    auto* combined = new UnicodeSet();
+    if (fPrefix != nullptr) {
+        combined->addAll(fPrefix->getLeadCodePoints());
+    }
+    if (fSuffix != nullptr) {
+        combined->addAll(fSuffix->getLeadCodePoints());
+    }
+    combined->freeze();
+    fLocalLeadCodePoints.adoptInstead(combined);
+    return *fLocalLeadCodePoints;
+}
+
+/**
+ * If the prefix and suffix recorded in the result are the ones belonging to this matcher, replaces
+ * bogus (unset) affix strings with empty strings and ORs this matcher's flags into the result.
+ */
+void AffixMatcher::postProcess(ParsedNumber& result) const {
+    // Check to see if our affix is the one that was matched. If not, leave the result untouched.
+    if (!matched(fPrefix, result.prefix) || !matched(fSuffix, result.suffix)) {
+        return;
+    }
+
+    // Fill in the result prefix and suffix with non-null values (empty string).
+    // Used by strict mode to determine whether an entire affix pair was matched.
+    if (result.prefix.isBogus()) {
+        result.prefix = UnicodeString();
+    }
+    if (result.suffix.isBogus()) {
+        result.suffix = UnicodeString();
+    }
+    result.flags |= fFlags;
+}
+
+bool AffixMatcher::matched(const AffixPatternMatcher* affix, const UnicodeString& patternString) {
+    if (affix == nullptr) {
+        // No affix on this side: it counts as matched only if nothing was recorded either.
+        return patternString.isBogus();
+    }
+    return affix->getPattern() == patternString;
 }
#endif /* #if !UCONFIG_NO_FORMATTING */
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
class AffixPatternMatcherBuilder;
class AffixPatternMatcher;
+using ::icu::number::impl::AffixPatternProvider;
+using ::icu::number::impl::TokenConsumer;
+
class CodePointMatcher : public NumberParseMatcher, public UMemory {
public:
static constexpr int32_t CODE_POINT_BATCH_SIZE = 10; // Number of entries per heap allocation
public:
- AffixTokenMatcherWarehouse(const UChar* currencyCode, const UnicodeString& currency1,
- const UnicodeString& currency2, const DecimalFormatSymbols& dfs,
- IgnorablesMatcher* ignorables, const Locale& locale);
+ AffixTokenMatcherWarehouse() = default; // WARNING: Leaves the object in an unusable state
+
+ AffixTokenMatcherWarehouse(const UChar* currencyCode, const UnicodeString* currency1,
+ const UnicodeString* currency2, const DecimalFormatSymbols* dfs,
+ IgnorablesMatcher* ignorables, const Locale* locale);
+
+ AffixTokenMatcherWarehouse(AffixTokenMatcherWarehouse&& src) = default;
~AffixTokenMatcherWarehouse();
NumberParseMatcher& nextCodePointMatcher(UChar32 cp);
private:
+ // NOTE: The following fields may be unsafe to access after construction is done!
UChar currencyCode[4];
- const UnicodeString& currency1;
- const UnicodeString& currency2;
- const DecimalFormatSymbols& dfs;
+ const UnicodeString* currency1;
+ const UnicodeString* currency2;
+ const DecimalFormatSymbols* dfs;
IgnorablesMatcher* ignorables;
- const Locale locale;
+ const Locale* locale;
// NOTE: These are default-constructed and should not be used until initialized.
MinusSignMatcher fMinusSign;
};
-class AffixPatternMatcherBuilder : public ::icu::number::impl::TokenConsumer {
+class AffixPatternMatcherBuilder : public TokenConsumer {
public:
AffixPatternMatcherBuilder(const UnicodeString& pattern, AffixTokenMatcherWarehouse& warehouse,
IgnorablesMatcher* ignorables);
class AffixPatternMatcher : public ArraySeriesMatcher {
public:
+ AffixPatternMatcher() = default; // WARNING: Leaves the object in an unusable state
+
static AffixPatternMatcher fromAffixPattern(const UnicodeString& affixPattern,
AffixTokenMatcherWarehouse& warehouse,
parse_flags_t parseFlags, bool* success,
UErrorCode& status);
- private:
- UnicodeString fPattern;
+ UnicodeString getPattern() const;
- AffixPatternMatcher() = default; // WARNING: Leaves the object in an unusable state
+ bool operator==(const AffixPatternMatcher& other) const;
+
+ private:
+ CompactUnicodeString<4> fPattern;
AffixPatternMatcher(MatcherArray& matchers, int32_t matchersLen, const UnicodeString& pattern);
};
+class AffixMatcher : public NumberParseMatcher, public UMemory { // Parse matcher for one prefix/suffix pair of an affix pattern.
+ public:
+ AffixMatcher() = default; // WARNING: Leaves the object in an unusable state
+
+ AffixMatcher(AffixPatternMatcher* prefix, AffixPatternMatcher* suffix, result_flags_t flags); // prefix or suffix may be nullptr
+
+ // static void createMatchers() is the constructor for AffixMatcherWarehouse in C++
+
+ bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override; // tries the prefix before a number has been seen, the suffix afterwards
+
+ void postProcess(ParsedNumber& result) const override; // ORs fFlags into result.flags when this matcher's affixes were the ones matched
+
+ const UnicodeSet& getLeadCodePoints() override; // union of the prefix's and suffix's lead code points, built on first call
+
+ private:
+ AffixPatternMatcher* fPrefix; // may be nullptr; stored as given to the constructor
+ AffixPatternMatcher* fSuffix; // may be nullptr; stored as given to the constructor
+ result_flags_t fFlags; // flags added to the ParsedNumber in postProcess
+
+ /**
+ * Helper method to return whether the given AffixPatternMatcher equals the given pattern string.
+ * Either both arguments must be null or the pattern string inside the AffixPatternMatcher must equal
+ * the given pattern string.
+ * For the pattern string, "null" means a bogus UnicodeString (isBogus() is true).
+ */
+ static bool matched(const AffixPatternMatcher* affix, const UnicodeString& patternString);
+};
+
+
+/**
+ * A C++-only class to retain ownership of the AffixMatchers needed for parsing.
+ *
+ * The constructor fills fAffixPatternMatchers and then stores pointers to those elements inside
+ * the entries of fAffixMatchers, so the AffixMatchers refer to storage owned by this object.
+ * NOTE(review): copying or moving a populated instance would presumably leave those pointers
+ * referring to the source object; confirm before relying on copy/move.
+ */
+class AffixMatcherWarehouse {
+ public:
+ AffixMatcherWarehouse() = default; // WARNING: Leaves the object in an unusable state
+
+ // in Java, this is AffixMatcher#createMatchers()
+ AffixMatcherWarehouse(const AffixPatternProvider& patternInfo, NumberParserImpl& output,
+ AffixTokenMatcherWarehouse& warehouse, const IgnorablesMatcher& ignorables,
+ parse_flags_t parseFlags, UErrorCode& status); // NOTE: warehouse is moved from by this constructor
+
+ private:
+ // 9 is the limit: positive, zero, and negative, each with prefix, suffix, and prefix+suffix
+ AffixMatcher fAffixMatchers[9];
+ // 6 is the limit: positive, zero, and negative, a prefix and a suffix for each
+ AffixPatternMatcher fAffixPatternMatchers[6];
+ // Store all the tokens used by the AffixPatternMatchers
+ AffixTokenMatcherWarehouse fAffixTokenMatcherWarehouse;
+
+ static bool isInteresting(const AffixPatternProvider& patternInfo, const IgnorablesMatcher& ignorables,
+ parse_flags_t parseFlags, UErrorCode& status); // false when no affix matchers need to be generated
+
+ /**
+ * Helper method to return whether (1) both lhs and rhs are null/invalid, or (2) if they are both
+ * valid, whether they are equal according to operator==. Similar to Java Objects.equals()
+ * Returns false when exactly one of the two pointers is null.
+ */
+ static bool equals(const AffixPatternMatcher* lhs, const AffixPatternMatcher* rhs);
+};
+
+
} // namespace impl
} // namespace numparse
U_NAMESPACE_END
}
}
-const UnicodeSet* SymbolMatcher::getSet() {
+const UnicodeSet* SymbolMatcher::getSet() const {
return fUniSet;
}
public:
SymbolMatcher() = default; // WARNING: Leaves the object in an unusable state
- const UnicodeSet* getSet();
+ const UnicodeSet* getSet() const;
bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
PARSE_FLAG_PLUS_SIGN_ALLOWED = 0x0400,
};
-//template<typename T>
-//struct MaybeNeedsAdoption {
-// T* ptr;
-// bool needsAdoption;
-//};
+
+// TODO: Is this class worthwhile?
+template<int32_t stackCapacity> // stackCapacity: capacity parameter of the MaybeStackArray that backs the string
+class CompactUnicodeString { // Holds a NUL-terminated UTF-16 string in a MaybeStackArray buffer.
+ public:
+ CompactUnicodeString() {
+ static_assert(stackCapacity > 0, "cannot have zero space on stack");
+ fBuffer[0] = 0; // empty string: the terminator is the first code unit
+ }
+
+ CompactUnicodeString(const UnicodeString& text)
+ : fBuffer(text.length() + 1) { // one extra code unit for the NUL terminator
+ memcpy(fBuffer.getAlias(), text.getBuffer(), sizeof(UChar) * text.length()); // NOTE(review): assumes text.getBuffer() is non-null; confirm for bogus strings
+ fBuffer[text.length()] = 0;
+ }
+
+ inline UnicodeString toAliasedUnicodeString() const {
+ return UnicodeString(TRUE, fBuffer.getAlias(), -1); // read-only alias; length -1 means "up to the NUL terminator"
+ }
+
+ bool operator==(const CompactUnicodeString& other) const {
+ // Use the alias-only constructor and then call UnicodeString operator==
+ return toAliasedUnicodeString() == other.toAliasedUnicodeString();
+ }
+
+ private:
+ MaybeStackArray<UChar, stackCapacity> fBuffer; // always NUL-terminated by the constructors above
+};
+
/**
* Struct-like class to hold the results of a parsing routine.
void NumberParserTest::testCurrencyAnyMatcher() {
IcuTestErrorCode status(*this, "testCurrencyAnyMatcher");
+ UnicodeString currency1(u"IU$");
+ UnicodeString currency2(u"ICU");
+ DecimalFormatSymbols symbols("en", status);
IgnorablesMatcher ignorables(unisets::DEFAULT_IGNORABLES);
- AffixTokenMatcherWarehouse warehouse(u"ICU", u"IU$", u"ICU", {"en",status}, &ignorables, "en");
+ Locale locale("en");
+ AffixTokenMatcherWarehouse warehouse(u"ICU", &currency1, &currency2, &symbols, &ignorables, &locale);
NumberParseMatcher& matcher = warehouse.currency(status);
static const struct TestCase{
void NumberParserTest::testAffixPatternMatcher() {
IcuTestErrorCode status(*this, "testAffixPatternMatcher");
+ UnicodeString currency1(u"foo");
+ UnicodeString currency2(u"bar");
+ DecimalFormatSymbols symbols("en", status);
IgnorablesMatcher ignorables(unisets::DEFAULT_IGNORABLES);
- AffixTokenMatcherWarehouse warehouse(u"EUR", u"foo", u"bar", {"en", status}, &ignorables, "en");
+ Locale locale("en");
+ AffixTokenMatcherWarehouse warehouse(u"EUR", &currency1, &currency2, &symbols, &ignorables, &locale);
static const struct TestCase {
bool exactMatch;