number_rounding.o number_scientific.o number_stringbuilder.o \
numparse_stringsegment.o numparse_unisets.o numparse_parsednumber.o \
numparse_impl.o numparse_symbols.o numparse_decimal.o numparse_scientific.o \
-numparse_currency.o
+numparse_currency.o numparse_affixes.o numparse_compositions.o
## Header files to install
--- /dev/null
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
+
+#include "numparse_types.h"
+#include "numparse_affixes.h"
+
+using namespace icu;
+using namespace icu::numparse;
+using namespace icu::numparse::impl;
+
+
+
+
+
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
--- /dev/null
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
+#ifndef __NUMPARSE_AFFIXES_H__
+#define __NUMPARSE_AFFIXES_H__
+
+#include "numparse_types.h"
+
+U_NAMESPACE_BEGIN
+namespace numparse {
+namespace impl {
+
+
+
+
+
+} // namespace impl
+} // namespace numparse
+U_NAMESPACE_END
+
+#endif //__NUMPARSE_AFFIXES_H__
+#endif /* #if !UCONFIG_NO_FORMATTING */
--- /dev/null
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
+
+#include "numparse_types.h"
+#include "numparse_compositions.h"
+#include "unicode/uniset.h"
+
+using namespace icu;
+using namespace icu::numparse;
+using namespace icu::numparse::impl;
+
+
+/**
+ * Runs the child matchers in order and stops at the first one that consumes input.
+ *
+ * @param segment The text being parsed; its offset is advanced by the child that matches.
+ * @param result Passed to each child matcher, which may record what it parsed.
+ * @param status Passed through to each child matcher.
+ * @return true if any child matcher that was run returned true (i.e. asked for more input).
+ */
+bool AnyMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
+    const int32_t initialOffset = segment.getOffset();
+    bool maybeMore = false;
+
+    // NOTE: The range-based for loop calls the virtual begin() and end() methods.
+    for (auto* matcher : *this) {
+        // Call match() BEFORE or-ing in the accumulated flag. Writing
+        // "maybeMore || matcher->match(...)" short-circuits as soon as one matcher has asked
+        // for more input, and every later matcher in the list is then silently skipped.
+        maybeMore = matcher->match(segment, result, status) || maybeMore;
+        if (segment.getOffset() != initialOffset) {
+            // Match succeeded.
+            // NOTE: Except for a couple edge cases, if a matcher accepted string A, then it will
+            // accept any string starting with A. Therefore, there is no possibility that matchers
+            // later in the list may be evaluated on longer strings, and we can exit the loop here.
+            break;
+        }
+    }
+
+    // Reached both after a successful match (via break) and when no matcher consumed anything.
+    return maybeMore;
+}
+
+/** Forwards postProcess() to every child matcher, in list order. */
+void AnyMatcher::postProcess(ParsedNumber& result) const {
+    // The range comes from the virtual begin() and end() methods supplied by the subclass.
+    const NumberParseMatcher* const* cursor = begin();
+    const NumberParseMatcher* const* const last = end();
+    for (; cursor != last; ++cursor) {
+        (*cursor)->postProcess(result);
+    }
+}
+
+
+/**
+ * Runs the matchers of the series one after another against the segment.
+ *
+ * A matcher whose isFlexible() is true is optional: it is re-run for as long as it keeps consuming
+ * input and is skipped once it consumes nothing. Any other matcher must consume input; if it does
+ * not, the segment offset and the result are restored to their state on entry.
+ *
+ * @return the value reported by the last matcher that was run, or true when a matcher was skipped
+ *         because the segment was already empty.
+ */
+bool SeriesMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
+    // Snapshot of the incoming result, restored if the series fails part-way through.
+    ParsedNumber snapshot(result);
+    const int32_t startOffset = segment.getOffset();
+
+    bool maybeMore = true;
+    auto* cursor = begin();
+    while (cursor < end()) {
+        const NumberParseMatcher* current = *cursor;
+        const int offsetBefore = segment.getOffset();
+
+        // With no text left there is nothing for this matcher to match; ask for more instead.
+        maybeMore = (segment.length() == 0) ? true : current->match(segment, result, status);
+
+        const bool consumed = (segment.getOffset() != offsetBefore);
+        const bool flexible = current->isFlexible();
+
+        if (!consumed && !flexible) {
+            // Match failed, and this is NOT a flexible matcher. Undo everything and exit.
+            segment.setOffset(startOffset);
+            result = snapshot;
+            return maybeMore;
+        }
+        if (!(consumed && flexible)) {
+            // Either a non-flexible matcher succeeded, or a flexible matcher found nothing:
+            // proceed to the next matcher.
+            ++cursor;
+        }
+        // Otherwise a flexible matcher consumed input; the cursor stays put so it is re-run.
+    }
+
+    // Every matcher in the series was passed without a required one failing.
+    return maybeMore;
+}
+
+/** Forwards postProcess() to every matcher in the series, in order. */
+void SeriesMatcher::postProcess(ParsedNumber& result) const {
+    // The range comes from the virtual begin() and end() methods supplied by the subclass.
+    const NumberParseMatcher* const* cursor = begin();
+    const NumberParseMatcher* const* const last = end();
+    for (; cursor != last; ++cursor) {
+        (*cursor)->postProcess(result);
+    }
+}
+
+
+// Stores the array pointer in the LocalArray member and remembers how many entries it holds.
+ArraySeriesMatcher::ArraySeriesMatcher(NumberParseMatcher** matchers, int32_t matchersLen)
+        : fMatchers(matchers),
+          fMatchersLen(matchersLen) {
+}
+
+/**
+ * Returns the lead code points of the first matcher in the array: the series can only begin
+ * where its first matcher can begin.
+ */
+const UnicodeSet& ArraySeriesMatcher::getLeadCodePoints() {
+    // fMatchers[0] is dereferenced below, so the series must hold at least one matcher.
+    U_ASSERT(!fMatchers.isNull() && fMatchersLen > 0);
+    // SeriesMatchers are never allowed to start with a Flexible matcher.
+    U_ASSERT(!fMatchers[0]->isFlexible());
+    return fMatchers[0]->getLeadCodePoints();
+}
+
+/** Start of the iteration range: the first slot of the matcher array. */
+const NumberParseMatcher* const* ArraySeriesMatcher::begin() const {
+    NumberParseMatcher* const* firstSlot = fMatchers.getAlias();
+    return firstSlot;
+}
+
+/** End of the iteration range: one past the last slot of the matcher array. */
+const NumberParseMatcher* const* ArraySeriesMatcher::end() const {
+    NumberParseMatcher* const* firstSlot = fMatchers.getAlias();
+    return firstSlot + fMatchersLen;
+}
+
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
--- /dev/null
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
+#ifndef __SOURCE_NUMPARSE_COMPOSITIONS__
+#define __SOURCE_NUMPARSE_COMPOSITIONS__
+
+#include "numparse_types.h"
+
+U_NAMESPACE_BEGIN namespace numparse {
+namespace impl {
+
+
+/**
+ * Base class for AnyMatcher and SeriesMatcher.
+ *
+ * A subclass exposes its child matchers as a pointer range through begin() and end(); both
+ * functions are pure virtual here, so this class cannot be instantiated on its own.
+ */
+class CompositionMatcher : public NumberParseMatcher {
+ protected:
+ // No construction except by subclasses! (The constructor is protected.)
+ CompositionMatcher() = default;
+
+ // To be overridden by subclasses (used for iteration): start of the range of child matchers.
+ virtual const NumberParseMatcher* const* begin() const = 0;
+
+ // To be overridden by subclasses (used for iteration): end of the range of child matchers.
+ virtual const NumberParseMatcher* const* end() const = 0;
+};
+
+
+/**
+ * Composes a number of matchers, and succeeds if any of the matchers succeed. Always greedily chooses
+ * the first matcher in the list to succeed.
+ *
+ * The child matchers are not stored here; a concrete subclass supplies them through the begin() and
+ * end() functions inherited from CompositionMatcher.
+ *
+ * NOTE: In C++, this is a base class, unlike ICU4J, which uses a factory-style interface.
+ *
+ * @author sffc
+ * @see SeriesMatcher
+ */
+class AnyMatcher : public CompositionMatcher {
+ public:
+ bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
+
+ void postProcess(ParsedNumber& result) const override;
+
+ protected:
+ // No construction except by subclasses! (The constructor is protected.)
+ AnyMatcher() = default;
+};
+
+
+/**
+ * Composes a number of matchers, running one after another. Matches the input string only if all of the
+ * matchers in the series succeed. Performs greedy matches within the context of the series.
+ *
+ * The child matchers are not stored here; a concrete subclass supplies them through the begin() and
+ * end() functions inherited from CompositionMatcher.
+ *
+ * @author sffc
+ * @see AnyMatcher
+ */
+class SeriesMatcher : public CompositionMatcher {
+ public:
+ bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
+
+ void postProcess(ParsedNumber& result) const override;
+
+ protected:
+ // No construction except by subclasses! (The constructor is protected.)
+ SeriesMatcher() = default;
+};
+
+
+/**
+ * An implementation of SeriesMatcher that references an array of matchers.
+ *
+ * The object adopts the array, but NOT the matchers contained inside the array.
+ * The array pointer is held in a LocalArray member and its length in a separate int32_t.
+ */
+class ArraySeriesMatcher : public SeriesMatcher {
+ public:
+ /** The array is adopted, but NOT the matchers inside the array. matchersLen is the array's length. */
+ ArraySeriesMatcher(NumberParseMatcher** matchers, int32_t matchersLen);
+
+ const UnicodeSet& getLeadCodePoints() override;
+
+ protected:
+ const NumberParseMatcher* const* begin() const override;
+
+ const NumberParseMatcher* const* end() const override;
+
+ private:
+ LocalArray<NumberParseMatcher*> fMatchers; // the adopted array of (non-owned) matcher pointers
+ int32_t fMatchersLen; // number of entries in fMatchers
+};
+
+
+} // namespace impl
+} // namespace numparse
+U_NAMESPACE_END
+
+#endif //__SOURCE_NUMPARSE_COMPOSITIONS__
+#endif /* #if !UCONFIG_NO_FORMATTING */
#include "numparse_currency.h"
#include "ucurrimp.h"
#include "unicode/errorcode.h"
+#include "numparse_utils.h"
using namespace icu;
using namespace icu::numparse;
using namespace icu::numparse::impl;
+namespace {
+
+/** Copies the first three code units of src into dest and writes a terminating 0 at dest[3]. */
+inline void copyCurrencyCode(UChar* dest, const UChar* src) {
+    for (int32_t i = 0; i < 3; i++) {
+        dest[i] = src[i];
+    }
+    dest[3] = 0;
+}
+
+}
+
+
// Keeps only the locale's name (in fLocaleName); any failure while storing it is reported via status.
CurrencyNamesMatcher::CurrencyNamesMatcher(const Locale& locale, UErrorCode& status)
: fLocaleName(locale.getName(), -1, status) {}
return partialMatch;
}
-const UnicodeSet* CurrencyNamesMatcher::getLeadCodePoints() const {
- ErrorCode status;
- UnicodeSet* leadCodePoints = new UnicodeSet();
- uprv_currencyLeads(fLocaleName.data(), *leadCodePoints, status);
- // Always apply case mapping closure for currencies
- leadCodePoints->closeOver(USET_ADD_CASE_MAPPINGS);
- leadCodePoints->freeze();
+/**
+ * Returns the set of code points that can start a currency string for this matcher's locale.
+ * The set is built on the first call and reused afterwards.
+ */
+const UnicodeSet& CurrencyNamesMatcher::getLeadCodePoints() {
+    // Fast path: the set was already built by an earlier call.
+    if (!fLocalLeadCodePoints.isNull()) {
+        return *fLocalLeadCodePoints;
+    }
+
+    ErrorCode status;
+    auto* leads = new UnicodeSet();
+    uprv_currencyLeads(fLocaleName.data(), *leads, status);
+    // Always apply case mapping closure for currencies
+    leads->closeOver(USET_ADD_CASE_MAPPINGS);
+    leads->freeze();
+    fLocalLeadCodePoints.adoptInstead(leads);
+    return *fLocalLeadCodePoints;
+}
+
+
+// Copies both currency strings and the currency code into this object.
+CurrencyCustomMatcher::CurrencyCustomMatcher(const char16_t* currencyCode, const UnicodeString& currency1,
+                                             const UnicodeString& currency2)
+        : fCurrency1(currency1),
+          fCurrency2(currency2) {
+    // fCurrencyCode receives three code units plus a terminating 0.
+    copyCurrencyCode(fCurrencyCode, currencyCode);
+}
+
+/**
+ * Accepts the segment if it starts with fCurrency1 or fCurrency2, recording fCurrencyCode in the
+ * result and advancing the segment past the matched string.
+ *
+ * @param segment The text being parsed; advanced past the currency string on a match.
+ * @param result Receives the currency code; left untouched if it already holds one.
+ * @return On a match, true only if the match used up the whole segment. Otherwise, true if the
+ *         whole remaining segment is a prefix of one of the two currency strings.
+ */
+bool CurrencyCustomMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode&) const {
+    // A currency was already parsed; do not overwrite it.
+    if (result.currencyCode[0] != 0) {
+        return false;
+    }
+
+    const int overlap1 = segment.getCommonPrefixLength(fCurrency1);
+    if (overlap1 == fCurrency1.length()) {
+        copyCurrencyCode(result.currencyCode, fCurrencyCode);
+        segment.adjustOffset(overlap1);
+        result.setCharsConsumed(segment);
+        // Stop here. Falling through would test fCurrency2 against the text AFTER the match,
+        // which could consume a second currency string and would compare the stale overlap
+        // against the already-shortened segment.
+        return segment.length() == 0;
+    }
+
+    const int overlap2 = segment.getCommonPrefixLength(fCurrency2);
+    if (overlap2 == fCurrency2.length()) {
+        copyCurrencyCode(result.currencyCode, fCurrencyCode);
+        segment.adjustOffset(overlap2);
+        result.setCharsConsumed(segment);
+        return segment.length() == 0;
+    }
+
+    // Neither string matched in full; more input can only help if everything that is left is a
+    // prefix of one of them.
+    return overlap1 == segment.length() || overlap2 == segment.length();
+}
+
+/**
+ * Returns the set holding the lead code point of each of the two currency strings.
+ * The set is built on the first call and reused afterwards.
+ */
+const UnicodeSet& CurrencyCustomMatcher::getLeadCodePoints() {
+    // Fast path: the set was already built by an earlier call.
+    if (!fLocalLeadCodePoints.isNull()) {
+        return *fLocalLeadCodePoints;
+    }
+
+    auto* leads = new UnicodeSet();
+    utils::putLeadCodePoint(fCurrency1, leads);
+    utils::putLeadCodePoint(fCurrency2, leads);
+    leads->freeze();
+    fLocalLeadCodePoints.adoptInstead(leads);
+    return *fLocalLeadCodePoints;
+}
+
+
+// Moves both matchers into this object, then points the iteration array at the stored copies.
+CurrencyAnyMatcher::CurrencyAnyMatcher(CurrencyNamesMatcher namesMatcher,
+                                       CurrencyCustomMatcher customMatcher)
+        : fNamesMatcher(std::move(namesMatcher)),
+          fCustomMatcher(std::move(customMatcher)) {
+    // Iteration order: the locale names matcher first, the custom matcher second.
+    fMatcherArray[0] = &fNamesMatcher;
+    fMatcherArray[1] = &fCustomMatcher;
+}
+
+/**
+ * Returns the union of the lead code points of the two wrapped matchers.
+ * The set is built on the first call and reused afterwards.
+ */
+const UnicodeSet& CurrencyAnyMatcher::getLeadCodePoints() {
+    // Fast path: the set was already built by an earlier call.
+    if (!fLocalLeadCodePoints.isNull()) {
+        return *fLocalLeadCodePoints;
+    }
+
+    auto* combined = new UnicodeSet();
+    combined->addAll(fNamesMatcher.getLeadCodePoints());
+    combined->addAll(fCustomMatcher.getLeadCodePoints());
+    combined->freeze();
+    fLocalLeadCodePoints.adoptInstead(combined);
+    return *fLocalLeadCodePoints;
+}
+
+/** Start of the iteration range: the first entry of fMatcherArray. */
+const NumberParseMatcher* const* CurrencyAnyMatcher::begin() const {
+    const NumberParseMatcher* const* firstSlot = fMatcherArray;
+    return firstSlot;
+}
- return leadCodePoints;
+/** End of the iteration range: one past the second (last) entry of fMatcherArray. */
+const NumberParseMatcher* const* CurrencyAnyMatcher::end() const {
+    const NumberParseMatcher* const* firstSlot = fMatcherArray;
+    return firstSlot + 2;
}
#define __NUMPARSE_CURRENCY_H__
#include "numparse_types.h"
+#include "numparse_compositions.h"
#include "charstr.h"
U_NAMESPACE_BEGIN namespace numparse {
bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
- const UnicodeSet* getLeadCodePoints() const override;
+ const UnicodeSet& getLeadCodePoints() override;
private:
// We could use Locale instead of CharString here, but
};
+class CurrencyCustomMatcher : public NumberParseMatcher, public UMemory { // matcher for one currency, given as a code plus two strings
+ public:
+ CurrencyCustomMatcher(const char16_t* currencyCode, const UnicodeString& currency1,
+ const UnicodeString& currency2); // all three arguments are copied into the object
+
+ bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
+
+ const UnicodeSet& getLeadCodePoints() override;
+
+ private:
+ UChar fCurrencyCode[4]; // currency code to report: three code units plus a terminating 0
+ UnicodeString fCurrency1; // first string accepted for this currency
+ UnicodeString fCurrency2; // second string accepted for this currency
+};
+
+
+/**
+ * An implementation of AnyMatcher, allowing for either currency data or locale currency matches.
+ *
+ * The two wrapped matchers are stored by value inside this object; fMatcherArray holds pointers to
+ * those two members and is what begin() and end() expose for iteration.
+ */
+class CurrencyAnyMatcher : public AnyMatcher, public UMemory {
+ public:
+ /** Calls std::move on the two arguments. */
+ CurrencyAnyMatcher(CurrencyNamesMatcher namesMatcher, CurrencyCustomMatcher customMatcher);
+
+ const UnicodeSet& getLeadCodePoints() override;
+
+ protected:
+ const NumberParseMatcher* const* begin() const override;
+
+ const NumberParseMatcher* const* end() const override;
+
+ private:
+ CurrencyNamesMatcher fNamesMatcher;
+ CurrencyCustomMatcher fCustomMatcher;
+
+ const NumberParseMatcher* fMatcherArray[2]; // points at the two members above
+};
+
+
} // namespace impl
} // namespace numparse
U_NAMESPACE_END
return segment.length() == 0 || hasPartialPrefix;
}
-const UnicodeSet* DecimalMatcher::getLeadCodePoints() const {
+const UnicodeSet& DecimalMatcher::getLeadCodePoints() { // now returns a reference instead of a newly allocated set
if (fLocalDigitStrings.isNull() && leadSet != nullptr) {
- return new UnicodeSet(*leadSet);
+ return *leadSet; // no custom digit strings and a lead set is available: return it directly, no copy
}
- auto* leadCodePoints = new UnicodeSet();
- // Assumption: the sets are all single code points.
- leadCodePoints->addAll(*unisets::get(unisets::DIGITS));
- leadCodePoints->addAll(*separatorSet);
- if (!fLocalDigitStrings.isNull()) {
- for (int i = 0; i < 10; i++) {
- utils::putLeadCodePoint(fLocalDigitStrings[i], leadCodePoints);
+ if (fLocalLeadCodePoints.isNull()) { // build the set on the first call only; later calls reuse it
+ auto* leadCodePoints = new UnicodeSet();
+ // Assumption: the sets are all single code points.
+ leadCodePoints->addAll(*unisets::get(unisets::DIGITS));
+ leadCodePoints->addAll(*separatorSet);
+ if (!fLocalDigitStrings.isNull()) {
+ for (int i = 0; i < 10; i++) { // one digit string per decimal digit
+ utils::putLeadCodePoint(fLocalDigitStrings[i], leadCodePoints);
+ }
}
+ leadCodePoints->freeze();
+ fLocalLeadCodePoints.adoptInstead(leadCodePoints); // this object now owns the set
}
- leadCodePoints->freeze();
- return leadCodePoints;
+ return *fLocalLeadCodePoints;
}
bool
match(StringSegment& segment, ParsedNumber& result, int8_t exponentSign, UErrorCode& status) const;
- const UnicodeSet* getLeadCodePoints() const override;
+ const UnicodeSet& getLeadCodePoints() override;
private:
/** If true, only accept strings whose grouping sizes match the locale */
const UnicodeSet* leadSet;
// Make this class the owner of a few objects that could be allocated.
- // The first two LocalPointers are used for assigning ownership only.
+ // The first three LocalPointers are used for assigning ownership only.
LocalPointer<const UnicodeSet> fLocalDecimalUniSet;
LocalPointer<const UnicodeSet> fLocalSeparatorSet;
LocalArray<const UnicodeString> fLocalDigitStrings;
auto* parser = new NumberParserImpl(parseFlags, true);
DecimalFormatSymbols symbols(locale, status);
- parser->fLocalMatchers.ignorables = {unisets::DEFAULT_IGNORABLES};
+ parser->fLocalMatchers.ignorables = std::move(IgnorablesMatcher(unisets::DEFAULT_IGNORABLES));
// MatcherFactory factory = new MatcherFactory();
// factory.currency = Currency.getInstance("USD");
fNumMatchers = 0;
}
-void NumberParserImpl::addMatcher(const NumberParseMatcher& matcher) {
+void NumberParserImpl::addMatcher(NumberParseMatcher& matcher) {
if (fNumMatchers + 1 > fMatchers.getCapacity()) {
fMatchers.resize(fNumMatchers * 2, fNumMatchers);
if (fComputeLeads) {
fNumMatchers++;
}
-void NumberParserImpl::addLeadCodePointsForMatcher(const NumberParseMatcher& matcher) {
- const UnicodeSet* leadCodePoints = matcher.getLeadCodePoints();
+void NumberParserImpl::addLeadCodePointsForMatcher(NumberParseMatcher& matcher) { // non-const: getLeadCodePoints() is no longer a const method
+ const UnicodeSet& leadCodePoints = matcher.getLeadCodePoints(); // stays owned by the matcher; must not be deleted here
// TODO: Avoid the clone operation here.
if (0 != (fParseFlags & PARSE_FLAG_IGNORE_CASE)) {
- UnicodeSet* copy = static_cast<UnicodeSet*>(leadCodePoints->cloneAsThawed());
- delete leadCodePoints;
+ auto* copy = dynamic_cast<UnicodeSet*>(leadCodePoints.cloneAsThawed()); // NOTE(review): copy is dereferenced unchecked; dynamic_cast yields nullptr on a type mismatch — confirm the clone is always a UnicodeSet
copy->closeOver(USET_ADD_CASE_MAPPINGS);
copy->freeze();
fLeads[fNumMatchers] = copy;
} else {
- fLeads[fNumMatchers] = leadCodePoints;
+ // FIXME: new here because we still take ownership (fLeads receives its own copy of the matcher's set)
+ fLeads[fNumMatchers] = new UnicodeSet(leadCodePoints);
}
}
static NumberParserImpl* createSimpleParser(const Locale& locale, const UnicodeString& patternString,
parse_flags_t parseFlags, UErrorCode& status);
- void addMatcher(const NumberParseMatcher& matcher);
+ void addMatcher(NumberParseMatcher& matcher);
void freeze();
NumberParserImpl(parse_flags_t parseFlags, bool computeLeads);
- void addLeadCodePointsForMatcher(const NumberParseMatcher& matcher);
+ void addLeadCodePointsForMatcher(NumberParseMatcher& matcher);
void parseGreedyRecursive(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const;
return false;
}
-const UnicodeSet* ScientificMatcher::getLeadCodePoints() const {
+const UnicodeSet& ScientificMatcher::getLeadCodePoints() { // now returns a reference instead of a newly allocated set
UChar32 leadCp = fExponentSeparatorString.char32At(0); // first code point of the exponent separator string
const UnicodeSet* s = unisets::get(unisets::SCIENTIFIC_LEAD);
if (s->contains(leadCp)) {
- return new UnicodeSet(*s);
- } else {
- UnicodeSet* leadCodePoints = new UnicodeSet();
+ return *s; // the set obtained from unisets::get already covers the lead code point: return it directly
+ }
+
+ if (fLocalLeadCodePoints.isNull()) { // build the single-code-point set on the first call only
+ auto* leadCodePoints = new UnicodeSet();
leadCodePoints->add(leadCp);
leadCodePoints->freeze();
- return leadCodePoints;
+ fLocalLeadCodePoints.adoptInstead(leadCodePoints); // this object now owns the set
}
+ return *fLocalLeadCodePoints;
}
bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
- const UnicodeSet* getLeadCodePoints() const override;
+ const UnicodeSet& getLeadCodePoints() override;
private:
UnicodeString fExponentSeparatorString;
return overlap == segment.length();
}
-const UnicodeSet* SymbolMatcher::getLeadCodePoints() const {
+const UnicodeSet& SymbolMatcher::getLeadCodePoints() { // now returns a reference instead of a newly allocated set
if (fString.isEmpty()) {
// Assumption: for sets from UnicodeSetStaticCache, uniSet == leadCodePoints.
- return new UnicodeSet(*fUniSet);
+ return *fUniSet; // no symbol string: return the set itself, no copy
}
- UnicodeSet* leadCodePoints = new UnicodeSet();
- utils::putLeadCodePoints(fUniSet, leadCodePoints);
- utils::putLeadCodePoint(fString, leadCodePoints);
- leadCodePoints->freeze();
- return leadCodePoints;
+ if (fLocalLeadCodePoints.isNull()) { // build the combined set on the first call only
+ auto* leadCodePoints = new UnicodeSet();
+ utils::putLeadCodePoints(fUniSet, leadCodePoints);
+ utils::putLeadCodePoint(fString, leadCodePoints);
+ leadCodePoints->freeze();
+ fLocalLeadCodePoints.adoptInstead(leadCodePoints); // this object now owns the set
+ }
+ return *fLocalLeadCodePoints;
}
InfinityMatcher::InfinityMatcher(const DecimalFormatSymbols& dfs)
- : SymbolMatcher(dfs.getConstSymbol(DecimalFormatSymbols::kNaNSymbol), unisets::INFINITY) {
+ : SymbolMatcher(dfs.getConstSymbol(DecimalFormatSymbols::kInfinitySymbol), unisets::INFINITY) { // uses the infinity symbol; the removed line passed kNaNSymbol
}
bool InfinityMatcher::isDisabled(const ParsedNumber& result) const {
: SymbolMatcher(dfs.getConstSymbol(DecimalFormatSymbols::kNaNSymbol), unisets::EMPTY) {
}
-const UnicodeSet* NanMatcher::getLeadCodePoints() const {
+const UnicodeSet& NanMatcher::getLeadCodePoints() { // now returns a reference instead of a newly allocated set
// Overriding this here to allow use of statically allocated sets
int leadCp = fString.char32At(0); // first code point of the symbol string
const UnicodeSet* s = unisets::get(unisets::NAN_LEAD);
if (s->contains(leadCp)) {
- return new UnicodeSet(*s);
- } else {
- return SymbolMatcher::getLeadCodePoints();
+ return *s; // the set obtained from unisets::get already covers the lead code point: return it directly
}
+
+ return SymbolMatcher::getLeadCodePoints(); // otherwise use the base-class implementation
}
bool NanMatcher::isDisabled(const ParsedNumber& result) const {
return true;
}
-bool PaddingMatcher::isDisabled(const ParsedNumber& result) const {
+bool PaddingMatcher::isDisabled(const ParsedNumber&) const { // parameter is unnamed because it is not used
return false; // always returns false, whatever the result holds
}
-void PaddingMatcher::accept(StringSegment& segment, ParsedNumber& result) const {
+void PaddingMatcher::accept(StringSegment&, ParsedNumber&) const { // parameters are unnamed because they are not used
// No-op: this function does nothing with the segment or the result.
}
bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
- const UnicodeSet* getLeadCodePoints() const override;
+ /** NOTE: This method is not guaranteed to be thread-safe. */
+ const UnicodeSet& getLeadCodePoints() override;
virtual bool isDisabled(const ParsedNumber& result) const = 0;
NanMatcher(const DecimalFormatSymbols& dfs);
- const UnicodeSet* getLeadCodePoints() const override;
+ const UnicodeSet& getLeadCodePoints() override;
protected:
bool isDisabled(const ParsedNumber& result) const override;
*/
class NumberParseMatcher {
public:
- virtual ~NumberParseMatcher() = default;
-
/**
* Matchers can override this method to return true to indicate that they are optional and can be run
* repeatedly. Used by SeriesMatcher, primarily in the context of IgnorablesMatcher.
* something interesting in the StringSegment, it should update the offset of the StringSegment
* corresponding to how many chars were matched.
*
+ * This method is thread-safe.
+ *
* @param segment
* The StringSegment to match against. Matches always start at the beginning of the
* segment. The segment is guaranteed to contain at least one char.
* return value is used to skip this matcher unless a segment begins with a char in this set. To make
* this matcher always run, return {@link UnicodeSet#ALL_CODE_POINTS}.
*
- * The returned UnicodeSet needs adoption!
+ * The returned UnicodeSet does not need adoption and is guaranteed to be alive for as long as the
+ * object that returned it.
+ *
+ * This method is NOT thread-safe.
*/
- virtual const UnicodeSet* getLeadCodePoints() const = 0;
+ virtual const UnicodeSet& getLeadCodePoints() = 0;
/**
* Method called at the end of a parse, after all matchers have failed to consume any more chars.
virtual void postProcess(ParsedNumber&) const {
// Default implementation: no-op
};
+
+ protected:
+ // No construction except by subclasses!
+ NumberParseMatcher() = default;
+
+ // Optional ownership of the leadCodePoints set
+ LocalPointer<const UnicodeSet> fLocalLeadCodePoints;
};
return b ? UnicodeString("TRUE"):UnicodeString("FALSE");
}
+/**
+ * Renders a UnicodeSet as its pattern string, for use in test messages.
+ *
+ * @param uniset The set to render.
+ * @param status Kept for interface compatibility; not read or written here.
+ * @return The pattern string of the set, with unprintable characters left unescaped.
+ */
+UnicodeString toString(const UnicodeSet& uniset, UErrorCode& status) {
+    // The second parameter of UnicodeSet::toPattern() is the escapeUnprintable flag, not an
+    // error code. Passing `status` there only compiled because the enum converts to UBool,
+    // which made the escaping behaviour depend on the current error value.
+    (void)status;
+    UnicodeString result;
+    uniset.toPattern(result, FALSE);
+    return result;
+}
+
// stephen - cleaned up 05/05/99
UnicodeString operator+(const UnicodeString& left, char num)
{ return left + (long)num; }
return TRUE;
}
+/**
+ * Asserts that two UnicodeSets are equal. On a mismatch, logs an error showing both sets as
+ * patterns and returns FALSE; otherwise returns TRUE.
+ */
+UBool IntlTest::assertEquals(const char* message,
+                             const UnicodeSet& expected,
+                             const UnicodeSet& actual) {
+    IcuTestErrorCode status(*this, "assertEqualsUniSet");
+    if (expected == actual) {
+#ifdef VERBOSE_ASSERTIONS
+        logln((UnicodeString)"Ok: " + message + "; got " + toString(actual, status));
+#endif
+        return TRUE;
+    }
+    errln((UnicodeString)"FAIL: " + message + "; got " +
+          toString(actual, status) +
+          "; expected " + toString(expected, status));
+    return FALSE;
+}
+
#if !UCONFIG_NO_FORMATTING
UBool IntlTest::assertEquals(const char* message,
UErrorCode actual) {
return assertEquals(extractToAssertBuf(message), expected, actual);
}
+/** UnicodeString-message overload: converts the message, then defers to the overload above. */
+UBool IntlTest::assertEquals(const UnicodeString& message,
+                             const UnicodeSet& expected,
+                             const UnicodeSet& actual) {
+    auto convertedMessage = extractToAssertBuf(message);
+    return assertEquals(convertedMessage, expected, actual);
+}
#if !UCONFIG_NO_FORMATTING
UBool IntlTest::assertEquals(const UnicodeString& message,
// The following includes utypes.h, uobject.h and unistr.h
#include "unicode/fmtable.h"
#include "unicode/testlog.h"
+#include "unicode/uniset.h"
U_NAMESPACE_USE
UBool assertEquals(const char* message, int64_t expected, int64_t actual);
UBool assertEquals(const char* message, double expected, double actual);
UBool assertEquals(const char* message, UErrorCode expected, UErrorCode actual);
+ UBool assertEquals(const char* message, const UnicodeSet& expected, const UnicodeSet& actual);
#if !UCONFIG_NO_FORMATTING
UBool assertEquals(const char* message, const Formattable& expected,
const Formattable& actual, UBool possibleDataError=FALSE);
UBool assertEquals(const UnicodeString& message, int64_t expected, int64_t actual);
UBool assertEquals(const UnicodeString& message, double expected, double actual);
UBool assertEquals(const UnicodeString& message, UErrorCode expected, UErrorCode actual);
+ UBool assertEquals(const UnicodeString& message, const UnicodeSet& expected, const UnicodeSet& actual);
virtual void runIndexedTest( int32_t index, UBool exec, const char* &name, char* par = NULL ); // overide !
void testBasic();
void testLocaleFi();
void testSeriesMatcher();
+ void testCurrencyAnyMatcher();
void testGroupingDisabled();
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0);
}
TESTCASE_AUTO_BEGIN;
TESTCASE_AUTO(testBasic);
+ TESTCASE_AUTO(testSeriesMatcher);
TESTCASE_AUTO_END;
}
{3, u"0", u"0", 1, 0.0}};
parse_flags_t parseFlags = PARSE_FLAG_IGNORE_CASE | PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES;
- for (auto cas : cases) {
+ for (auto& cas : cases) {
UnicodeString inputString(cas.inputString);
UnicodeString patternString(cas.patternString);
LocalPointer<const NumberParserImpl> parser(
}
}
+/**
+ * Exercises ArraySeriesMatcher with the series: plus sign, minus sign, ignorables, percent,
+ * ignorables. Checks the lead code point set of the series, then checks the final segment
+ * offset and the return value of match() for a table of inputs.
+ */
+void NumberParserTest::testSeriesMatcher() {
+    IcuTestErrorCode status(*this, "testSeriesMatcher");
+
+    DecimalFormatSymbols symbols("en", status);
+
+    PlusSignMatcher m0(symbols, false);
+    MinusSignMatcher m1(symbols, false);
+    IgnorablesMatcher m2(unisets::DEFAULT_IGNORABLES);
+    PercentMatcher m3(symbols);
+    IgnorablesMatcher m4(unisets::DEFAULT_IGNORABLES);
+
+    // The series adopts the array itself, but not the stack-allocated matchers inside it.
+    ArraySeriesMatcher series(new NumberParseMatcher* [5]{&m0, &m1, &m2, &m3, &m4}, 5);
+
+    assertEquals(
+            "Lead set should be equal to lead set of lead matcher",
+            *unisets::get(unisets::PLUS_SIGN),
+            series.getLeadCodePoints());
+
+    static const struct TestCase {
+        const char16_t* input;
+        int32_t expectedOffset;
+        bool expectedMaybeMore;
+    } cases[] = {{u"", 0, true},
+                 {u" ", 0, false},
+                 {u"$", 0, false},
+                 {u"+", 0, true},
+                 {u" +", 0, false},
+                 {u"+-", 0, true},
+                 {u"+ -", 0, false},
+                 {u"+- ", 0, true},
+                 {u"+- $", 0, false},
+                 {u"+-%", 3, true},
+                 {u" +- % ", 0, false},
+                 // Seven code units, all consumed: "+-", two spaces, "%", two spaces. With
+                 // single spaces the input is only five units long and offset 7 is unreachable.
+                 {u"+-  %  ", 7, true},
+                 {u"+-%$", 3, false}};
+
+    for (auto& cas : cases) {
+        UnicodeString input(cas.input);
+
+        StringSegment segment(input, 0);
+        ParsedNumber result;
+        bool actualMaybeMore = series.match(segment, result, status);
+        int32_t actualOffset = segment.getOffset();
+
+        assertEquals("'" + input + "'", cas.expectedOffset, actualOffset);
+        assertEquals("'" + input + "'", cas.expectedMaybeMore, actualMaybeMore);
+    }
+}
+
#endif
/**
* A matcher for a single currency instance (not the full trie).
*/
-public class CurrencyMatcher implements NumberParseMatcher {
+public class CurrencyCustomMatcher implements NumberParseMatcher {
private final String isoCode;
private final String currency1;
private final String currency2;
- public static CurrencyMatcher getInstance(Currency currency, ULocale loc) {
- return new CurrencyMatcher(currency.getSubtype(),
+ public static CurrencyCustomMatcher getInstance(Currency currency, ULocale loc) { // factory for the (private) constructor
+ return new CurrencyCustomMatcher(currency.getSubtype(), // passed as the constructor's isoCode argument
currency.getSymbol(loc), // currency1: the currency's symbol for loc
currency.getCurrencyCode()); // currency2: the currency code
}
- private CurrencyMatcher(String isoCode, String currency1, String currency2) {
+ private CurrencyCustomMatcher(String isoCode, String currency1, String currency2) {
this.isoCode = isoCode;
this.currency1 = currency1;
this.currency2 = currency2;
import com.ibm.icu.util.ULocale;
/**
- * @author sffc
+ * Matches currencies according to all available strings in locale data.
+ *
+ * The implementation of this class is different between J and C. See #13584 for a follow-up.
*
+ * @author sffc
*/
-public class CurrencyTrieMatcher implements NumberParseMatcher {
+public class CurrencyNamesMatcher implements NumberParseMatcher {
private final TextTrieMap<CurrencyStringInfo> longNameTrie;
private final TextTrieMap<CurrencyStringInfo> symbolTrie;
- public static CurrencyTrieMatcher getInstance(ULocale locale) {
+ public static CurrencyNamesMatcher getInstance(ULocale locale) {
// TODO: Pre-compute some of the more popular locales?
- return new CurrencyTrieMatcher(locale);
+ return new CurrencyNamesMatcher(locale);
}
- private CurrencyTrieMatcher(ULocale locale) {
- // TODO: Currency trie does not currently have an option for case folding. It defaults to use
+ private CurrencyNamesMatcher(ULocale locale) {
+ // TODO: Currency trie does not currently have an option for case folding. It defaults to use
// case folding on long-names but not symbols.
longNameTrie = Currency.getParsingTrie(locale, Currency.LONG_NAME);
symbolTrie = Currency.getParsingTrie(locale, Currency.SYMBOL_NAME);
UnicodeSet leadCodePoints = new UnicodeSet();
longNameTrie.putLeadCodePoints(leadCodePoints);
symbolTrie.putLeadCodePoints(leadCodePoints);
+ // Always apply case mapping closure for currencies
+ leadCodePoints.closeOver(UnicodeSet.ADD_CASE_MAPPINGS);
return leadCodePoints.freeze();
}
import com.ibm.icu.util.ULocale;
/**
- * @author sffc
+ * Small helper class that generates matchers for SeriesMatcher.
*
+ * @author sffc
*/
public class MatcherFactory {
- Currency currency;
- DecimalFormatSymbols symbols;
- IgnorablesMatcher ignorables;
- ULocale locale;
+ public Currency currency;
+ public DecimalFormatSymbols symbols;
+ public IgnorablesMatcher ignorables;
+ public ULocale locale;
public MinusSignMatcher minusSign(boolean allowTrailing) {
return MinusSignMatcher.getInstance(symbols, allowTrailing);
// Builds a frozen AnyMatcher holding two currency matchers: the custom one (added first), then the locale names one.
public AnyMatcher currency() {
AnyMatcher any = new AnyMatcher();
- any.addMatcher(CurrencyMatcher.getInstance(currency, locale));
- any.addMatcher(CurrencyTrieMatcher.getInstance(locale));
+ any.addMatcher(CurrencyCustomMatcher.getInstance(currency, locale));
+ any.addMatcher(CurrencyNamesMatcher.getInstance(locale));
any.freeze();
return any;
}
parser.addMatcher(InfinityMatcher.getInstance(symbols));
parser.addMatcher(PaddingMatcher.getInstance("@"));
parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper));
- parser.addMatcher(CurrencyTrieMatcher.getInstance(locale));
+ parser.addMatcher(CurrencyNamesMatcher.getInstance(locale));
parser.addMatcher(new RequireNumberMatcher());
parser.freeze();
////////////////////////
if (parseCurrency || patternInfo.hasCurrencySign()) {
- parser.addMatcher(CurrencyMatcher.getInstance(currency, locale));
- parser.addMatcher(CurrencyTrieMatcher.getInstance(locale));
+ parser.addMatcher(CurrencyCustomMatcher.getInstance(currency, locale));
+ parser.addMatcher(CurrencyNamesMatcher.getInstance(locale));
}
///////////////////////////////
import org.junit.Test;
+import com.ibm.icu.impl.number.CustomSymbolCurrency;
import com.ibm.icu.impl.number.DecimalFormatProperties;
+import com.ibm.icu.impl.number.parse.AnyMatcher;
import com.ibm.icu.impl.number.parse.IgnorablesMatcher;
+import com.ibm.icu.impl.number.parse.MatcherFactory;
import com.ibm.icu.impl.number.parse.MinusSignMatcher;
import com.ibm.icu.impl.number.parse.NumberParserImpl;
import com.ibm.icu.impl.number.parse.ParsedNumber;
}
}
+ @Test
+ public void testCurrencyAnyMatcher() {
+     // Factory for English with a custom currency built from the strings "ICU", "IU$", "ICU".
+     MatcherFactory matcherFactory = new MatcherFactory();
+     matcherFactory.locale = ULocale.ENGLISH;
+     CustomSymbolCurrency customCurrency = new CustomSymbolCurrency("ICU", "IU$", "ICU");
+     matcherFactory.currency = customCurrency;
+     AnyMatcher anyCurrency = matcherFactory.currency();
+
+     // Each row: { input text, expected currency code (null when nothing should match) }.
+     String[][] table = {
+             { "", null },
+             { "FOO", null },
+             { "USD", "USD" },
+             { "$", "USD" },
+             { "US dollars", "USD" },
+             { "eu", null },
+             { "euros", "EUR" },
+             { "ICU", "ICU" },
+             { "IU$", "ICU" } };
+     for (String[] row : table) {
+         String text = row[0];
+         String wantCode = row[1];
+
+         StringSegment seg = new StringSegment(text, 0);
+         ParsedNumber parsed = new ParsedNumber();
+         anyCurrency.match(seg, parsed);
+         assertEquals("Parsing " + text, wantCode, parsed.currencyCode);
+
+         // A successful match must cover the whole input; a failed one must consume nothing.
+         int wantEnd = (wantCode == null) ? 0 : text.length();
+         assertEquals("Whole string on " + text, wantEnd, parsed.charEnd);
+     }
+ }
+
+
@Test
public void testGroupingDisabled() {
DecimalFormatProperties properties = new DecimalFormatProperties();