auto* parser = new NumberParserImpl(parseFlags, true);
DecimalFormatSymbols symbols(locale, status);
-// IgnorablesMatcher* ignorables = IgnorablesMatcher.getDefault();
-//
+ IgnorablesMatcher* ignorables = new IgnorablesMatcher(unisets::DEFAULT_IGNORABLES);
+
// MatcherFactory factory = new MatcherFactory();
// factory.currency = Currency.getInstance("USD");
// factory.symbols = symbols;
Grouper grouper = Grouper::forStrategy(UNUM_GROUPING_AUTO);
grouper.setLocaleData(patternInfo, locale);
-// parser.addMatcher({ignorables, false});
+ parser->addAndAdoptMatcher(ignorables);
parser->addAndAdoptMatcher(new DecimalMatcher(symbols, grouper, parseFlags));
parser->addAndAdoptMatcher(new MinusSignMatcher(symbols, false));
-// parser.addMatcher(NanMatcher.getInstance(symbols, parseFlags));
+ parser->addAndAdoptMatcher(new PlusSignMatcher(symbols, false));
+ parser->addAndAdoptMatcher(new PercentMatcher(symbols));
+ parser->addAndAdoptMatcher(new PermilleMatcher(symbols));
+ parser->addAndAdoptMatcher(new NanMatcher(symbols));
// parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper, parseFlags));
// parser.addMatcher(CurrencyTrieMatcher.getInstance(locale));
// parser.addMatcher(new RequireNumberMatcher());
SymbolMatcher::SymbolMatcher(const UnicodeString& symbolString, unisets::Key key) {
fUniSet = unisets::get(key);
- fOwnsUniSet = false;
if (fUniSet->contains(symbolString)) {
fString.setToBogus();
} else {
}
}
-SymbolMatcher::~SymbolMatcher() {
- if (fOwnsUniSet) {
- delete fUniSet;
- fUniSet = nullptr;
- }
-}
-
const UnicodeSet* SymbolMatcher::getSet() {
return fUniSet;
}
}
-MinusSignMatcher::MinusSignMatcher(const DecimalFormatSymbols& dfs, bool allowTrailing) : SymbolMatcher(
- dfs.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol),
- unisets::MINUS_SIGN), fAllowTrailing(allowTrailing) {
+IgnorablesMatcher::IgnorablesMatcher(unisets::Key key)  // Builds a matcher over the set that `key` selects.
+ : SymbolMatcher({}, key) {  // `{}` hands the base class a default-constructed symbol string
+}
+
+bool IgnorablesMatcher::isFlexible() const {  // Unconditionally reports this matcher as flexible.
+ return true;  // NOTE(review): the meaning of "flexible" is defined by the base interface, not visible here
+}
+
+bool IgnorablesMatcher::isDisabled(const ParsedNumber&) const {  // The parse state is ignored (unnamed parameter).
+ return false;  // never disabled, whatever has been parsed so far
+}
+
+void IgnorablesMatcher::accept(StringSegment&, ParsedNumber&) const {  // Neither the segment nor the result is touched.
+ // No-op
+}
+
+
+MinusSignMatcher::MinusSignMatcher(const DecimalFormatSymbols& dfs, bool allowTrailing)  // Matcher for the minus-sign symbol read from `dfs`.
+ : SymbolMatcher(dfs.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol), unisets::MINUS_SIGN),  // symbol string + MINUS_SIGN set key
+ fAllowTrailing(allowTrailing) {  // consulted later by isDisabled()
}
bool MinusSignMatcher::isDisabled(const ParsedNumber& result) const {
- return 0 != (result.flags & FLAG_NEGATIVE) ||
- (fAllowTrailing ? false : result.seenNumber());
+ return 0 != (result.flags & FLAG_NEGATIVE) || (fAllowTrailing ? false : result.seenNumber());  // off once FLAG_NEGATIVE is set, or once a number was seen unless trailing signs are allowed
}
void MinusSignMatcher::accept(StringSegment& segment, ParsedNumber& result) const {
}
+NanMatcher::NanMatcher(const DecimalFormatSymbols& dfs)  // Matcher for the NaN symbol read from `dfs`.
+ : SymbolMatcher(dfs.getConstSymbol(DecimalFormatSymbols::kNaNSymbol), unisets::EMPTY) {  // EMPTY key: presumably no shared set covers NaN, so matching relies on the string — TODO confirm
+}
+
+const UnicodeSet* NanMatcher::getLeadCodePoints() const {  // Returns the set of code points that may start a match.
+ // Overridden so the statically allocated NAN_LEAD set is consulted before falling back to the base class.
+ int leadCp = fString.char32At(0);  // code point at index 0 of fString
+ const UnicodeSet* s = unisets::get(unisets::NAN_LEAD);
+ if (s->contains(leadCp)) {
+ return new UnicodeSet(*s);  // heap copy of the shared set; NOTE(review): presumably the caller owns and deletes it — confirm
+ } else {
+ return SymbolMatcher::getLeadCodePoints();  // symbol starts outside NAN_LEAD: defer to the base implementation
+ }
+}
+
+bool NanMatcher::isDisabled(const ParsedNumber& result) const {  // NaN is not tried once a number has been seen.
+ return result.seenNumber();
+}
+
+void NanMatcher::accept(StringSegment& segment, ParsedNumber& result) const {  // Records a successful NaN match in `result`.
+ result.flags |= FLAG_NAN;  // mark the parsed value as NaN
+ result.setCharsConsumed(segment);  // update the consumed-character count from the segment
+}
+
+
+PercentMatcher::PercentMatcher(const DecimalFormatSymbols& dfs)  // Matcher for the percent symbol read from `dfs`.
+ : SymbolMatcher(dfs.getConstSymbol(DecimalFormatSymbols::kPercentSymbol), unisets::PERCENT_SIGN) {  // symbol string + PERCENT_SIGN set key
+}
+
+void PercentMatcher::postProcess(ParsedNumber& result) const {  // Applies the percent scaling after matching.
+ SymbolMatcher::postProcess(result);  // run the base-class step first
+ if (0 != (result.flags & FLAG_PERCENT) && !result.quantity.bogus) {  // only when a percent sign matched and the quantity is not bogus
+ result.quantity.adjustMagnitude(-2);  // presumably shifts the value by 10^-2, i.e. divides by 100 — confirm
+ }
+}
+
+bool PercentMatcher::isDisabled(const ParsedNumber& result) const {  // Disabled once FLAG_PERCENT is already set.
+ return 0 != (result.flags & FLAG_PERCENT);
+}
+
+void PercentMatcher::accept(StringSegment& segment, ParsedNumber& result) const {  // Records a percent-sign match in `result`.
+ result.flags |= FLAG_PERCENT;  // read back by postProcess() and isDisabled()
+ result.setCharsConsumed(segment);  // update the consumed-character count from the segment
+}
+
+
+PermilleMatcher::PermilleMatcher(const DecimalFormatSymbols& dfs)  // Matcher for the per-mille symbol read from `dfs`.
+ : SymbolMatcher(dfs.getConstSymbol(DecimalFormatSymbols::kPerMillSymbol), unisets::PERMILLE_SIGN) {  // symbol string + PERMILLE_SIGN set key
+}
+
+void PermilleMatcher::postProcess(ParsedNumber& result) const {  // Applies the per-mille scaling after matching.
+ SymbolMatcher::postProcess(result);  // run the base-class step first
+ if (0 != (result.flags & FLAG_PERMILLE) && !result.quantity.bogus) {  // only when a per-mille sign matched and the quantity is not bogus
+ result.quantity.adjustMagnitude(-3);  // presumably shifts the value by 10^-3, i.e. divides by 1000 — confirm
+ }
+}
+
+bool PermilleMatcher::isDisabled(const ParsedNumber& result) const {  // Disabled once FLAG_PERMILLE is already set.
+ return 0 != (result.flags & FLAG_PERMILLE);
+}
+
+void PermilleMatcher::accept(StringSegment& segment, ParsedNumber& result) const {  // Records a per-mille match in `result`.
+ result.flags |= FLAG_PERMILLE;  // read back by postProcess() and isDisabled()
+ result.setCharsConsumed(segment);  // update the consumed-character count from the segment
+}
+
+
+PlusSignMatcher::PlusSignMatcher(const DecimalFormatSymbols& dfs, bool allowTrailing)  // Matcher for the plus-sign symbol read from `dfs`.
+ : SymbolMatcher(dfs.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol), unisets::PLUS_SIGN),  // symbol string + PLUS_SIGN set key
+ fAllowTrailing(allowTrailing) {  // consulted later by isDisabled()
+}
+
+bool PlusSignMatcher::isDisabled(const ParsedNumber& result) const {  // Gates the matcher on whether a number was already seen.
+ return fAllowTrailing ? false : result.seenNumber();  // never disabled when trailing signs are allowed; otherwise off after a number
+}
+
+void PlusSignMatcher::accept(StringSegment& segment, ParsedNumber& result) const {  // A plus sign sets no flag on the result.
+ result.setCharsConsumed(segment);  // only the consumed-character count is updated
+}
+
+
#endif /* #if !UCONFIG_NO_FORMATTING */
class SymbolMatcher : public NumberParseMatcher, public UMemory {
public:
- ~SymbolMatcher() override;
-
const UnicodeSet* getSet();
bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
protected:
UnicodeString fString;
- const UnicodeSet* fUniSet;
- bool fOwnsUniSet;
+ const UnicodeSet* fUniSet; // a reference from numparse_unisets.h; never owned
SymbolMatcher(const UnicodeString& symbolString, unisets::Key key);
};
+class IgnorablesMatcher : public SymbolMatcher {  // SymbolMatcher over a keyed set whose accept() records nothing
+ public:
+ explicit IgnorablesMatcher(unisets::Key key);  // `key` selects the set handed to the base class
+
+ bool isFlexible() const override;  // always true
+
+ protected:
+ bool isDisabled(const ParsedNumber& result) const override;  // always false
+
+ void accept(StringSegment& segment, ParsedNumber& result) const override;  // no-op
+};
+
+
class MinusSignMatcher : public SymbolMatcher {
public:
MinusSignMatcher(const DecimalFormatSymbols& dfs, bool allowTrailing);
};
+class NanMatcher : public SymbolMatcher {  // SymbolMatcher for the NaN symbol of a DecimalFormatSymbols
+ public:
+ explicit NanMatcher(const DecimalFormatSymbols& dfs);  // reads kNaNSymbol from `dfs`
+
+ const UnicodeSet* getLeadCodePoints() const override;  // may return a newly allocated set; ownership not visible here — confirm
+
+ protected:
+ bool isDisabled(const ParsedNumber& result) const override;  // true once a number has been seen
+
+ void accept(StringSegment& segment, ParsedNumber& result) const override;  // sets FLAG_NAN and records consumed chars
+};
+
+
+class PercentMatcher : public SymbolMatcher {  // SymbolMatcher for the percent symbol of a DecimalFormatSymbols
+ public:
+ explicit PercentMatcher(const DecimalFormatSymbols& dfs);  // reads kPercentSymbol from `dfs`
+
+ void postProcess(ParsedNumber& result) const override;  // adjusts the quantity's magnitude by -2 when FLAG_PERCENT is set
+
+ protected:
+ bool isDisabled(const ParsedNumber& result) const override;  // true once FLAG_PERCENT is set
+
+ void accept(StringSegment& segment, ParsedNumber& result) const override;  // sets FLAG_PERCENT and records consumed chars
+};
+
+
+class PermilleMatcher : public SymbolMatcher {  // SymbolMatcher for the per-mille symbol of a DecimalFormatSymbols
+ public:
+ explicit PermilleMatcher(const DecimalFormatSymbols& dfs);  // reads kPerMillSymbol from `dfs`
+
+ void postProcess(ParsedNumber& result) const override;  // adjusts the quantity's magnitude by -3 when FLAG_PERMILLE is set
+
+ protected:
+ bool isDisabled(const ParsedNumber& result) const override;  // true once FLAG_PERMILLE is set
+
+ void accept(StringSegment& segment, ParsedNumber& result) const override;  // sets FLAG_PERMILLE and records consumed chars
+};
+
+
+class PlusSignMatcher : public SymbolMatcher {  // SymbolMatcher for the plus-sign symbol of a DecimalFormatSymbols
+ public:
+ PlusSignMatcher(const DecimalFormatSymbols& dfs, bool allowTrailing);  // reads kPlusSignSymbol from `dfs`
+
+ protected:
+ bool isDisabled(const ParsedNumber& result) const override;  // depends on fAllowTrailing and result.seenNumber()
+
+ void accept(StringSegment& segment, ParsedNumber& result) const override;  // records consumed chars only; sets no flag
+
+ private:
+ bool fAllowTrailing;  // true: never disabled; false: disabled once a number has been seen
+};
+
+
} // namespace impl
} // namespace numparse
U_NAMESPACE_END
ucln_i18n_registerCleanup(UCLN_I18N_NUMPARSE_UNISETS, cleanupNumberParseUnitSets);
#define NEW_UNISET(pattern, status) new UnicodeSet(UnicodeString(pattern), status)
+ gUnicodeSets[EMPTY] = new UnicodeSet();
+
// BiDi characters are skipped over and ignored at any point in the string, even in strict mode.
gUnicodeSets[BIDI] = NEW_UNISET(u"[[\\u200E\\u200F\\u061C]]", status);
namespace unisets {
enum Key {
+ EMPTY,
+
// Ignorables
BIDI,
WHITESPACE,
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <cmath>
#include "unicode/ctest.h" // for str_timeDelta
#include "unicode/curramt.h"
UBool IntlTest::assertEquals(const char* message,
double expected,
double actual) {
- if (expected != actual) {
+ bool bothNaN = std::isnan(expected) && std::isnan(actual);
+ if (expected != actual && !bothNaN) {
errln((UnicodeString)"FAIL: " + message + "; got " +
actual +
"; expected " + expected);
{3, u"๐ฑ๐ญ๐ฐ๐ฎ๐ฏx", u"0", 10, 51423.},
{3, u" ๐ฑ๐ญ๐ฐ๐ฎ๐ฏ", u"0", 11, 51423.},
{3, u"๐ฑ๐ญ๐ฐ๐ฎ๐ฏ ", u"0", 10, 51423.},
+ {7, u"51,423", u"#,##,##0", 6, 51423.},
+ {7, u" 51,423", u"#,##,##0", 7, 51423.},
+ {7, u"51,423 ", u"#,##,##0", 6, 51423.},
{7, u"๐ฑ๐ญ,๐ฐ๐ฎ๐ฏ", u"#,##,##0", 11, 51423.},
{7, u"๐ณ,๐ด๐ต,๐ฑ๐ญ,๐ฐ๐ฎ๐ฏ", u"#,##,##0", 19, 78951423.},
{7, u"๐ณ๐ด,๐ต๐ฑ๐ญ.๐ฐ๐ฎ๐ฏ", u"#,##,##0", 18, 78951.423},
{7, u"๐ณ๐ด,๐ฌ๐ฌ๐ฌ.๐ฌ๐ฌ๐ฌ", u"#,##,##0", 18, 78000.},
{7, u"๐ณ๐ด,๐ฌ๐ฌ๐ฌ.๐ฌ๐ฎ๐ฏ", u"#,##,##0", 18, 78000.023},
{7, u"๐ณ๐ด.๐ฌ๐ฌ๐ฌ.๐ฌ๐ฎ๐ฏ", u"#,##,##0", 11, 78.},
- {3, u"-๐ฑ๐ญ๐ฐ๐ฎ๐ฏ", u"0", 11, -51423.},
- {3, u"-๐ฑ๐ญ๐ฐ๐ฎ๐ฏ-", u"0", 11, -51423.},
+ {3, u"-51423", u"0", 6, -51423.},
+ {3, u"51423-", u"0", 5, 51423.}, // plus and minus sign by default do NOT match after
+ {3, u"+51423", u"0", 6, 51423.},
+ {3, u"51423+", u"0", 5, 51423.}, // plus and minus sign by default do NOT match after
+ {3, u"%51423", u"0", 6, 514.23},
+ {3, u"51423%", u"0", 6, 514.23},
+ {3, u"51423%%", u"0", 6, 514.23},
+ {3, u"โฐ51423", u"0", 6, 51.423},
+ {3, u"51423โฐ", u"0", 6, 51.423},
+ {3, u"51423โฐโฐ", u"0", 6, 51.423},
// {3, u"a51423US dollars", u"a0¤¤¤", 16, 51423.},
// {3, u"a 51423 US dollars", u"a0¤¤¤", 18, 51423.},
// {3, u"514.23 USD", u"¤0", 10, 514.23},
// {3, u"a$ b5", u"a ¤ b0", 5, 5.0},
// {3, u"๐บ1.23", u"๐บ0;๐ป0", 6, 1.23},
// {3, u"๐ป1.23", u"๐บ0;๐ป0", 6, -1.23},
-// {3, u".00", u"0", 3, 0.0},
-// {3, u" 0", u"a0", 31, 0.0}, // should not hang
-// {3, u"NaN", u"0", 3, NAN},
-// {3, u"NaN E5", u"0", 3, NAN},
-// {3, u"0", u"0", 1, 0.0}
- };
+ {3, u".00", u"0", 3, 0.0},
+ {3, u" 1,234", u"a0", 35, 1234.}, // should not hang
+ {3, u"NaN", u"0", 3, NAN},
+ {3, u"NaN E5", u"0", 3, NAN},
+ {3, u"0", u"0", 1, 0.0}};
parse_flags_t parseFlags = PARSE_FLAG_IGNORE_CASE | PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES;
for (auto cas : cases) {
parser.addMatcher(ignorables);
parser.addMatcher(DecimalMatcher.getInstance(symbols, grouper, parseFlags));
parser.addMatcher(MinusSignMatcher.getInstance(symbols, false));
+ parser.addMatcher(PlusSignMatcher.getInstance(symbols, false));
+ parser.addMatcher(PercentMatcher.getInstance(symbols));
+ parser.addMatcher(PermilleMatcher.getInstance(symbols));
parser.addMatcher(NanMatcher.getInstance(symbols, parseFlags));
parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper));
parser.addMatcher(CurrencyTrieMatcher.getInstance(locale));
{ 3, "๐ฑ๐ญ๐ฐ๐ฎ๐ฏx", "0", 10, 51423. },
{ 3, " ๐ฑ๐ญ๐ฐ๐ฎ๐ฏ", "0", 11, 51423. },
{ 3, "๐ฑ๐ญ๐ฐ๐ฎ๐ฏ ", "0", 10, 51423. },
+ { 7, "51,423", "#,##,##0", 6, 51423. },
+ { 7, " 51,423", "#,##,##0", 7, 51423. },
+ { 7, "51,423 ", "#,##,##0", 6, 51423. },
{ 7, "๐ฑ๐ญ,๐ฐ๐ฎ๐ฏ", "#,##,##0", 11, 51423. },
{ 7, "๐ณ,๐ด๐ต,๐ฑ๐ญ,๐ฐ๐ฎ๐ฏ", "#,##,##0", 19, 78951423. },
{ 7, "๐ณ๐ด,๐ต๐ฑ๐ญ.๐ฐ๐ฎ๐ฏ", "#,##,##0", 18, 78951.423 },
{ 7, "๐ณ๐ด,๐ฌ๐ฌ๐ฌ.๐ฌ๐ฌ๐ฌ", "#,##,##0", 18, 78000. },
{ 7, "๐ณ๐ด,๐ฌ๐ฌ๐ฌ.๐ฌ๐ฎ๐ฏ", "#,##,##0", 18, 78000.023 },
{ 7, "๐ณ๐ด.๐ฌ๐ฌ๐ฌ.๐ฌ๐ฎ๐ฏ", "#,##,##0", 11, 78. },
- { 3, "-๐ฑ๐ญ๐ฐ๐ฎ๐ฏ", "0", 11, -51423. },
- { 3, "-๐ฑ๐ญ๐ฐ๐ฎ๐ฏ-", "0", 11, -51423. },
+ { 3, "-51423", "0", 6, -51423. },
+ { 3, "51423-", "0", 5, 51423. }, // plus and minus sign by default do NOT match after
+ { 3, "+51423", "0", 6, 51423. },
+ { 3, "51423+", "0", 5, 51423. }, // plus and minus sign by default do NOT match after
+ { 3, "%51423", "0", 6, 514.23 },
+ { 3, "51423%", "0", 6, 514.23 },
+ { 3, "51423%%", "0", 6, 514.23 },
+ { 3, "โฐ51423", "0", 6, 51.423 },
+ { 3, "51423โฐ", "0", 6, 51.423 },
+ { 3, "51423โฐโฐ", "0", 6, 51.423 },
{ 3, "a51423US dollars", "a0ยคยคยค", 16, 51423. },
{ 3, "a 51423 US dollars", "a0ยคยคยค", 18, 51423. },
{ 3, "514.23 USD", "ยค0", 10, 514.23 },
{ 3, "๐บ1.23", "๐บ0;๐ป0", 6, 1.23 },
{ 3, "๐ป1.23", "๐บ0;๐ป0", 6, -1.23 },
{ 3, ".00", "0", 3, 0.0 },
- { 3, " 0", "a0", 31, 0.0 }, // should not hang
+ { 3, " 1,234", "a0", 35, 1234. }, // should not hang
{ 3, "NaN", "0", 3, Double.NaN },
{ 3, "NaN E5", "0", 3, Double.NaN },
{ 3, "0", "0", 1, 0.0 } };