LocalPointer<NumberParserImpl> parser(new NumberParserImpl(parseFlags));
DecimalFormatSymbols symbols(locale, status);
- parser->fLocalMatchers.ignorables = {unisets::DEFAULT_IGNORABLES};
+ parser->fLocalMatchers.ignorables = {parseFlags};
IgnorablesMatcher& ignorables = parser->fLocalMatchers.ignorables;
DecimalFormatSymbols dfs(locale, status);
parseFlags |= PARSE_FLAG_STRICT_SEPARATORS;
parseFlags |= PARSE_FLAG_USE_FULL_AFFIXES;
parseFlags |= PARSE_FLAG_EXACT_AFFIX;
+ parseFlags |= PARSE_FLAG_STRICT_IGNORABLES;
} else {
parseFlags |= PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES;
}
LocalPointer<NumberParserImpl> parser(new NumberParserImpl(parseFlags));
- parser->fLocalMatchers.ignorables = {
- isStrict ? unisets::STRICT_IGNORABLES : unisets::DEFAULT_IGNORABLES};
+ parser->fLocalMatchers.ignorables = {parseFlags};
IgnorablesMatcher& ignorables = parser->fLocalMatchers.ignorables;
//////////////////////
ScientificMatcher::ScientificMatcher(const DecimalFormatSymbols& dfs, const Grouper& grouper)
: fExponentSeparatorString(dfs.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol)),
- fExponentMatcher(dfs, grouper, PARSE_FLAG_INTEGER_ONLY | PARSE_FLAG_GROUPING_DISABLED) {
+ fExponentMatcher(dfs, grouper, PARSE_FLAG_INTEGER_ONLY | PARSE_FLAG_GROUPING_DISABLED),
+ fIgnorablesMatcher(PARSE_FLAG_STRICT_IGNORABLES) {
const UnicodeString& minusSign = dfs.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol);
if (minusSignSet().contains(minusSign)) {
// First match the scientific separator, and then match another number after it.
// NOTE: This is guarded by the smoke test; no need to check fExponentSeparatorString length again.
- int overlap1 = segment.getCommonPrefixLength(fExponentSeparatorString);
- if (overlap1 == fExponentSeparatorString.length()) {
+ int32_t initialOffset = segment.getOffset();
+ int32_t overlap = segment.getCommonPrefixLength(fExponentSeparatorString);
+ if (overlap == fExponentSeparatorString.length()) {
// Full exponent separator match.
// First attempt to get a code point, returning true if we can't get one.
- if (segment.length() == overlap1) {
+ if (segment.length() == overlap) {
+ return true;
+ }
+ segment.adjustOffset(overlap);
+
+ // Allow ignorables before the sign.
+ // Note: call site is guarded by the segment.length() check above.
+ // Note: the ignorables matcher should not touch the result.
+ fIgnorablesMatcher.match(segment, result, status);
+ if (segment.length() == 0) {
+ segment.setOffset(initialOffset);
return true;
}
- segment.adjustOffset(overlap1);
// Allow a sign, and then try to match digits.
int8_t exponentSign = 1;
} else if (segment.startsWith(plusSignSet())) {
segment.adjustOffsetByCodePoint();
} else if (segment.startsWith(fCustomMinusSign)) {
- // Note: call site is guarded with startsWith, which returns false on empty string
- int32_t overlap2 = segment.getCommonPrefixLength(fCustomMinusSign);
- if (overlap2 != fCustomMinusSign.length()) {
- // Partial custom sign match; un-match the exponent separator.
- segment.adjustOffset(-overlap1);
+ overlap = segment.getCommonPrefixLength(fCustomMinusSign);
+ if (overlap != fCustomMinusSign.length()) {
+ // Partial custom sign match
+ segment.setOffset(initialOffset);
return true;
}
exponentSign = -1;
- segment.adjustOffset(overlap2);
+ segment.adjustOffset(overlap);
} else if (segment.startsWith(fCustomPlusSign)) {
- // Note: call site is guarded with startsWith, which returns false on empty string
- int32_t overlap2 = segment.getCommonPrefixLength(fCustomPlusSign);
- if (overlap2 != fCustomPlusSign.length()) {
- // Partial custom sign match; un-match the exponent separator.
- segment.adjustOffset(-overlap1);
+ overlap = segment.getCommonPrefixLength(fCustomPlusSign);
+ if (overlap != fCustomPlusSign.length()) {
+ // Partial custom sign match
+ segment.setOffset(initialOffset);
return true;
}
- segment.adjustOffset(overlap2);
+ segment.adjustOffset(overlap);
+ }
+
+ // Return true if the segment is empty.
+ if (segment.length() == 0) {
+ segment.setOffset(initialOffset);
+ return true;
+ }
+
+ // Allow ignorables after the sign.
+ // Note: call site is guarded by the segment.length() check above.
+ // Note: the ignorables matcher should not touch the result.
+ fIgnorablesMatcher.match(segment, result, status);
+ if (segment.length() == 0) {
+ segment.setOffset(initialOffset);
+ return true;
}
// We are supposed to accept E0 after NaN, so we need to make sure result.quantity is available.
// At least one exponent digit was matched.
result.flags |= FLAG_HAS_EXPONENT;
} else {
- // No exponent digits were matched; un-match the exponent separator.
- segment.adjustOffset(-overlap1);
+ // No exponent digits were matched
+ segment.setOffset(initialOffset);
}
return digitsReturnValue;
- } else if (overlap1 == segment.length()) {
+ } else if (overlap == segment.length()) {
// Partial exponent separator match
return true;
}
#include "numparse_types.h"
#include "numparse_decimal.h"
+#include "numparse_symbols.h"
#include "unicode/numberformatter.h"
using icu::number::impl::Grouper;
private:
UnicodeString fExponentSeparatorString;
DecimalMatcher fExponentMatcher;
+ IgnorablesMatcher fIgnorablesMatcher;
UnicodeString fCustomMinusSign;
UnicodeString fCustomPlusSign;
};
}
-IgnorablesMatcher::IgnorablesMatcher(unisets::Key key)
- : SymbolMatcher({}, key) {
+IgnorablesMatcher::IgnorablesMatcher(parse_flags_t parseFlags) :
+ SymbolMatcher(
+ {},
+ (0 != (parseFlags & PARSE_FLAG_STRICT_IGNORABLES)) ?
+ unisets::STRICT_IGNORABLES :
+ unisets::DEFAULT_IGNORABLES) {
}
bool IgnorablesMatcher::isFlexible() const {
public:
IgnorablesMatcher() = default; // WARNING: Leaves the object in an unusable state
- IgnorablesMatcher(unisets::Key key);
+ IgnorablesMatcher(parse_flags_t parseFlags);
bool isFlexible() const override;
// PARSE_FLAG_FORCE_BIG_DECIMAL = 0x1000, // not used in ICU4C
PARSE_FLAG_NO_FOREIGN_CURRENCY = 0x2000,
PARSE_FLAG_ALLOW_INFINITE_RECURSION = 0x4000,
+ PARSE_FLAG_STRICT_IGNORABLES = 0x8000,
};
{3, u"𝟱.𝟭𝟰𝟮E𝟯", u"0", 12, 5142.},
{3, u"𝟱.𝟭𝟰𝟮E-𝟯", u"0", 13, 0.005142},
{3, u"𝟱.𝟭𝟰𝟮e-𝟯", u"0", 13, 0.005142},
+ {3, u"5.142e+3", u"0", 8, 5142.0 },
+ {3, u"5.142\u200Ee+3", u"0", 9, 5142.0},
+ {3, u"5.142e\u200E+3", u"0", 9, 5142.0},
+ {3, u"5.142e+\u200E3", u"0", 9, 5142.0},
{7, u"5,142.50 Canadian dollars", u"#,##,##0 ¤¤¤", 25, 5142.5},
{3, u"a$ b5", u"a ¤ b0", 5, 5.0},
{3, u"📺1.23", u"📺0;📻0", 6, 1.23},
}
PlusSignMatcher m0(symbols, false);
MinusSignMatcher m1(symbols, false);
- IgnorablesMatcher m2(unisets::DEFAULT_IGNORABLES);
+ IgnorablesMatcher m2(0);
PercentMatcher m3(symbols);
- IgnorablesMatcher m4(unisets::DEFAULT_IGNORABLES);
+ IgnorablesMatcher m4(0);
ArraySeriesMatcher::MatcherArray matchers(5);
matchers[0] = &m0;
void NumberParserTest::testCombinedCurrencyMatcher() {
IcuTestErrorCode status(*this, "testCombinedCurrencyMatcher");
- IgnorablesMatcher ignorables(unisets::DEFAULT_IGNORABLES);
+ IgnorablesMatcher ignorables(0);
Locale locale = Locale::getEnglish();
DecimalFormatSymbols dfs(locale, status);
void NumberParserTest::testAffixPatternMatcher() {
IcuTestErrorCode status(*this, "testAffixPatternMatcher");
Locale locale = Locale::getEnglish();
- IgnorablesMatcher ignorables(unisets::DEFAULT_IGNORABLES);
+ IgnorablesMatcher ignorables(0);
DecimalFormatSymbols dfs(locale, status);
dfs.setSymbol(DecimalFormatSymbols::kCurrencySymbol, u"IU$", status);
British pounds 123 123 GBP H
british POUNDS 123 123 GBP H
-
+test parse scientific with bidi marks
+begin
+locale parse output breaks
+en 4E\u200E+02 400 HK
+en 4E+02 400 K
+he 4E\u200E+02 400 K
+he 4E+02 400 HK
*/
public class IgnorablesMatcher extends SymbolMatcher implements NumberParseMatcher.Flexible {
- public static final IgnorablesMatcher DEFAULT = new IgnorablesMatcher(
+ private static final IgnorablesMatcher DEFAULT = new IgnorablesMatcher(
StaticUnicodeSets.get(StaticUnicodeSets.Key.DEFAULT_IGNORABLES));
- public static final IgnorablesMatcher STRICT = new IgnorablesMatcher(
+ private static final IgnorablesMatcher STRICT = new IgnorablesMatcher(
StaticUnicodeSets.get(StaticUnicodeSets.Key.STRICT_IGNORABLES));
- public static IgnorablesMatcher getInstance(UnicodeSet ignorables) {
- assert ignorables.isFrozen();
- return new IgnorablesMatcher(ignorables);
+ public static IgnorablesMatcher getInstance(int parseFlags) {
+ if (0 != (parseFlags & ParsingUtils.PARSE_FLAG_STRICT_IGNORABLES)) {
+ return STRICT;
+ } else {
+ return DEFAULT;
+ }
}
private IgnorablesMatcher(UnicodeSet ignorables) {
NumberParserImpl parser = new NumberParserImpl(parseFlags);
Currency currency = Currency.getInstance("USD");
DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(locale);
- IgnorablesMatcher ignorables = IgnorablesMatcher.DEFAULT;
+ IgnorablesMatcher ignorables = IgnorablesMatcher.getInstance(parseFlags);
AffixTokenMatcherFactory factory = new AffixTokenMatcherFactory();
factory.currency = currency;
parseFlags |= ParsingUtils.PARSE_FLAG_STRICT_SEPARATORS;
parseFlags |= ParsingUtils.PARSE_FLAG_USE_FULL_AFFIXES;
parseFlags |= ParsingUtils.PARSE_FLAG_EXACT_AFFIX;
+ parseFlags |= ParsingUtils.PARSE_FLAG_STRICT_IGNORABLES;
} else {
parseFlags |= ParsingUtils.PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES;
}
if (!parseCurrency) {
parseFlags |= ParsingUtils.PARSE_FLAG_NO_FOREIGN_CURRENCIES;
}
- IgnorablesMatcher ignorables = isStrict ? IgnorablesMatcher.STRICT : IgnorablesMatcher.DEFAULT;
NumberParserImpl parser = new NumberParserImpl(parseFlags);
+ IgnorablesMatcher ignorables = IgnorablesMatcher.getInstance(parseFlags);
AffixTokenMatcherFactory factory = new AffixTokenMatcherFactory();
factory.currency = currency;
public static final int PARSE_FLAG_FORCE_BIG_DECIMAL = 0x1000;
public static final int PARSE_FLAG_NO_FOREIGN_CURRENCIES = 0x2000;
public static final int PARSE_FLAG_ALLOW_INFINITE_RECURSION = 0x4000;
+ public static final int PARSE_FLAG_STRICT_IGNORABLES = 0x8000;
public static void putLeadCodePoints(UnicodeSet input, UnicodeSet output) {
for (EntryRange range : input.ranges()) {
private final String exponentSeparatorString;
private final DecimalMatcher exponentMatcher;
+ private final IgnorablesMatcher ignorablesMatcher;
private final String customMinusSign;
private final String customPlusSign;
exponentMatcher = DecimalMatcher.getInstance(symbols,
grouper,
ParsingUtils.PARSE_FLAG_INTEGER_ONLY | ParsingUtils.PARSE_FLAG_GROUPING_DISABLED);
+ ignorablesMatcher = IgnorablesMatcher.getInstance(ParsingUtils.PARSE_FLAG_STRICT_IGNORABLES);
String minusSign = symbols.getMinusSignString();
customMinusSign = safeContains(minusSignSet(), minusSign) ? null : minusSign;
// First match the scientific separator, and then match another number after it.
// NOTE: This is guarded by the smoke test; no need to check exponentSeparatorString length again.
- int overlap1 = segment.getCommonPrefixLength(exponentSeparatorString);
- if (overlap1 == exponentSeparatorString.length()) {
+ int initialOffset = segment.getOffset();
+ int overlap = segment.getCommonPrefixLength(exponentSeparatorString);
+ if (overlap == exponentSeparatorString.length()) {
// Full exponent separator match.
// First attempt to get a code point, returning true if we can't get one.
- if (segment.length() == overlap1) {
+ if (segment.length() == overlap) {
+ return true;
+ }
+ segment.adjustOffset(overlap);
+
+ // Allow ignorables before the sign.
+ // Note: call site is guarded by the segment.length() check above.
+ ignorablesMatcher.match(segment, null);
+ if (segment.length() == 0) {
+ segment.setOffset(initialOffset);
return true;
}
- segment.adjustOffset(overlap1);
// Allow a sign, and then try to match digits.
int exponentSign = 1;
} else if (segment.startsWith(plusSignSet())) {
segment.adjustOffsetByCodePoint();
} else if (segment.startsWith(customMinusSign)) {
- // Note: call site is guarded with startsWith, which returns false on empty string
- int overlap2 = segment.getCommonPrefixLength(customMinusSign);
- if (overlap2 != customMinusSign.length()) {
- // Partial custom sign match; un-match the exponent separator.
- segment.adjustOffset(-overlap1);
+ overlap = segment.getCommonPrefixLength(customMinusSign);
+ if (overlap != customMinusSign.length()) {
+ // Partial custom sign match
+ segment.setOffset(initialOffset);
return true;
}
exponentSign = -1;
- segment.adjustOffset(overlap2);
+ segment.adjustOffset(overlap);
} else if (segment.startsWith(customPlusSign)) {
- // Note: call site is guarded with startsWith, which returns false on empty string
- int overlap2 = segment.getCommonPrefixLength(customPlusSign);
- if (overlap2 != customPlusSign.length()) {
- // Partial custom sign match; un-match the exponent separator.
- segment.adjustOffset(-overlap1);
+ overlap = segment.getCommonPrefixLength(customPlusSign);
+ if (overlap != customPlusSign.length()) {
+ // Partial custom sign match
+ segment.setOffset(initialOffset);
return true;
}
- segment.adjustOffset(overlap2);
+ segment.adjustOffset(overlap);
+ }
+
+ // Return true if the segment is empty.
+ if (segment.length() == 0) {
+ segment.setOffset(initialOffset);
+ return true;
+ }
+
+ // Allow ignorables after the sign.
+ // Note: call site is guarded by the segment.length() check above.
+ ignorablesMatcher.match(segment, null);
+ if (segment.length() == 0) {
+ segment.setOffset(initialOffset);
+ return true;
}
// We are supposed to accept E0 after NaN, so we need to make sure result.quantity is available.
// At least one exponent digit was matched.
result.flags |= ParsedNumber.FLAG_HAS_EXPONENT;
} else {
- // No exponent digits were matched; un-match the exponent separator.
- segment.adjustOffset(-overlap1);
+ // No exponent digits were matched
+ segment.setOffset(initialOffset);
}
return digitsReturnValue;
- } else if (overlap1 == segment.length()) {
+ } else if (overlap == segment.length()) {
// Partial exponent separator match
return true;
}
British pounds 123 123 GBP H
british POUNDS 123 123 GBP H
-
+test parse scientific with bidi marks
+begin
+locale parse output breaks
+en 4E\u200E+02 400 HK
+en 4E+02 400 K
+he 4E\u200E+02 400 K
+he 4E+02 400 HK
{ 3, "𝟱.𝟭𝟰𝟮E𝟯", "0", 12, 5142. },
{ 3, "𝟱.𝟭𝟰𝟮E-𝟯", "0", 13, 0.005142 },
{ 3, "𝟱.𝟭𝟰𝟮e-𝟯", "0", 13, 0.005142 },
+ { 3, "5.142e+3", "0", 8, 5142.0 },
+ { 3, "5.142\u200Ee+3", "0", 9, 5142.0 },
+ { 3, "5.142e\u200E+3", "0", 9, 5142.0 },
+ { 3, "5.142e+\u200E3", "0", 9, 5142.0 },
{ 7, "5,142.50 Canadian dollars", "#,##,##0 ¤¤¤", 25, 5142.5 },
{ 3, "a$ b5", "a ¤ b0", 5, 5.0 },
{ 3, "📺1.23", "📺0;📻0", 6, 1.23 },
SeriesMatcher series = new SeriesMatcher();
series.addMatcher(PlusSignMatcher.getInstance(symbols, false));
series.addMatcher(MinusSignMatcher.getInstance(symbols, false));
- series.addMatcher(IgnorablesMatcher.DEFAULT);
+ series.addMatcher(IgnorablesMatcher.getInstance(0));
series.addMatcher(PercentMatcher.getInstance(symbols));
- series.addMatcher(IgnorablesMatcher.DEFAULT);
+ series.addMatcher(IgnorablesMatcher.getInstance(0));
series.freeze();
assertFalse(series.smokeTest(new StringSegment("x", false)));
AffixTokenMatcherFactory factory = new AffixTokenMatcherFactory();
factory.currency = Currency.getInstance("EUR");
factory.symbols = DecimalFormatSymbols.getInstance(ULocale.ENGLISH);
- factory.ignorables = IgnorablesMatcher.DEFAULT;
+ factory.ignorables = IgnorablesMatcher.getInstance(0);
factory.locale = ULocale.ENGLISH;
factory.parseFlags = 0;