parser->addMatcher(parser->fLocalMatchers.padding = {u"@"});
parser->addMatcher(parser->fLocalMatchers.scientific = {symbols, grouper});
parser->addMatcher(parser->fLocalMatchers.currency = {currencySymbols, symbols, parseFlags, status});
-// parser.addMatcher(new RequireNumberMatcher());
+ parser->addMatcher(parser->fLocalValidators.number = {});
parser->freeze();
return parser.orphan();
StringSegment segment(input, 0 != (fParseFlags & PARSE_FLAG_IGNORE_CASE));
segment.adjustOffset(start);
if (greedy) {
- parseGreedyRecursive(segment, result, status);
+ parseGreedy(segment, result, status);
+ } else if (0 != (fParseFlags & PARSE_FLAG_ALLOW_INFINITE_RECURSION)) {
+ // Start at 1 so that recursionLevels never gets to 0
+ parseLongestRecursive(segment, result, 1, status);
} else {
- parseLongestRecursive(segment, result, status);
+ // Arbitrary recursion safety limit: 100 levels.
+ parseLongestRecursive(segment, result, -100, status);
}
for (int32_t i = 0; i < fNumMatchers; i++) {
fMatchers[i]->postProcess(result);
result.postProcess();
}
-void NumberParserImpl::parseGreedyRecursive(StringSegment& segment, ParsedNumber& result,
+void NumberParserImpl::parseGreedy(StringSegment& segment, ParsedNumber& result,
UErrorCode& status) const {
- // Base Case
- if (segment.length() == 0) {
- return;
- }
-
- int initialOffset = segment.getOffset();
- for (int32_t i = 0; i < fNumMatchers; i++) {
+ // Note: this method is not recursive in order to avoid stack overflow.
+ for (int i = 0; i <fNumMatchers;) {
+ // Base Case
+ if (segment.length() == 0) {
+ return;
+ }
const NumberParseMatcher* matcher = fMatchers[i];
if (!matcher->smokeTest(segment)) {
+ // Matcher failed smoke test: try the next one
+ i++;
continue;
}
+ int32_t initialOffset = segment.getOffset();
matcher->match(segment, result, status);
if (U_FAILURE(status)) {
return;
}
if (segment.getOffset() != initialOffset) {
- // In a greedy parse, recurse on only the first match.
- parseGreedyRecursive(segment, result, status);
- // The following line resets the offset so that the StringSegment says the same across
- // the function
- // call boundary. Since we recurse only once, this line is not strictly necessary.
- segment.setOffset(initialOffset);
- return;
+ // Greedy heuristic: accept the match and loop back
+ i = 0;
+ continue;
+ } else {
+ // Matcher did not match: try the next one
+ i++;
+ continue;
}
+ UPRV_UNREACHABLE;
}
// NOTE: If we get here, the greedy parse completed without consuming the entire string.
}
void NumberParserImpl::parseLongestRecursive(StringSegment& segment, ParsedNumber& result,
+ int32_t recursionLevels,
UErrorCode& status) const {
// Base Case
if (segment.length() == 0) {
return;
}
+ // Safety against stack overflow
+ if (recursionLevels == 0) {
+ return;
+ }
+
// TODO: Give a nice way for the matcher to reset the ParsedNumber?
ParsedNumber initial(result);
ParsedNumber candidate;
// If the entire segment was consumed, recurse.
if (segment.getOffset() - initialOffset == charsToConsume) {
- parseLongestRecursive(segment, candidate, status);
+ parseLongestRecursive(segment, candidate, recursionLevels + 1, status);
if (U_FAILURE(status)) {
return;
}
explicit NumberParserImpl(parse_flags_t parseFlags);
- void parseGreedyRecursive(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const;
+ void parseGreedy(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const;
- void parseLongestRecursive(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const;
+ void parseLongestRecursive(
+ StringSegment& segment, ParsedNumber& result, int32_t recursionLevels, UErrorCode& status) const;
};
return !quantity.bogus || 0 != (flags & FLAG_NAN) || 0 != (flags & FLAG_INFINITY);
}
-double ParsedNumber::getDouble() const {
+double ParsedNumber::getDouble(UErrorCode& status) const {
bool sawNaN = 0 != (flags & FLAG_NAN);
bool sawInfinity = 0 != (flags & FLAG_INFINITY);
return INFINITY;
}
}
- U_ASSERT(!quantity.bogus);
+ if (quantity.bogus) {
+ status = U_INVALID_STATE_ERROR;
+ return 0.0;
+ }
if (quantity.isZero() && quantity.isNegative()) {
return -0.0;
}
// PARSE_FLAG_OPTIMIZE = 0x0800, // no longer used
// PARSE_FLAG_FORCE_BIG_DECIMAL = 0x1000, // not used in ICU4C
PARSE_FLAG_NO_FOREIGN_CURRENCY = 0x2000,
+ PARSE_FLAG_ALLOW_INFINITE_RECURSION = 0x4000,
};
bool seenNumber() const;
- double getDouble() const;
+ double getDouble(UErrorCode& status) const;
void populateFormattable(Formattable& output, parse_flags_t parseFlags) const;
void testAffixPatternMatcher();
void testGroupingDisabled();
void testCaseFolding();
+ void test20360_BidiOverflow();
+ void testInfiniteRecursion();
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0);
};
TESTCASE_AUTO(testSeriesMatcher);
TESTCASE_AUTO(testCombinedCurrencyMatcher);
TESTCASE_AUTO(testAffixPatternMatcher);
+ TESTCASE_AUTO(test20360_BidiOverflow);
+ TESTCASE_AUTO(testInfiniteRecursion);
TESTCASE_AUTO_END;
}
ParsedNumber resultObject;
parser->parse(inputString, true, resultObject, status);
assertTrue("Greedy Parse failed: " + message, resultObject.success());
- assertEquals(
- "Greedy Parse failed: " + message, cas.expectedCharsConsumed, resultObject.charEnd);
- assertEquals(
- "Greedy Parse failed: " + message, cas.expectedResultDouble, resultObject.getDouble());
+ assertEquals("Greedy Parse failed: " + message,
+ cas.expectedCharsConsumed, resultObject.charEnd);
+ assertEquals("Greedy Parse failed: " + message,
+ cas.expectedResultDouble, resultObject.getDouble(status));
}
if (0 != (cas.flags & 0x02)) {
assertEquals(
"Non-Greedy Parse failed: " + message,
cas.expectedResultDouble,
- resultObject.getDouble());
+ resultObject.getDouble(status));
}
if (0 != (cas.flags & 0x04)) {
ParsedNumber resultObject;
parser->parse(inputString, true, resultObject, status);
assertTrue("Strict Parse failed: " + message, resultObject.success());
- assertEquals(
- "Strict Parse failed: " + message, cas.expectedCharsConsumed, resultObject.charEnd);
- assertEquals(
- "Strict Parse failed: " + message, cas.expectedResultDouble, resultObject.getDouble());
+ assertEquals("Strict Parse failed: " + message,
+ cas.expectedCharsConsumed, resultObject.charEnd);
+ assertEquals("Strict Parse failed: " + message,
+ cas.expectedResultDouble, resultObject.getDouble(status));
}
}
}
}
}
+void NumberParserTest::test20360_BidiOverflow() {
+ IcuTestErrorCode status(*this, "test20360_BidiOverflow");
+ UnicodeString inputString;
+ inputString.append(u'-');
+ for (int32_t i=0; i<100000; i++) {
+ inputString.append(u'\u061C');
+ }
+ inputString.append(u'5');
+
+ LocalPointer<const NumberParserImpl> parser(NumberParserImpl::createSimpleParser("en", u"0", 0, status));
+ if (status.errDataIfFailureAndReset("createSimpleParser() failed")) {
+ return;
+ }
+
+ ParsedNumber resultObject;
+ parser->parse(inputString, true, resultObject, status);
+ assertTrue("Greedy Parse, success", resultObject.success());
+ assertEquals("Greedy Parse, chars consumed", 100002, resultObject.charEnd);
+ assertEquals("Greedy Parse, expected double", -5.0, resultObject.getDouble(status));
+
+ resultObject.clear();
+ parser->parse(inputString, false, resultObject, status);
+ assertFalse("Non-Greedy Parse, success", resultObject.success());
+ assertEquals("Non-Greedy Parse, chars consumed", 1, resultObject.charEnd);
+}
+
+void NumberParserTest::testInfiniteRecursion() {
+ IcuTestErrorCode status(*this, "testInfiniteRecursion");
+ UnicodeString inputString;
+ inputString.append(u'-');
+ for (int32_t i=0; i<200; i++) {
+ inputString.append(u'\u061C');
+ }
+ inputString.append(u'5');
+
+ LocalPointer<const NumberParserImpl> parser(NumberParserImpl::createSimpleParser("en", u"0", 0, status));
+ if (status.errDataIfFailureAndReset("createSimpleParser() failed")) {
+ return;
+ }
+
+ ParsedNumber resultObject;
+ parser->parse(inputString, false, resultObject, status);
+ assertFalse("Default recursion limit, success", resultObject.success());
+ assertEquals("Default recursion limit, chars consumed", 1, resultObject.charEnd);
+
+ parser.adoptInstead(NumberParserImpl::createSimpleParser(
+ "en", u"0", PARSE_FLAG_ALLOW_INFINITE_RECURSION, status));
+ resultObject.clear();
+ parser->parse(inputString, false, resultObject, status);
+ assertTrue("Unlimited recursion, success", resultObject.success());
+ assertEquals("Unlimited recursion, chars consumed", 202, resultObject.charEnd);
+ assertEquals("Unlimited recursion, expected double", -5.0, resultObject.getDouble(status));
+}
+
#endif
0 != (parseFlags & ParsingUtils.PARSE_FLAG_IGNORE_CASE));
segment.adjustOffset(start);
if (greedy) {
- parseGreedyRecursive(segment, result);
+ parseGreedy(segment, result);
+ } else if (0 != (parseFlags & ParsingUtils.PARSE_FLAG_ALLOW_INFINITE_RECURSION)) {
+ // Start at 1 so that recursionLevels never gets to 0
+ parseLongestRecursive(segment, result, 1);
} else {
- parseLongestRecursive(segment, result);
+ // Arbitrary recursion safety limit: 100 levels.
+ parseLongestRecursive(segment, result, -100);
}
for (NumberParseMatcher matcher : matchers) {
matcher.postProcess(result);
result.postProcess();
}
- private void parseGreedyRecursive(StringSegment segment, ParsedNumber result) {
- // Base Case
- if (segment.length() == 0) {
- return;
- }
-
- int initialOffset = segment.getOffset();
- for (int i = 0; i < matchers.size(); i++) {
+ private void parseGreedy(StringSegment segment, ParsedNumber result) {
+ // Note: this method is not recursive in order to avoid stack overflow.
+ for (int i = 0; i < matchers.size();) {
+ // Base Case
+ if (segment.length() == 0) {
+ return;
+ }
NumberParseMatcher matcher = matchers.get(i);
if (!matcher.smokeTest(segment)) {
+ // Matcher failed smoke test: try the next one
+ i++;
continue;
}
+ int initialOffset = segment.getOffset();
matcher.match(segment, result);
if (segment.getOffset() != initialOffset) {
- // In a greedy parse, recurse on only the first match.
- parseGreedyRecursive(segment, result);
- // The following line resets the offset so that the StringSegment says the same across
- // the function
- // call boundary. Since we recurse only once, this line is not strictly necessary.
- segment.setOffset(initialOffset);
- return;
+ // Greedy heuristic: accept the match and loop back
+ i = 0;
+ continue;
+ } else {
+ // Matcher did not match: try the next one
+ i++;
+ continue;
}
}
// NOTE: If we get here, the greedy parse completed without consuming the entire string.
}
- private void parseLongestRecursive(StringSegment segment, ParsedNumber result) {
+ private void parseLongestRecursive(StringSegment segment, ParsedNumber result, int recursionLevels) {
// Base Case
if (segment.length() == 0) {
return;
}
+ // Safety against stack overflow
+ if (recursionLevels == 0) {
+ return;
+ }
+
// TODO: Give a nice way for the matcher to reset the ParsedNumber?
ParsedNumber initial = new ParsedNumber();
initial.copyFrom(result);
// If the entire segment was consumed, recurse.
if (segment.getOffset() - initialOffset == charsToConsume) {
- parseLongestRecursive(segment, candidate);
+ parseLongestRecursive(segment, candidate, recursionLevels + 1);
if (candidate.isBetterThan(result)) {
result.copyFrom(candidate);
}
// public static final int PARSE_FLAG_OPTIMIZE = 0x0800; // no longer used
public static final int PARSE_FLAG_FORCE_BIG_DECIMAL = 0x1000;
public static final int PARSE_FLAG_NO_FOREIGN_CURRENCIES = 0x2000;
+ public static final int PARSE_FLAG_ALLOW_INFINITE_RECURSION = 0x4000;
public static void putLeadCodePoints(UnicodeSet input, UnicodeSet output) {
for (EntryRange range : input.ranges()) {
result.charEnd);
}
}
+
+ @Test
+ public void test20360_BidiOverflow() {
+ StringBuilder inputString = new StringBuilder();
+ inputString.append('-');
+ for (int i=0; i<100000; i++) {
+ inputString.append('\u061C');
+ }
+ inputString.append('5');
+
+ NumberParserImpl parser = NumberParserImpl.createSimpleParser(ULocale.ENGLISH, "0", 0);
+
+ ParsedNumber resultObject = new ParsedNumber();
+ parser.parse(inputString.toString(), true, resultObject);
+ assertTrue("Greedy Parse, success", resultObject.success());
+ assertEquals("Greedy Parse, chars consumed", 100002, resultObject.charEnd);
+ assertEquals("Greedy Parse, expected double", -5, resultObject.getNumber().intValue());
+
+ resultObject.clear();
+ parser.parse(inputString.toString(), false, resultObject);
+ assertFalse("Non-Greedy Parse, success", resultObject.success());
+ assertEquals("Non-Greedy Parse, chars consumed", 1, resultObject.charEnd);
+ }
+
+ @Test
+ public void testInfiniteRecursion() {
+ StringBuilder inputString = new StringBuilder();
+ inputString.append('-');
+ for (int i=0; i<200; i++) {
+ inputString.append('\u061C');
+ }
+ inputString.append('5');
+
+ NumberParserImpl parser = NumberParserImpl.createSimpleParser(ULocale.ENGLISH, "0", 0);
+
+ ParsedNumber resultObject = new ParsedNumber();
+ parser.parse(inputString.toString(), false, resultObject);
+ assertFalse("Default recursion limit, success", resultObject.success());
+ assertEquals("Default recursion limit, chars consumed", 1, resultObject.charEnd);
+
+ parser = NumberParserImpl.createSimpleParser(
+ ULocale.ENGLISH, "0", ParsingUtils.PARSE_FLAG_ALLOW_INFINITE_RECURSION);
+ resultObject.clear();
+ parser.parse(inputString.toString(), false, resultObject);
+ assertTrue("Unlimited recursion, success", resultObject.success());
+ assertEquals("Unlimited recursion, chars consumed", 202, resultObject.charEnd);
+ assertEquals("Unlimited recursion, expected double", -5, resultObject.getNumber().intValue());
+ }
}