From: George Rhoten Date: Tue, 13 Feb 2018 23:48:48 +0000 (+0000) Subject: ICU-13529 Parsing of redundant rule matches is slow when parsing with RuleBasedNumber... X-Git-Tag: release-61-rc~99 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=4bac7035582b9da9ec9842ad971d77bc6dee5655;p=icu ICU-13529 Parsing of redundant rule matches is slow when parsing with RuleBasedNumberFormat X-SVN-Rev: 40913 --- diff --git a/.gitignore b/.gitignore index 1d2af48c714..95cc42712f9 100644 --- a/.gitignore +++ b/.gitignore @@ -635,6 +635,8 @@ icu4c/source/tools/ctestfw/libsicutest* icu4c/source/tools/ctestfw/release icu4c/source/tools/ctestfw/x64 icu4c/source/tools/ctestfw/x86 +icu4c/source/tools/escapesrc/*.d +icu4c/source/tools/escapesrc/Makefile icu4c/source/tools/genbrk/*.d icu4c/source/tools/genbrk/*.o icu4c/source/tools/genbrk/*.pdb diff --git a/icu4c/source/i18n/nfrs.cpp b/icu4c/source/i18n/nfrs.cpp index 769fad353fb..d5b368d4230 100644 --- a/icu4c/source/i18n/nfrs.cpp +++ b/icu4c/source/i18n/nfrs.cpp @@ -681,7 +681,7 @@ static void dumpUS(FILE* f, const UnicodeString& us) { #endif UBool -NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const +NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, int32_t nonNumericalExecutedRuleMask, Formattable& result) const { // try matching each rule in the rule set against the text being // parsed. Whichever one matches the most characters is the one @@ -707,9 +707,12 @@ NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBoun #endif // Try each of the negative rules, fraction rules, infinity rules and NaN rules for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) { - if (nonNumericalRules[i]) { + if (nonNumericalRules[i] && ((nonNumericalExecutedRuleMask >> i) & 1) == 0) { + // Mark this rule as being executed so that we don't try to execute it again. + nonNumericalExecutedRuleMask |= 1 << i; + Formattable tempResult; - UBool success = nonNumericalRules[i]->doParse(text, workingPos, 0, upperBound, tempResult); + UBool success = nonNumericalRules[i]->doParse(text, workingPos, 0, upperBound, nonNumericalExecutedRuleMask, tempResult); if (success && (workingPos.getIndex() > highWaterMark.getIndex())) { result = tempResult; highWaterMark = workingPos; @@ -748,7 +751,7 @@ NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBoun continue; } Formattable tempResult; - UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, tempResult); + UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, nonNumericalExecutedRuleMask, tempResult); if (success && workingPos.getIndex() > highWaterMark.getIndex()) { result = tempResult; highWaterMark = workingPos; diff --git a/icu4c/source/i18n/nfrs.h b/icu4c/source/i18n/nfrs.h index 1e39b289b4d..d4797e7ff55 100644 --- a/icu4c/source/i18n/nfrs.h +++ b/icu4c/source/i18n/nfrs.h @@ -55,7 +55,7 @@ public: void format(int64_t number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const; void format(double number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const; - UBool parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const; + UBool parse(const UnicodeString& text, ParsePosition& pos, double upperBound, int32_t nonNumericalExecutedRuleMask, Formattable& result) const; void appendRules(UnicodeString& result) const; // toString diff --git a/icu4c/source/i18n/nfrule.cpp b/icu4c/source/i18n/nfrule.cpp index f24be11bcdc..f32ed5a747c 100644 --- a/icu4c/source/i18n/nfrule.cpp +++ b/icu4c/source/i18n/nfrule.cpp @@ -900,6 +900,7 @@ NFRule::doParse(const UnicodeString& text, ParsePosition& parsePosition, UBool isFractionRule, double upperBound, + int32_t nonNumericalExecutedRuleMask, Formattable& resVal) const { // internally we operate on a copy of the string being parsed @@ -1002,6 +1003,7 @@ NFRule::doParse(const UnicodeString& text, temp.setTo(ruleText, sub1Pos, sub2Pos - sub1Pos); double partialResult = matchToDelimiter(workText, start, tempBaseValue, temp, pp, sub1, + nonNumericalExecutedRuleMask, upperBound); // if we got a successful match (or were trying to match a @@ -1022,6 +1024,7 @@ NFRule::doParse(const UnicodeString& text, temp.setTo(ruleText, sub2Pos, ruleText.length() - sub2Pos); partialResult = matchToDelimiter(workText2, 0, partialResult, temp, pp2, sub2, + nonNumericalExecutedRuleMask, upperBound); // if we got a successful match on this second @@ -1158,6 +1161,7 @@ NFRule::matchToDelimiter(const UnicodeString& text, const UnicodeString& delimiter, ParsePosition& pp, const NFSubstitution* sub, + int32_t nonNumericalExecutedRuleMask, double upperBound) const { UErrorCode status = U_ZERO_ERROR; @@ -1191,6 +1195,7 @@ NFRule::matchToDelimiter(const UnicodeString& text, #else formatter->isLenient(), #endif + nonNumericalExecutedRuleMask, result); // if the substitution could match all the text up to @@ -1244,6 +1249,7 @@ NFRule::matchToDelimiter(const UnicodeString& text, #else formatter->isLenient(), #endif + nonNumericalExecutedRuleMask, result); if (success && (tempPP.getIndex() != 0)) { // if there's a successful match (or it's a null diff --git a/icu4c/source/i18n/nfrule.h b/icu4c/source/i18n/nfrule.h index 809119ca6c6..0fabe202373 100644 --- a/icu4c/source/i18n/nfrule.h +++ b/icu4c/source/i18n/nfrule.h @@ -74,6 +74,7 @@ public: ParsePosition& pos, UBool isFractional, double upperBound, + int32_t nonNumericalExecutedRuleMask, Formattable& result) const; UBool shouldRollBack(int64_t number) const; @@ -94,6 +95,7 @@ private: int32_t indexOfAnyRulePrefix() const; double matchToDelimiter(const UnicodeString& text, int32_t startPos, double baseValue, const UnicodeString& delimiter, ParsePosition& pp, const NFSubstitution* sub, + int32_t nonNumericalExecutedRuleMask, double upperBound) const; void stripPrefix(UnicodeString& text, const UnicodeString& prefix, ParsePosition& pp) const; diff --git a/icu4c/source/i18n/nfsubs.cpp b/icu4c/source/i18n/nfsubs.cpp index b5da9821d55..ec9e9b873cb 100644 --- a/icu4c/source/i18n/nfsubs.cpp +++ b/icu4c/source/i18n/nfsubs.cpp @@ -155,6 +155,7 @@ public: double baseValue, double upperBound, UBool lenientParse, + int32_t nonNumericalExecutedRuleMask, Formattable& result) const; virtual double composeRuleValue(double newRuleValue, double oldRuleValue) const { @@ -221,6 +222,7 @@ public: double baseValue, double upperBound, UBool lenientParse, + int32_t nonNumericalExecutedRuleMask, Formattable& result) const; virtual double composeRuleValue(double newRuleValue, double oldRuleValue) const { return newRuleValue + oldRuleValue; } @@ -292,6 +294,7 @@ public: double baseValue, double upperBound, UBool /*lenientParse*/, + int32_t nonNumericalExecutedRuleMask, Formattable& result) const; virtual double composeRuleValue(double newRuleValue, double oldRuleValue) const { return newRuleValue / oldRuleValue; } @@ -689,6 +692,7 @@ NFSubstitution::doParse(const UnicodeString& text, double baseValue, double upperBound, UBool lenientParse, + int32_t nonNumericalExecutedRuleMask, Formattable& result) const { #ifdef RBNF_DEBUG @@ -709,7 +713,7 @@ NFSubstitution::doParse(const UnicodeString& text, // on), then also try parsing the text using a default- // constructed NumberFormat if (ruleSet != NULL) { - ruleSet->parse(text, parsePosition, upperBound, result); + ruleSet->parse(text, parsePosition, upperBound, nonNumericalExecutedRuleMask, result); if (lenientParse && !ruleSet->isFractionRuleSet() && parsePosition.getIndex() == 0) { UErrorCode status = U_ZERO_ERROR; NumberFormat* fmt = NumberFormat::createInstance(status); @@ -931,18 +935,19 @@ ModulusSubstitution::doParse(const UnicodeString& text, double baseValue, double upperBound, UBool lenientParse, + int32_t nonNumericalExecutedRuleMask, Formattable& result) const { // if this isn't a >>> substitution, we can just use the // inherited parse() routine to do the parsing if (ruleToUse == NULL) { - return NFSubstitution::doParse(text, parsePosition, baseValue, upperBound, lenientParse, result); + return NFSubstitution::doParse(text, parsePosition, baseValue, upperBound, lenientParse, nonNumericalExecutedRuleMask, result); // but if it IS a >>> substitution, we have to do it here: we // use the specific rule's doParse() method, and then we have to // do some of the other work of NFRuleSet.parse() } else { - ruleToUse->doParse(text, parsePosition, FALSE, upperBound, result); + ruleToUse->doParse(text, parsePosition, FALSE, upperBound, nonNumericalExecutedRuleMask, result); if (parsePosition.getIndex() != 0) { UErrorCode status = U_ZERO_ERROR; @@ -1118,12 +1123,13 @@ FractionalPartSubstitution::doParse(const UnicodeString& text, double baseValue, double /*upperBound*/, UBool lenientParse, + int32_t nonNumericalExecutedRuleMask, Formattable& resVal) const { // if we're not in byDigits mode, we can just use the inherited // doParse() if (!byDigits) { - return NFSubstitution::doParse(text, parsePosition, baseValue, 0, lenientParse, resVal); + return NFSubstitution::doParse(text, parsePosition, baseValue, 0, lenientParse, nonNumericalExecutedRuleMask, resVal); // if we ARE in byDigits mode, parse the text one digit at a time // using this substitution's owning rule set (we do this by setting @@ -1141,7 +1147,7 @@ FractionalPartSubstitution::doParse(const UnicodeString& text, while (workText.length() > 0 && workPos.getIndex() != 0) { workPos.setIndex(0); Formattable temp; - getRuleSet()->parse(workText, workPos, 10, temp); + getRuleSet()->parse(workText, workPos, 10, nonNumericalExecutedRuleMask, temp); UErrorCode status = U_ZERO_ERROR; digit = temp.getLong(status); // digit = temp.getType() == Formattable::kLong ? @@ -1249,6 +1255,7 @@ NumeratorSubstitution::doParse(const UnicodeString& text, double baseValue, double upperBound, UBool /*lenientParse*/, + int32_t nonNumericalExecutedRuleMask, Formattable& result) const { // we don't have to do anything special to do the parsing here, @@ -1267,7 +1274,7 @@ NumeratorSubstitution::doParse(const UnicodeString& text, while (workText.length() > 0 && workPos.getIndex() != 0) { workPos.setIndex(0); - getRuleSet()->parse(workText, workPos, 1, temp); // parse zero or nothing at all + getRuleSet()->parse(workText, workPos, 1, nonNumericalExecutedRuleMask, temp); // parse zero or nothing at all if (workPos.getIndex() == 0) { // we failed, either there were no more zeros, or the number was formatted with digits // either way, we're done @@ -1289,7 +1296,7 @@ NumeratorSubstitution::doParse(const UnicodeString& text, } // we've parsed off the zeros, now let's parse the rest from our current position - NFSubstitution::doParse(workText, parsePosition, withZeros ? 1 : baseValue, upperBound, FALSE, result); + NFSubstitution::doParse(workText, parsePosition, withZeros ? 1 : baseValue, upperBound, FALSE, nonNumericalExecutedRuleMask, result); if (withZeros) { // any base value will do in this case. is there a way to diff --git a/icu4c/source/i18n/nfsubs.h b/icu4c/source/i18n/nfsubs.h index e8b259137ed..b8a5fc66198 100644 --- a/icu4c/source/i18n/nfsubs.h +++ b/icu4c/source/i18n/nfsubs.h @@ -191,6 +191,7 @@ public: double baseValue, double upperBound, UBool lenientParse, + int32_t nonNumericalExecutedRuleMask, Formattable& result) const; /** diff --git a/icu4c/source/i18n/rbnf.cpp b/icu4c/source/i18n/rbnf.cpp index 66f532e79aa..1b75e5ee1b7 100644 --- a/icu4c/source/i18n/rbnf.cpp +++ b/icu4c/source/i18n/rbnf.cpp @@ -1371,7 +1371,7 @@ RuleBasedNumberFormat::parse(const UnicodeString& text, ParsePosition working_pp(0); Formattable working_result; - rp->parse(workingText, working_pp, kMaxDouble, working_result); + rp->parse(workingText, working_pp, kMaxDouble, 0, working_result); if (working_pp.getIndex() > high_pp.getIndex()) { high_pp = working_pp; high_result = working_result; diff --git a/icu4c/source/test/intltest/itrbnf.cpp b/icu4c/source/test/intltest/itrbnf.cpp index 97700251a38..719df6202aa 100644 --- a/icu4c/source/test/intltest/itrbnf.cpp +++ b/icu4c/source/test/intltest/itrbnf.cpp @@ -75,6 +75,7 @@ void IntlTestRBNF::runIndexedTest(int32_t index, UBool exec, const char* &name, TESTCASE(23, TestVariableDecimalPoint); TESTCASE(24, TestLargeNumbers); TESTCASE(25, TestCompactDecimalFormatStyle); + TESTCASE(26, TestParseFailure); #else TESTCASE(0, TestRBNFDisabled); #endif @@ -2283,6 +2284,25 @@ void IntlTestRBNF::TestCompactDecimalFormatStyle() { doTest(&rbnf, enTestFullData, false); } +void IntlTestRBNF::TestParseFailure() { + UErrorCode status = U_ZERO_ERROR; + RuleBasedNumberFormat rbnf(URBNF_SPELLOUT, Locale::getJapanese(), status); + static const char* testData[][1] = { + { "\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB" }, + { NULL } + }; + for (int i = 0; testData[i][0]; ++i) { + const char* spelledNumber = testData[i][0]; // spelled-out number + + UnicodeString spelledNumberString = UnicodeString(spelledNumber).unescape(); + Formattable actualNumber; + rbnf.parse(spelledNumberString, actualNumber, status); + if (status != U_INVALID_FORMAT_ERROR) { // I would have expected U_PARSE_ERROR, but NumberFormat::parse gives U_INVALID_FORMAT_ERROR + errln("FAIL: string should be unparseable %s %s", spelledNumber, u_errorName(status)); + } + } +} + void IntlTestRBNF::doTest(RuleBasedNumberFormat* formatter, const char* const testData[][2], UBool testParsing) { diff --git a/icu4c/source/test/intltest/itrbnf.h b/icu4c/source/test/intltest/itrbnf.h index 540b8033342..e58d321362c 100644 --- a/icu4c/source/test/intltest/itrbnf.h +++ b/icu4c/source/test/intltest/itrbnf.h @@ -147,6 +147,7 @@ class IntlTestRBNF : public IntlTest { void TestRounding(); void TestLargeNumbers(); void TestCompactDecimalFormatStyle(); + void TestParseFailure(); protected: virtual void doTest(RuleBasedNumberFormat* formatter, const char* const testData[][2], UBool testParsing); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/NFRule.java b/icu4j/main/classes/core/src/com/ibm/icu/text/NFRule.java index 018f8ea869e..cc78cf78f99 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/NFRule.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/NFRule.java @@ -903,7 +903,7 @@ final class NFRule { * result is an integer and Double otherwise. The result is never null. */ public Number doParse(String text, ParsePosition parsePosition, boolean isFractionRule, - double upperBound) { + double upperBound, int nonNumericalExecutedRuleMask) { // internally we operate on a copy of the string being parsed // (because we're going to change it) and use our own ParsePosition @@ -976,7 +976,7 @@ final class NFRule { pp.setIndex(0); double partialResult = matchToDelimiter(workText, start, tempBaseValue, ruleText.substring(sub1Pos, sub2Pos), rulePatternFormat, - pp, sub1, upperBound).doubleValue(); + pp, sub1, upperBound, nonNumericalExecutedRuleMask).doubleValue(); // if we got a successful match (or were trying to match a // null substitution), pp is now pointing at the first unmatched @@ -994,7 +994,7 @@ final class NFRule { // a real result partialResult = matchToDelimiter(workText2, 0, partialResult, ruleText.substring(sub2Pos), rulePatternFormat, pp2, sub2, - upperBound).doubleValue(); + upperBound, nonNumericalExecutedRuleMask).doubleValue(); // if we got a successful match on this second // matchToDelimiter() call, update the high-water mark @@ -1121,7 +1121,8 @@ final class NFRule { * Double. */ private Number matchToDelimiter(String text, int startPos, double baseVal, - String delimiter, PluralFormat pluralFormatDelimiter, ParsePosition pp, NFSubstitution sub, double upperBound) { + String delimiter, PluralFormat pluralFormatDelimiter, ParsePosition pp, NFSubstitution sub, + double upperBound, int nonNumericalExecutedRuleMask) { // if "delimiter" contains real (i.e., non-ignorable) text, search // it for "delimiter" beginning at "start". If that succeeds, then // use "sub"'s doParse() method to match the text before the @@ -1144,7 +1145,7 @@ final class NFRule { String subText = text.substring(0, dPos); if (subText.length() > 0) { tempResult = sub.doParse(subText, tempPP, baseVal, upperBound, - formatter.lenientParseEnabled()); + formatter.lenientParseEnabled(), nonNumericalExecutedRuleMask); // if the substitution could match all the text up to // where we found "delimiter", then this function has @@ -1192,7 +1193,7 @@ final class NFRule { Number result = ZERO; // try to match the whole string against the substitution Number tempResult = sub.doParse(text, tempPP, baseVal, upperBound, - formatter.lenientParseEnabled()); + formatter.lenientParseEnabled(), nonNumericalExecutedRuleMask); if (tempPP.getIndex() != 0) { // if there's a successful match (or it's a null // substitution), update pp to point to the first diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/NFRuleSet.java b/icu4j/main/classes/core/src/com/ibm/icu/text/NFRuleSet.java index 30838f5078b..be207646dcc 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/NFRuleSet.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/NFRuleSet.java @@ -748,7 +748,7 @@ final class NFRuleSet { * this function returns new Long(0), and the parse position is * left unchanged. */ - public Number parse(String text, ParsePosition parsePosition, double upperBound) { + public Number parse(String text, ParsePosition parsePosition, double upperBound, int nonNumericalExecutedRuleMask) { // try matching each rule in the rule set against the text being // parsed. Whichever one matches the most characters is the one // that determines the value we return. @@ -763,9 +763,13 @@ final class NFRuleSet { } // Try each of the negative rules, fraction rules, infinity rules and NaN rules - for (NFRule fractionRule : nonNumericalRules) { - if (fractionRule != null) { - tempResult = fractionRule.doParse(text, parsePosition, false, upperBound); + for (int nonNumericalRuleIdx = 0; nonNumericalRuleIdx < nonNumericalRules.length; nonNumericalRuleIdx++) { + NFRule nonNumericalRule = nonNumericalRules[nonNumericalRuleIdx]; + if (nonNumericalRule != null && ((nonNumericalExecutedRuleMask >> nonNumericalRuleIdx) & 1) == 0) { + // Mark this rule as being executed so that we don't try to execute it again. + nonNumericalExecutedRuleMask |= 1 << nonNumericalRuleIdx; + + tempResult = nonNumericalRule.doParse(text, parsePosition, false, upperBound, nonNumericalExecutedRuleMask); if (parsePosition.getIndex() > highWaterMark.getIndex()) { result = tempResult; highWaterMark.setIndex(parsePosition.getIndex()); @@ -792,7 +796,7 @@ final class NFRuleSet { continue; } - tempResult = rules[i].doParse(text, parsePosition, isFractionRuleSet, upperBound); + tempResult = rules[i].doParse(text, parsePosition, isFractionRuleSet, upperBound, nonNumericalExecutedRuleMask); if (parsePosition.getIndex() > highWaterMark.getIndex()) { result = tempResult; highWaterMark.setIndex(parsePosition.getIndex()); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/NFSubstitution.java b/icu4j/main/classes/core/src/com/ibm/icu/text/NFSubstitution.java index 909bbb64282..15ca301ed8c 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/NFSubstitution.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/NFSubstitution.java @@ -425,7 +425,7 @@ abstract class NFSubstitution { * is left unchanged. */ public Number doParse(String text, ParsePosition parsePosition, double baseValue, - double upperBound, boolean lenientParse) { + double upperBound, boolean lenientParse, int nonNumericalExecutedRuleMask) { Number tempResult; // figure out the highest base value a rule can have and match @@ -443,7 +443,7 @@ abstract class NFSubstitution { // on), then also try parsing the text using a default- // constructed NumberFormat if (ruleSet != null) { - tempResult = ruleSet.parse(text, parsePosition, upperBound); + tempResult = ruleSet.parse(text, parsePosition, upperBound, nonNumericalExecutedRuleMask); if (lenientParse && !ruleSet.isFractionSet() && parsePosition.getIndex() == 0) { tempResult = ruleSet.owner.getDecimalFormat().parse(text, parsePosition); } @@ -995,17 +995,17 @@ class ModulusSubstitution extends NFSubstitution { */ @Override public Number doParse(String text, ParsePosition parsePosition, double baseValue, - double upperBound, boolean lenientParse) { + double upperBound, boolean lenientParse, int nonNumericalExecutedRuleMask) { // if this isn't a >>> substitution, we can just use the // inherited parse() routine to do the parsing if (ruleToUse == null) { - return super.doParse(text, parsePosition, baseValue, upperBound, lenientParse); + return super.doParse(text, parsePosition, baseValue, upperBound, lenientParse, nonNumericalExecutedRuleMask); } else { // but if it IS a >>> substitution, we have to do it here: we // use the specific rule's doParse() method, and then we have to // do some of the other work of NFRuleSet.parse() - Number tempResult = ruleToUse.doParse(text, parsePosition, false, upperBound); + Number tempResult = ruleToUse.doParse(text, parsePosition, false, upperBound, nonNumericalExecutedRuleMask); if (parsePosition.getIndex() != 0) { double result = tempResult.doubleValue(); @@ -1300,11 +1300,11 @@ class FractionalPartSubstitution extends NFSubstitution { */ @Override public Number doParse(String text, ParsePosition parsePosition, double baseValue, - double upperBound, boolean lenientParse) { + double upperBound, boolean lenientParse, int nonNumericalExecutedRuleMask) { // if we're not in byDigits mode, we can just use the inherited // doParse() if (!byDigits) { - return super.doParse(text, parsePosition, baseValue, 0, lenientParse); + return super.doParse(text, parsePosition, baseValue, 0, lenientParse, nonNumericalExecutedRuleMask); } else { // if we ARE in byDigits mode, parse the text one digit at a time @@ -1320,7 +1320,7 @@ class FractionalPartSubstitution extends NFSubstitution { int leadingZeros = 0; while (workText.length() > 0 && workPos.getIndex() != 0) { workPos.setIndex(0); - digit = ruleSet.parse(workText, workPos, 10).intValue(); + digit = ruleSet.parse(workText, workPos, 10, nonNumericalExecutedRuleMask).intValue(); if (lenientParse && workPos.getIndex() == 0) { Number n = ruleSet.owner.getDecimalFormat().parse(workText, workPos); if (n != null) { @@ -1626,7 +1626,7 @@ class NumeratorSubstitution extends NFSubstitution { */ @Override public Number doParse(String text, ParsePosition parsePosition, double baseValue, - double upperBound, boolean lenientParse) { + double upperBound, boolean lenientParse, int nonNumericalExecutedRuleMask) { // we don't have to do anything special to do the parsing here, // but we have to turn lenient parsing off-- if we leave it on, // it SERIOUSLY messes up the algorithm @@ -1641,7 +1641,7 @@ class NumeratorSubstitution extends NFSubstitution { while (workText.length() > 0 && workPos.getIndex() != 0) { workPos.setIndex(0); - /*digit = */ruleSet.parse(workText, workPos, 1).intValue(); // parse zero or nothing at all + /*digit = */ruleSet.parse(workText, workPos, 1, nonNumericalExecutedRuleMask).intValue(); // parse zero or nothing at all if (workPos.getIndex() == 0) { // we failed, either there were no more zeros, or the number was formatted with digits // either way, we're done @@ -1662,7 +1662,7 @@ class NumeratorSubstitution extends NFSubstitution { } // we've parsed off the zeros, now let's parse the rest from our current position - Number result = super.doParse(text, parsePosition, withZeros ? 1 : baseValue, upperBound, false); + Number result = super.doParse(text, parsePosition, withZeros ? 1 : baseValue, upperBound, false, nonNumericalExecutedRuleMask); if (withZeros) { // any base value will do in this case. is there a way to diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedNumberFormat.java b/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedNumberFormat.java index 05b4498ea1d..719fefb0771 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedNumberFormat.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedNumberFormat.java @@ -1322,7 +1322,7 @@ public class RuleBasedNumberFormat extends NumberFormat { // try parsing the string with the rule set. If it gets past the // high-water mark, update the high-water mark and the result - tempResult = ruleSets[i].parse(workingText, workingPos, Double.MAX_VALUE); + tempResult = ruleSets[i].parse(workingText, workingPos, Double.MAX_VALUE, 0); if (workingPos.getIndex() > highWaterMark.getIndex()) { result = tempResult; highWaterMark.setIndex(workingPos.getIndex()); diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/RBNFParseTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/RBNFParseTest.java index dfdf0222fa8..e5e9f30d71e 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/RBNFParseTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/RBNFParseTest.java @@ -8,6 +8,7 @@ */ package com.ibm.icu.dev.test.format; +import java.text.ParseException; import java.util.Locale; import org.junit.Test; @@ -161,4 +162,22 @@ public class RBNFParseTest extends TestFmwk { logln("rbnf_fr:" + rbnf_en.getDefaultRuleSetName()); parseList(rbnf_en, rbnf_fr, lists); } + + @Test + public void TestBadParse() { + RuleBasedNumberFormat rbnf = new RuleBasedNumberFormat(Locale.JAPAN, RuleBasedNumberFormat.SPELLOUT); + String[] testData = { + "\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB", + }; + + for (String testString : testData) { + try { + rbnf.parse(testString); + errln("Unexpected success: " + testString); + } + catch (ParseException e) { + // success! + } + } + } }