ICU-13529 Parsing of redundant rule matches is slow when parsing with RuleBasedNumber...

author George Rhoten <grhoten@users.noreply.github.com>

Tue, 13 Feb 2018 23:48:48 +0000 (23:48 +0000)

committer George Rhoten <grhoten@users.noreply.github.com>

Tue, 13 Feb 2018 23:48:48 +0000 (23:48 +0000)
author George Rhoten <grhoten@users.noreply.github.com>
Tue, 13 Feb 2018 23:48:48 +0000 (23:48 +0000)
committer George Rhoten <grhoten@users.noreply.github.com>
Tue, 13 Feb 2018 23:48:48 +0000 (23:48 +0000)
diff --git a/.gitignore b/.gitignore

index 1d2af48c7148afd33d582574a0934a3bb67d01ee..95cc42712f9051c40e6f093179c731cdfa953024 100644 (file)
--- a/.gitignore
+++ b/.gitignore
@@ -635,6 +635,8 @@ icu4c/source/tools/ctestfw/libsicutest*
  icu4c/source/tools/ctestfw/release
  icu4c/source/tools/ctestfw/x64
  icu4c/source/tools/ctestfw/x86
+icu4c/source/tools/escapesrc/*.d
+icu4c/source/tools/escapesrc/Makefile
  icu4c/source/tools/genbrk/*.d
  icu4c/source/tools/genbrk/*.o
  icu4c/source/tools/genbrk/*.pdb
diff --git a/icu4c/source/i18n/nfrs.cpp b/icu4c/source/i18n/nfrs.cpp

index 769fad353fbd5988ff77bcf0262979588ed16e88..d5b368d42300b31410b1deb720151a054e22ac0c 100644 (file)
--- a/icu4c/source/i18n/nfrs.cpp
+++ b/icu4c/source/i18n/nfrs.cpp
@@ -681,7 +681,7 @@ static void dumpUS(FILE* f, const UnicodeString& us) {
  #endif
  
  UBool
-NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const
+NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, int32_t nonNumericalExecutedRuleMask, Formattable& result) const
  {
      // try matching each rule in the rule set against the text being
      // parsed.  Whichever one matches the most characters is the one
@@ -707,9 +707,12 @@ NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBoun
  #endif
      // Try each of the negative rules, fraction rules, infinity rules and NaN rules
      for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
-        if (nonNumericalRules[i]) {
+        if (nonNumericalRules[i] && ((nonNumericalExecutedRuleMask >> i) & 1) == 0) {
+            // Mark this rule as being executed so that we don't try to execute it again.
+            nonNumericalExecutedRuleMask |= 1 << i;
+
              Formattable tempResult;
-            UBool success = nonNumericalRules[i]->doParse(text, workingPos, 0, upperBound, tempResult);
+            UBool success = nonNumericalRules[i]->doParse(text, workingPos, 0, upperBound, nonNumericalExecutedRuleMask, tempResult);
              if (success && (workingPos.getIndex() > highWaterMark.getIndex())) {
                  result = tempResult;
                  highWaterMark = workingPos;
@@ -748,7 +751,7 @@ NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBoun
                  continue;
              }
              Formattable tempResult;
-            UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, tempResult);
+            UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, nonNumericalExecutedRuleMask, tempResult);
              if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
                  result = tempResult;
                  highWaterMark = workingPos;
diff --git a/icu4c/source/i18n/nfrs.h b/icu4c/source/i18n/nfrs.h

index 1e39b289b4dd2fb3b15821c368c42641d2745162..d4797e7ff5547e1ea539de37393f82e3647fc94f 100644 (file)
--- a/icu4c/source/i18n/nfrs.h
+++ b/icu4c/source/i18n/nfrs.h
@@ -55,7 +55,7 @@ public:
      void  format(int64_t number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const;
      void  format(double number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const;
  
-    UBool parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const;
+    UBool parse(const UnicodeString& text, ParsePosition& pos, double upperBound, int32_t nonNumericalExecutedRuleMask, Formattable& result) const;
  
      void appendRules(UnicodeString& result) const; // toString
  
diff --git a/icu4c/source/i18n/nfrule.cpp b/icu4c/source/i18n/nfrule.cpp

index f24be11bcdc8f8441e431e2e6d7eeb1b29ac708b..f32ed5a747c057d1f6b84c86cba1ac0720d28329 100644 (file)
--- a/icu4c/source/i18n/nfrule.cpp
+++ b/icu4c/source/i18n/nfrule.cpp
@@ -900,6 +900,7 @@ NFRule::doParse(const UnicodeString& text,
                  ParsePosition& parsePosition,
                  UBool isFractionRule,
                  double upperBound,
+                int32_t nonNumericalExecutedRuleMask,
                  Formattable& resVal) const
  {
      // internally we operate on a copy of the string being parsed
@@ -1002,6 +1003,7 @@ NFRule::doParse(const UnicodeString& text,
          temp.setTo(ruleText, sub1Pos, sub2Pos - sub1Pos);
          double partialResult = matchToDelimiter(workText, start, tempBaseValue,
              temp, pp, sub1,
+            nonNumericalExecutedRuleMask,
              upperBound);
  
          // if we got a successful match (or were trying to match a
@@ -1022,6 +1024,7 @@ NFRule::doParse(const UnicodeString& text,
              temp.setTo(ruleText, sub2Pos, ruleText.length() - sub2Pos);
              partialResult = matchToDelimiter(workText2, 0, partialResult,
                  temp, pp2, sub2,
+                nonNumericalExecutedRuleMask,
                  upperBound);
  
              // if we got a successful match on this second
@@ -1158,6 +1161,7 @@ NFRule::matchToDelimiter(const UnicodeString& text,
                           const UnicodeString& delimiter,
                           ParsePosition& pp,
                           const NFSubstitution* sub,
+                         int32_t nonNumericalExecutedRuleMask,
                           double upperBound) const
  {
         UErrorCode status = U_ZERO_ERROR;
@@ -1191,6 +1195,7 @@ NFRule::matchToDelimiter(const UnicodeString& text,
  #else
                      formatter->isLenient(),
  #endif
+                    nonNumericalExecutedRuleMask,
                      result);
  
                  // if the substitution could match all the text up to
@@ -1244,6 +1249,7 @@ NFRule::matchToDelimiter(const UnicodeString& text,
  #else
              formatter->isLenient(),
  #endif
+            nonNumericalExecutedRuleMask,
              result);
          if (success && (tempPP.getIndex() != 0)) {
              // if there's a successful match (or it's a null
diff --git a/icu4c/source/i18n/nfrule.h b/icu4c/source/i18n/nfrule.h

index 809119ca6c6ce215b24c7d98c242388fadec1fe3..0fabe20237336a19ec13cbda5d0e1aa8cd6a3af1 100644 (file)
--- a/icu4c/source/i18n/nfrule.h
+++ b/icu4c/source/i18n/nfrule.h
@@ -74,6 +74,7 @@ public:
                    ParsePosition& pos, 
                    UBool isFractional, 
                    double upperBound,
+                  int32_t nonNumericalExecutedRuleMask,
                    Formattable& result) const;
  
      UBool shouldRollBack(int64_t number) const;
@@ -94,6 +95,7 @@ private:
      int32_t indexOfAnyRulePrefix() const;
      double matchToDelimiter(const UnicodeString& text, int32_t startPos, double baseValue,
                              const UnicodeString& delimiter, ParsePosition& pp, const NFSubstitution* sub, 
+                            int32_t nonNumericalExecutedRuleMask,
                              double upperBound) const;
      void stripPrefix(UnicodeString& text, const UnicodeString& prefix, ParsePosition& pp) const;
  
diff --git a/icu4c/source/i18n/nfsubs.cpp b/icu4c/source/i18n/nfsubs.cpp

index b5da9821d55beca770cf63d980dc47971a2a925d..ec9e9b873cb9a20947cb19d0249b302eddbbe378 100644 (file)
--- a/icu4c/source/i18n/nfsubs.cpp
+++ b/icu4c/source/i18n/nfsubs.cpp
@@ -155,6 +155,7 @@ public:
          double baseValue,
          double upperBound,
          UBool lenientParse,
+        int32_t nonNumericalExecutedRuleMask,
          Formattable& result) const;
  
      virtual double composeRuleValue(double newRuleValue, double oldRuleValue) const {
@@ -221,6 +222,7 @@ public:
          double baseValue,
          double upperBound,
          UBool lenientParse,
+        int32_t nonNumericalExecutedRuleMask,
          Formattable& result) const;
  
      virtual double composeRuleValue(double newRuleValue, double oldRuleValue) const { return newRuleValue + oldRuleValue; }
@@ -292,6 +294,7 @@ public:
          double baseValue,
          double upperBound,
          UBool /*lenientParse*/,
+        int32_t nonNumericalExecutedRuleMask,
          Formattable& result) const;
  
      virtual double composeRuleValue(double newRuleValue, double oldRuleValue) const { return newRuleValue / oldRuleValue; }
@@ -689,6 +692,7 @@ NFSubstitution::doParse(const UnicodeString& text,
                          double baseValue,
                          double upperBound,
                          UBool lenientParse,
+                        int32_t nonNumericalExecutedRuleMask,
                          Formattable& result) const
  {
  #ifdef RBNF_DEBUG
@@ -709,7 +713,7 @@ NFSubstitution::doParse(const UnicodeString& text,
      // on), then also try parsing the text using a default-
      // constructed NumberFormat
      if (ruleSet != NULL) {
-        ruleSet->parse(text, parsePosition, upperBound, result);
+        ruleSet->parse(text, parsePosition, upperBound, nonNumericalExecutedRuleMask, result);
          if (lenientParse && !ruleSet->isFractionRuleSet() && parsePosition.getIndex() == 0) {
              UErrorCode status = U_ZERO_ERROR;
              NumberFormat* fmt = NumberFormat::createInstance(status);
@@ -931,18 +935,19 @@ ModulusSubstitution::doParse(const UnicodeString& text,
                               double baseValue,
                               double upperBound,
                               UBool lenientParse,
+                             int32_t nonNumericalExecutedRuleMask,
                               Formattable& result) const
  {
      // if this isn't a >>> substitution, we can just use the
      // inherited parse() routine to do the parsing
      if (ruleToUse == NULL) {
-        return NFSubstitution::doParse(text, parsePosition, baseValue, upperBound, lenientParse, result);
+        return NFSubstitution::doParse(text, parsePosition, baseValue, upperBound, lenientParse, nonNumericalExecutedRuleMask, result);
  
          // but if it IS a >>> substitution, we have to do it here: we
          // use the specific rule's doParse() method, and then we have to
          // do some of the other work of NFRuleSet.parse()
      } else {
-        ruleToUse->doParse(text, parsePosition, FALSE, upperBound, result);
+        ruleToUse->doParse(text, parsePosition, FALSE, upperBound, nonNumericalExecutedRuleMask, result);
  
          if (parsePosition.getIndex() != 0) {
              UErrorCode status = U_ZERO_ERROR;
@@ -1118,12 +1123,13 @@ FractionalPartSubstitution::doParse(const UnicodeString& text,
                  double baseValue,
                  double /*upperBound*/,
                  UBool lenientParse,
+                int32_t nonNumericalExecutedRuleMask,
                  Formattable& resVal) const
  {
      // if we're not in byDigits mode, we can just use the inherited
      // doParse()
      if (!byDigits) {
-        return NFSubstitution::doParse(text, parsePosition, baseValue, 0, lenientParse, resVal);
+        return NFSubstitution::doParse(text, parsePosition, baseValue, 0, lenientParse, nonNumericalExecutedRuleMask, resVal);
  
          // if we ARE in byDigits mode, parse the text one digit at a time
          // using this substitution's owning rule set (we do this by setting
@@ -1141,7 +1147,7 @@ FractionalPartSubstitution::doParse(const UnicodeString& text,
          while (workText.length() > 0 && workPos.getIndex() != 0) {
              workPos.setIndex(0);
              Formattable temp;
-            getRuleSet()->parse(workText, workPos, 10, temp);
+            getRuleSet()->parse(workText, workPos, 10, nonNumericalExecutedRuleMask, temp);
              UErrorCode status = U_ZERO_ERROR;
              digit = temp.getLong(status);
  //            digit = temp.getType() == Formattable::kLong ?
@@ -1249,6 +1255,7 @@ NumeratorSubstitution::doParse(const UnicodeString& text,
                                 double baseValue,
                                 double upperBound,
                                 UBool /*lenientParse*/,
+                               int32_t nonNumericalExecutedRuleMask,
                                 Formattable& result) const
  {
      // we don't have to do anything special to do the parsing here,
@@ -1267,7 +1274,7 @@ NumeratorSubstitution::doParse(const UnicodeString& text,
  
          while (workText.length() > 0 && workPos.getIndex() != 0) {
              workPos.setIndex(0);
-            getRuleSet()->parse(workText, workPos, 1, temp); // parse zero or nothing at all
+            getRuleSet()->parse(workText, workPos, 1, nonNumericalExecutedRuleMask, temp); // parse zero or nothing at all
              if (workPos.getIndex() == 0) {
                  // we failed, either there were no more zeros, or the number was formatted with digits
                  // either way, we're done
@@ -1289,7 +1296,7 @@ NumeratorSubstitution::doParse(const UnicodeString& text,
      }
  
      // we've parsed off the zeros, now let's parse the rest from our current position
-    NFSubstitution::doParse(workText, parsePosition, withZeros ? 1 : baseValue, upperBound, FALSE, result);
+    NFSubstitution::doParse(workText, parsePosition, withZeros ? 1 : baseValue, upperBound, FALSE, nonNumericalExecutedRuleMask, result);
  
      if (withZeros) {
          // any base value will do in this case.  is there a way to
diff --git a/icu4c/source/i18n/nfsubs.h b/icu4c/source/i18n/nfsubs.h

index e8b259137edd1df7ac973dba37d7926c06914134..b8a5fc66198194c6217817e0f91281abf106d70e 100644 (file)
--- a/icu4c/source/i18n/nfsubs.h
+++ b/icu4c/source/i18n/nfsubs.h
@@ -191,6 +191,7 @@ public:
          double baseValue,
          double upperBound, 
          UBool lenientParse,
+        int32_t nonNumericalExecutedRuleMask,
          Formattable& result) const;
      
      /**
diff --git a/icu4c/source/i18n/rbnf.cpp b/icu4c/source/i18n/rbnf.cpp

index 66f532e79aaab3ed101b2f7bed763d244b7314b6..1b75e5ee1b73f81fbf2dc817e555f856e36716ca 100644 (file)
--- a/icu4c/source/i18n/rbnf.cpp
+++ b/icu4c/source/i18n/rbnf.cpp
@@ -1371,7 +1371,7 @@ RuleBasedNumberFormat::parse(const UnicodeString& text,
              ParsePosition working_pp(0);
              Formattable working_result;
  
-            rp->parse(workingText, working_pp, kMaxDouble, working_result);
+            rp->parse(workingText, working_pp, kMaxDouble, 0, working_result);
              if (working_pp.getIndex() > high_pp.getIndex()) {
                  high_pp = working_pp;
                  high_result = working_result;
diff --git a/icu4c/source/test/intltest/itrbnf.cpp b/icu4c/source/test/intltest/itrbnf.cpp

index 97700251a381119ed9ed6b7d4f1e61e912728a6b..719df6202aa1890460764f6c7cd43631f18d00bc 100644 (file)
--- a/icu4c/source/test/intltest/itrbnf.cpp
+++ b/icu4c/source/test/intltest/itrbnf.cpp
@@ -75,6 +75,7 @@ void IntlTestRBNF::runIndexedTest(int32_t index, UBool exec, const char* &name,
          TESTCASE(23, TestVariableDecimalPoint);
          TESTCASE(24, TestLargeNumbers);
          TESTCASE(25, TestCompactDecimalFormatStyle);
+        TESTCASE(26, TestParseFailure);
  #else
          TESTCASE(0, TestRBNFDisabled);
  #endif
@@ -2283,6 +2284,25 @@ void IntlTestRBNF::TestCompactDecimalFormatStyle() {
      doTest(&rbnf, enTestFullData, false);
  }
  
+void IntlTestRBNF::TestParseFailure() {
+    UErrorCode status = U_ZERO_ERROR;
+    RuleBasedNumberFormat rbnf(URBNF_SPELLOUT, Locale::getJapanese(), status);
+    static const char* testData[][1] = {
+        { "\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB" },
+        { NULL }
+    };
+    for (int i = 0; testData[i][0]; ++i) {
+        const char* spelledNumber = testData[i][0]; // spelled-out number
+        
+        UnicodeString spelledNumberString = UnicodeString(spelledNumber).unescape();
+        Formattable actualNumber;
+        rbnf.parse(spelledNumberString, actualNumber, status);
+        if (status != U_INVALID_FORMAT_ERROR) { // I would have expected U_PARSE_ERROR, but NumberFormat::parse gives U_INVALID_FORMAT_ERROR
+            errln("FAIL: string should be unparseable %s %s", spelledNumber, u_errorName(status));
+        }
+    }
+}
+
  void 
  IntlTestRBNF::doTest(RuleBasedNumberFormat* formatter, const char* const testData[][2], UBool testParsing) 
  {
diff --git a/icu4c/source/test/intltest/itrbnf.h b/icu4c/source/test/intltest/itrbnf.h

index 540b8033342b2e446fdba793f78fc4adf9e944f4..e58d321362cba303a1de9656d1b8a66863c8656c 100644 (file)
--- a/icu4c/source/test/intltest/itrbnf.h
+++ b/icu4c/source/test/intltest/itrbnf.h
@@ -147,6 +147,7 @@ class IntlTestRBNF : public IntlTest {
      void TestRounding();
      void TestLargeNumbers();
      void TestCompactDecimalFormatStyle();
+    void TestParseFailure();
  
  protected:
    virtual void doTest(RuleBasedNumberFormat* formatter, const char* const testData[][2], UBool testParsing);
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/NFRule.java b/icu4j/main/classes/core/src/com/ibm/icu/text/NFRule.java

index 018f8ea869e07e6b7310c96c469848efa6e5815e..cc78cf78f99882df0baeeefb1a32a1b9f8cb016e 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/NFRule.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/NFRule.java
@@ -903,7 +903,7 @@ final class NFRule {
       * result is an integer and Double otherwise.  The result is never null.
       */
      public Number doParse(String text, ParsePosition parsePosition, boolean isFractionRule,
-                          double upperBound) {
+                          double upperBound, int nonNumericalExecutedRuleMask) {
  
          // internally we operate on a copy of the string being parsed
          // (because we're going to change it) and use our own ParsePosition
@@ -976,7 +976,7 @@ final class NFRule {
              pp.setIndex(0);
              double partialResult = matchToDelimiter(workText, start, tempBaseValue,
                                                      ruleText.substring(sub1Pos, sub2Pos), rulePatternFormat,
-                                                    pp, sub1, upperBound).doubleValue();
+                                                    pp, sub1, upperBound, nonNumericalExecutedRuleMask).doubleValue();
  
              // if we got a successful match (or were trying to match a
              // null substitution), pp is now pointing at the first unmatched
@@ -994,7 +994,7 @@ final class NFRule {
                  // a real result
                  partialResult = matchToDelimiter(workText2, 0, partialResult,
                                                   ruleText.substring(sub2Pos), rulePatternFormat, pp2, sub2,
-                                                 upperBound).doubleValue();
+                                                 upperBound, nonNumericalExecutedRuleMask).doubleValue();
  
                  // if we got a successful match on this second
                  // matchToDelimiter() call, update the high-water mark
@@ -1121,7 +1121,8 @@ final class NFRule {
       * Double.
       */
      private Number matchToDelimiter(String text, int startPos, double baseVal,
-                                    String delimiter, PluralFormat pluralFormatDelimiter, ParsePosition pp, NFSubstitution sub, double upperBound) {
+                                    String delimiter, PluralFormat pluralFormatDelimiter, ParsePosition pp, NFSubstitution sub,
+                                    double upperBound, int nonNumericalExecutedRuleMask) {
          // if "delimiter" contains real (i.e., non-ignorable) text, search
          // it for "delimiter" beginning at "start".  If that succeeds, then
          // use "sub"'s doParse() method to match the text before the
@@ -1144,7 +1145,7 @@ final class NFRule {
                  String subText = text.substring(0, dPos);
                  if (subText.length() > 0) {
                      tempResult = sub.doParse(subText, tempPP, baseVal, upperBound,
-                                             formatter.lenientParseEnabled());
+                                             formatter.lenientParseEnabled(), nonNumericalExecutedRuleMask);
  
                      // if the substitution could match all the text up to
                      // where we found "delimiter", then this function has
@@ -1192,7 +1193,7 @@ final class NFRule {
              Number result = ZERO;
              // try to match the whole string against the substitution
              Number tempResult = sub.doParse(text, tempPP, baseVal, upperBound,
-                    formatter.lenientParseEnabled());
+                    formatter.lenientParseEnabled(), nonNumericalExecutedRuleMask);
              if (tempPP.getIndex() != 0) {
                  // if there's a successful match (or it's a null
                  // substitution), update pp to point to the first
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/NFRuleSet.java b/icu4j/main/classes/core/src/com/ibm/icu/text/NFRuleSet.java

index 30838f5078bba7f04009cb0aa833a7d30c55fc3f..be207646dcc9d14512785f8cc0ea1bf67a9a3f86 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/NFRuleSet.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/NFRuleSet.java
@@ -748,7 +748,7 @@ final class NFRuleSet {
       * this function returns new Long(0), and the parse position is
       * left unchanged.
       */
-    public Number parse(String text, ParsePosition parsePosition, double upperBound) {
+    public Number parse(String text, ParsePosition parsePosition, double upperBound, int nonNumericalExecutedRuleMask) {
          // try matching each rule in the rule set against the text being
          // parsed.  Whichever one matches the most characters is the one
          // that determines the value we return.
@@ -763,9 +763,13 @@ final class NFRuleSet {
          }
  
          // Try each of the negative rules, fraction rules, infinity rules and NaN rules
-        for (NFRule fractionRule : nonNumericalRules) {
-            if (fractionRule != null) {
-                tempResult = fractionRule.doParse(text, parsePosition, false, upperBound);
+        for (int nonNumericalRuleIdx = 0; nonNumericalRuleIdx < nonNumericalRules.length; nonNumericalRuleIdx++) {
+            NFRule nonNumericalRule = nonNumericalRules[nonNumericalRuleIdx];
+            if (nonNumericalRule != null && ((nonNumericalExecutedRuleMask >> nonNumericalRuleIdx) & 1) == 0) {
+                // Mark this rule as being executed so that we don't try to execute it again.
+                nonNumericalExecutedRuleMask |= 1 << nonNumericalRuleIdx;
+
+                tempResult = nonNumericalRule.doParse(text, parsePosition, false, upperBound, nonNumericalExecutedRuleMask);
                  if (parsePosition.getIndex() > highWaterMark.getIndex()) {
                      result = tempResult;
                      highWaterMark.setIndex(parsePosition.getIndex());
@@ -792,7 +796,7 @@ final class NFRuleSet {
                  continue;
              }
  
-            tempResult = rules[i].doParse(text, parsePosition, isFractionRuleSet, upperBound);
+            tempResult = rules[i].doParse(text, parsePosition, isFractionRuleSet, upperBound, nonNumericalExecutedRuleMask);
              if (parsePosition.getIndex() > highWaterMark.getIndex()) {
                  result = tempResult;
                  highWaterMark.setIndex(parsePosition.getIndex());
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/NFSubstitution.java b/icu4j/main/classes/core/src/com/ibm/icu/text/NFSubstitution.java

index 909bbb64282b6ab14be0b37f3ed3a64e55bd27dc..15ca301ed8cf724dcce4168a261036736906e1dc 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/NFSubstitution.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/NFSubstitution.java
@@ -425,7 +425,7 @@ abstract class NFSubstitution {
       * is left unchanged.
       */
      public Number doParse(String text, ParsePosition parsePosition, double baseValue,
-                          double upperBound, boolean lenientParse) {
+                          double upperBound, boolean lenientParse, int nonNumericalExecutedRuleMask) {
          Number tempResult;
  
          // figure out the highest base value a rule can have and match
@@ -443,7 +443,7 @@ abstract class NFSubstitution {
          // on), then also try parsing the text using a default-
          // constructed NumberFormat
          if (ruleSet != null) {
-            tempResult = ruleSet.parse(text, parsePosition, upperBound);
+            tempResult = ruleSet.parse(text, parsePosition, upperBound, nonNumericalExecutedRuleMask);
              if (lenientParse && !ruleSet.isFractionSet() && parsePosition.getIndex() == 0) {
                  tempResult = ruleSet.owner.getDecimalFormat().parse(text, parsePosition);
              }
@@ -995,17 +995,17 @@ class ModulusSubstitution extends NFSubstitution {
       */
      @Override
    public Number doParse(String text, ParsePosition parsePosition, double baseValue,
-                        double upperBound, boolean lenientParse) {
+                        double upperBound, boolean lenientParse, int nonNumericalExecutedRuleMask) {
          // if this isn't a >>> substitution, we can just use the
          // inherited parse() routine to do the parsing
          if (ruleToUse == null) {
-            return super.doParse(text, parsePosition, baseValue, upperBound, lenientParse);
+            return super.doParse(text, parsePosition, baseValue, upperBound, lenientParse, nonNumericalExecutedRuleMask);
  
          } else {
              // but if it IS a >>> substitution, we have to do it here: we
              // use the specific rule's doParse() method, and then we have to
              // do some of the other work of NFRuleSet.parse()
-            Number tempResult = ruleToUse.doParse(text, parsePosition, false, upperBound);
+            Number tempResult = ruleToUse.doParse(text, parsePosition, false, upperBound, nonNumericalExecutedRuleMask);
  
              if (parsePosition.getIndex() != 0) {
                  double result = tempResult.doubleValue();
@@ -1300,11 +1300,11 @@ class FractionalPartSubstitution extends NFSubstitution {
       */
      @Override
    public Number doParse(String text, ParsePosition parsePosition, double baseValue,
-                        double upperBound, boolean lenientParse) {
+                        double upperBound, boolean lenientParse, int nonNumericalExecutedRuleMask) {
          // if we're not in byDigits mode, we can just use the inherited
          // doParse()
          if (!byDigits) {
-            return super.doParse(text, parsePosition, baseValue, 0, lenientParse);
+            return super.doParse(text, parsePosition, baseValue, 0, lenientParse, nonNumericalExecutedRuleMask);
          }
          else {
              // if we ARE in byDigits mode, parse the text one digit at a time
@@ -1320,7 +1320,7 @@ class FractionalPartSubstitution extends NFSubstitution {
              int leadingZeros = 0;
              while (workText.length() > 0 && workPos.getIndex() != 0) {
                  workPos.setIndex(0);
-                digit = ruleSet.parse(workText, workPos, 10).intValue();
+                digit = ruleSet.parse(workText, workPos, 10, nonNumericalExecutedRuleMask).intValue();
                  if (lenientParse && workPos.getIndex() == 0) {
                      Number n = ruleSet.owner.getDecimalFormat().parse(workText, workPos);
                      if (n != null) {
@@ -1626,7 +1626,7 @@ class NumeratorSubstitution extends NFSubstitution {
       */
      @Override
    public Number doParse(String text, ParsePosition parsePosition, double baseValue,
-                        double upperBound, boolean lenientParse) {
+                        double upperBound, boolean lenientParse, int nonNumericalExecutedRuleMask) {
          // we don't have to do anything special to do the parsing here,
          // but we have to turn lenient parsing off-- if we leave it on,
          // it SERIOUSLY messes up the algorithm
@@ -1641,7 +1641,7 @@ class NumeratorSubstitution extends NFSubstitution {
  
              while (workText.length() > 0 && workPos.getIndex() != 0) {
                  workPos.setIndex(0);
-                /*digit = */ruleSet.parse(workText, workPos, 1).intValue(); // parse zero or nothing at all
+                /*digit = */ruleSet.parse(workText, workPos, 1, nonNumericalExecutedRuleMask).intValue(); // parse zero or nothing at all
                  if (workPos.getIndex() == 0) {
                      // we failed, either there were no more zeros, or the number was formatted with digits
                      // either way, we're done
@@ -1662,7 +1662,7 @@ class NumeratorSubstitution extends NFSubstitution {
          }
  
          // we've parsed off the zeros, now let's parse the rest from our current position
-        Number result =  super.doParse(text, parsePosition, withZeros ? 1 : baseValue, upperBound, false);
+        Number result =  super.doParse(text, parsePosition, withZeros ? 1 : baseValue, upperBound, false, nonNumericalExecutedRuleMask);
  
          if (withZeros) {
              // any base value will do in this case.  is there a way to
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedNumberFormat.java b/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedNumberFormat.java

index 05b4498ea1d3439672150e3ba6f0461cabe1e357..719fefb07711d43e8b74d64005c45f9cb0dc27dc 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedNumberFormat.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedNumberFormat.java
@@ -1322,7 +1322,7 @@ public class RuleBasedNumberFormat extends NumberFormat {
  
              // try parsing the string with the rule set.  If it gets past the
              // high-water mark, update the high-water mark and the result
-            tempResult = ruleSets[i].parse(workingText, workingPos, Double.MAX_VALUE);
+            tempResult = ruleSets[i].parse(workingText, workingPos, Double.MAX_VALUE, 0);
              if (workingPos.getIndex() > highWaterMark.getIndex()) {
                  result = tempResult;
                  highWaterMark.setIndex(workingPos.getIndex());
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/RBNFParseTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/RBNFParseTest.java

index dfdf0222fa8cdfcc772674098e14d2b14c533c26..e5e9f30d71ebe28e6a25caef5ea1003dacc8a7b9 100644 (file)
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/RBNFParseTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/RBNFParseTest.java
@@ -8,6 +8,7 @@
   */
  package com.ibm.icu.dev.test.format;
  
+import java.text.ParseException;
  import java.util.Locale;
  
  import org.junit.Test;
@@ -161,4 +162,22 @@ public class RBNFParseTest extends TestFmwk {
          logln("rbnf_fr:" + rbnf_en.getDefaultRuleSetName());
          parseList(rbnf_en, rbnf_fr, lists);
      }
+
+    @Test
+    public void TestBadParse() {
+        RuleBasedNumberFormat rbnf = new RuleBasedNumberFormat(Locale.JAPAN, RuleBasedNumberFormat.SPELLOUT);
+        String[] testData = {
+                "\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB",
+        };
+
+        for (String testString : testData) {
+            try {
+                rbnf.parse(testString);
+                errln("Unexpected success: " + testString);
+            }
+            catch (ParseException e) {
+                // success!
+            }
+        }
+    }
  }
author	George Rhoten <grhoten@users.noreply.github.com>
	Tue, 13 Feb 2018 23:48:48 +0000 (23:48 +0000)
committer	George Rhoten <grhoten@users.noreply.github.com>
	Tue, 13 Feb 2018 23:48:48 +0000 (23:48 +0000)
.gitignore		patch \| blob \| history
icu4c/source/i18n/nfrs.cpp		patch \| blob \| history
icu4c/source/i18n/nfrs.h		patch \| blob \| history
icu4c/source/i18n/nfrule.cpp		patch \| blob \| history
icu4c/source/i18n/nfrule.h		patch \| blob \| history
icu4c/source/i18n/nfsubs.cpp		patch \| blob \| history
icu4c/source/i18n/nfsubs.h		patch \| blob \| history
icu4c/source/i18n/rbnf.cpp		patch \| blob \| history
icu4c/source/test/intltest/itrbnf.cpp		patch \| blob \| history
icu4c/source/test/intltest/itrbnf.h		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/text/NFRule.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/text/NFRuleSet.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/text/NFSubstitution.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedNumberFormat.java		patch \| blob \| history
icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/RBNFParseTest.java		patch \| blob \| history