ICU-13804 Making number parsing code more robust when given empty symbol strings.

author Shane Carr <shane@unicode.org>

Fri, 1 Jun 2018 00:31:54 +0000 (00:31 +0000)

committer Shane Carr <shane@unicode.org>

Fri, 1 Jun 2018 00:31:54 +0000 (00:31 +0000)
author Shane Carr <shane@unicode.org>
Fri, 1 Jun 2018 00:31:54 +0000 (00:31 +0000)
committer Shane Carr <shane@unicode.org>
Fri, 1 Jun 2018 00:31:54 +0000 (00:31 +0000)
diff --git a/icu4c/source/i18n/numparse_currency.cpp b/icu4c/source/i18n/numparse_currency.cpp

index 2ddafbe50fa564ef29d130a31a970bafd3442dab..ae8196ec483799f0ec1fd328bab1110e60d0d30d 100644 (file)
--- a/icu4c/source/i18n/numparse_currency.cpp
+++ b/icu4c/source/i18n/numparse_currency.cpp
@@ -62,7 +62,7 @@ CombinedCurrencyMatcher::match(StringSegment& segment, ParsedNumber& result, UEr
      // Try to match a currency spacing separator.
      int32_t initialOffset = segment.getOffset();
      bool maybeMore = false;
-    if (result.seenNumber()) {
+    if (result.seenNumber() && !beforeSuffixInsert.isEmpty()) {
          int32_t overlap = segment.getCommonPrefixLength(beforeSuffixInsert);
          if (overlap == beforeSuffixInsert.length()) {
              segment.adjustOffset(overlap);
@@ -79,7 +79,7 @@ CombinedCurrencyMatcher::match(StringSegment& segment, ParsedNumber& result, UEr
      }
  
      // Try to match a currency spacing separator.
-    if (!result.seenNumber()) {
+    if (!result.seenNumber() && !afterPrefixInsert.isEmpty()) {
          int32_t overlap = segment.getCommonPrefixLength(afterPrefixInsert);
          if (overlap == afterPrefixInsert.length()) {
              segment.adjustOffset(overlap);
@@ -95,7 +95,12 @@ bool CombinedCurrencyMatcher::matchCurrency(StringSegment& segment, ParsedNumber
                                              UErrorCode& status) const {
      bool maybeMore = false;
  
-    int32_t overlap1 = segment.getCaseSensitivePrefixLength(fCurrency1);
+    int32_t overlap1;
+    if (!fCurrency1.isEmpty()) {
+        overlap1 = segment.getCaseSensitivePrefixLength(fCurrency1);
+    } else {
+        overlap1 = -1;
+    }
      maybeMore = maybeMore || overlap1 == segment.length();
      if (overlap1 == fCurrency1.length()) {
          utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);
@@ -104,7 +109,12 @@ bool CombinedCurrencyMatcher::matchCurrency(StringSegment& segment, ParsedNumber
          return maybeMore;
      }
  
-    int32_t overlap2 = segment.getCaseSensitivePrefixLength(fCurrency2);
+    int32_t overlap2;
+    if (!fCurrency2.isEmpty()) {
+        overlap2 = segment.getCaseSensitivePrefixLength(fCurrency2);
+    } else {
+        overlap2 = -1;
+    }
      maybeMore = maybeMore || overlap2 == segment.length();
      if (overlap2 == fCurrency2.length()) {
          utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);
diff --git a/icu4c/source/i18n/numparse_decimal.cpp b/icu4c/source/i18n/numparse_decimal.cpp

index 886704386d93d42c806bbce345a0bac69ac34cf3..b120c5c6ad2f91f35ea1d40a98d6b72d0b5ba393 100644 (file)
--- a/icu4c/source/i18n/numparse_decimal.cpp
+++ b/icu4c/source/i18n/numparse_decimal.cpp
@@ -44,12 +44,14 @@ DecimalMatcher::DecimalMatcher(const DecimalFormatSymbols& symbols, const Groupe
              strictSeparators ? unisets::STRICT_PERIOD : unisets::PERIOD);
      if (decimalKey >= 0) {
          decimalUniSet = unisets::get(decimalKey);
-    } else {
+    } else if (!decimalSeparator.isEmpty()) {
          auto* set = new UnicodeSet();
          set->add(decimalSeparator.char32At(0));
          set->freeze();
          decimalUniSet = set;
          fLocalDecimalUniSet.adoptInstead(set);
+    } else {
+        decimalUniSet = unisets::get(unisets::EMPTY);
      }
  
      if (groupingKey >= 0 && decimalKey >= 0) {
@@ -161,6 +163,9 @@ bool DecimalMatcher::match(StringSegment& segment, ParsedNumber& result, int8_t
          if (digit == -1 && !fLocalDigitStrings.isNull()) {
              for (int32_t i = 0; i < 10; i++) {
                  const UnicodeString& str = fLocalDigitStrings[i];
+                if (str.isEmpty()) {
+                    continue;
+                }
                  int32_t overlap = segment.getCommonPrefixLength(str);
                  if (overlap == str.length()) {
                      segment.adjustOffset(overlap);
@@ -191,7 +196,7 @@ bool DecimalMatcher::match(StringSegment& segment, ParsedNumber& result, int8_t
  
          // 1) Attempt the decimal separator string literal.
          // if (we have not seen a decimal separator yet) { ... }
-        if (actualDecimalString.isBogus()) {
+        if (actualDecimalString.isBogus() && !decimalSeparator.isEmpty()) {
              int32_t overlap = segment.getCommonPrefixLength(decimalSeparator);
              maybeMore = maybeMore || (overlap == segment.length());
              if (overlap == decimalSeparator.length()) {
@@ -211,7 +216,8 @@ bool DecimalMatcher::match(StringSegment& segment, ParsedNumber& result, int8_t
  
          // 2.5) Attempt to match a new the grouping separator string literal.
          // if (we have not seen a grouping or decimal separator yet) { ... }
-        if (!groupingDisabled && actualGroupingString.isBogus() && actualDecimalString.isBogus()) {
+        if (!groupingDisabled && actualGroupingString.isBogus() && actualDecimalString.isBogus() &&
+            !groupingSeparator.isEmpty()) {
              int32_t overlap = segment.getCommonPrefixLength(groupingSeparator);
              maybeMore = maybeMore || (overlap == segment.length());
              if (overlap == groupingSeparator.length()) {
diff --git a/icu4c/source/i18n/numparse_scientific.cpp b/icu4c/source/i18n/numparse_scientific.cpp

index 3950392bf26956d323b32df0befaf3818b693978..611695e57d4743ccd7e117eda1bcbd662a151977 100644 (file)
--- a/icu4c/source/i18n/numparse_scientific.cpp
+++ b/icu4c/source/i18n/numparse_scientific.cpp
@@ -57,6 +57,7 @@ bool ScientificMatcher::match(StringSegment& segment, ParsedNumber& result, UErr
      }
  
      // First match the scientific separator, and then match another number after it.
+    // NOTE: This is guarded by the smoke test; no need to check fExponentSeparatorString length again.
      int overlap1 = segment.getCommonPrefixLength(fExponentSeparatorString);
      if (overlap1 == fExponentSeparatorString.length()) {
          // Full exponent separator match.
@@ -75,6 +76,7 @@ bool ScientificMatcher::match(StringSegment& segment, ParsedNumber& result, UErr
          } else if (segment.startsWith(plusSignSet())) {
              segment.adjustOffsetByCodePoint();
          } else if (segment.startsWith(fCustomMinusSign)) {
+            // Note: call site is guarded with startsWith, which returns false on empty string
              int32_t overlap2 = segment.getCommonPrefixLength(fCustomMinusSign);
              if (overlap2 != fCustomMinusSign.length()) {
                  // Partial custom sign match; un-match the exponent separator.
@@ -84,6 +86,7 @@ bool ScientificMatcher::match(StringSegment& segment, ParsedNumber& result, UErr
              exponentSign = -1;
              segment.adjustOffset(overlap2);
          } else if (segment.startsWith(fCustomPlusSign)) {
+            // Note: call site is guarded with startsWith, which returns false on empty string
              int32_t overlap2 = segment.getCommonPrefixLength(fCustomPlusSign);
              if (overlap2 != fCustomPlusSign.length()) {
                  // Partial custom sign match; un-match the exponent separator.
diff --git a/icu4c/source/i18n/numparse_stringsegment.cpp b/icu4c/source/i18n/numparse_stringsegment.cpp

index 0c5d4267dc526094ca7c0e959c1298628a17b001..3db4fe618a603a0043151c01af93793957c818e7 100644 (file)
--- a/icu4c/source/i18n/numparse_stringsegment.cpp
+++ b/icu4c/source/i18n/numparse_stringsegment.cpp
@@ -112,6 +112,7 @@ int32_t StringSegment::getCaseSensitivePrefixLength(const UnicodeString& other)
  }
  
  int32_t StringSegment::getPrefixLengthInternal(const UnicodeString& other, bool foldCase) {
+    U_ASSERT(other.length() > 0);
      int32_t offset = 0;
      for (; offset < uprv_min(length(), other.length());) {
          // TODO: case-fold code points, not chars
diff --git a/icu4c/source/i18n/numparse_types.h b/icu4c/source/i18n/numparse_types.h

index 50b98be7333a82be6e21fb24eb37e93c90dd0a94..ab591eaba83af52bd89f7247a0ba6edc9f9d3fa3 100644 (file)
--- a/icu4c/source/i18n/numparse_types.h
+++ b/icu4c/source/i18n/numparse_types.h
@@ -247,7 +247,13 @@ class U_I18N_API StringSegment : public UMemory {
       * since the first 2 characters are the same.
       *
       * <p>
-     * This method will perform case folding if case folding is enabled for the parser.
+     * This method only returns offsets along code point boundaries.
+     *
+     * <p>
+     * This method will perform case folding if case folding was enabled in the constructor.
+     *
+     * <p>
+     * IMPORTANT: The given UnicodeString must not be empty! It is the caller's responsibility to check.
       */
      int32_t getCommonPrefixLength(const UnicodeString& other);
  
diff --git a/icu4c/source/test/intltest/numfmtst.cpp b/icu4c/source/test/intltest/numfmtst.cpp

index 515f514484d62e0a37227b9807f39f6ab5d43710..12172fcf166755b2c83bbc642124ef7fa65601f7 100644 (file)
--- a/icu4c/source/test/intltest/numfmtst.cpp
+++ b/icu4c/source/test/intltest/numfmtst.cpp
@@ -214,6 +214,7 @@ void NumberFormatTest::runIndexedTest( int32_t index, UBool exec, const char* &n
    TESTCASE_AUTO(Test11640_TripleCurrencySymbol);
    TESTCASE_AUTO(Test13763_FieldPositionIteratorOffset);
    TESTCASE_AUTO(Test13777_ParseLongNameNonCurrencyMode);
+  TESTCASE_AUTO(Test13804_EmptyStringsWhenParsing);
    TESTCASE_AUTO_END;
  }
  
@@ -9064,4 +9065,80 @@ void NumberFormatTest::Test13777_ParseLongNameNonCurrencyMode() {
      expect2(*df, 1.5, u"1.50 US dollars");
  }
  
+void NumberFormatTest::Test13804_EmptyStringsWhenParsing() {
+    IcuTestErrorCode status(*this, "Test13804_EmptyStringsWhenParsing");
+
+    DecimalFormatSymbols dfs("en", status);
+    if (status.errIfFailureAndReset()) {
+        return;
+    }
+    dfs.setSymbol(DecimalFormatSymbols::kCurrencySymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kZeroDigitSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kOneDigitSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kTwoDigitSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kThreeDigitSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kFourDigitSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kFiveDigitSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kSixDigitSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kSevenDigitSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kEightDigitSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kNineDigitSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kExponentMultiplicationSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kExponentialSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kGroupingSeparatorSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kInfinitySymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kIntlCurrencySymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kMinusSignSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kMonetarySeparatorSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kMonetaryGroupingSeparatorSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kNaNSymbol, u"", FALSE);
+    dfs.setPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, FALSE, u"");
+    dfs.setPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, TRUE, u"");
+    dfs.setSymbol(DecimalFormatSymbols::kPercentSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kPerMillSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kPlusSignSymbol, u"", FALSE);
+
+    DecimalFormat df("0", dfs, status);
+    if (status.errIfFailureAndReset()) {
+        return;
+    }
+    df.setGroupingUsed(TRUE);
+    df.setScientificNotation(TRUE);
+    df.setLenient(TRUE); // enable all matchers
+    {
+        UnicodeString result;
+        df.format(0, result); // should not crash or hit infinite loop
+    }
+    const char16_t* samples[] = {
+            u"",
+            u"123",
+            u"$123",
+            u"-",
+            u"+",
+            u"44%",
+            u"1E+2.3"
+    };
+    for (auto& sample : samples) {
+        logln(UnicodeString(u"Attempting parse on: ") + sample);
+        status.setScope(sample);
+        // We don't care about the results, only that we don't crash and don't loop.
+        Formattable result;
+        ParsePosition ppos(0);
+        df.parse(sample, result, ppos);
+        ppos = ParsePosition(0);
+        df.parseCurrency(sample, ppos);
+        status.errIfFailureAndReset();
+    }
+
+    // Test with a nonempty exponent separator symbol to cover more code
+    dfs.setSymbol(DecimalFormatSymbols::kExponentialSymbol, u"E", FALSE);
+    df.setDecimalFormatSymbols(dfs);
+    {
+        Formattable result;
+        ParsePosition ppos(0);
+        df.parse(u"1E+2.3", result, ppos);
+    }
+}
+
  #endif /* #if !UCONFIG_NO_FORMATTING */
diff --git a/icu4c/source/test/intltest/numfmtst.h b/icu4c/source/test/intltest/numfmtst.h

index 60922c150e77d28a4ecabdfba281e35934f51493..1c7c86229649de49e826e3e908c1850b42639261 100644 (file)
--- a/icu4c/source/test/intltest/numfmtst.h
+++ b/icu4c/source/test/intltest/numfmtst.h
@@ -278,6 +278,7 @@ class NumberFormatTest: public CalendarTimeZoneTest {
      void Test11640_TripleCurrencySymbol();
      void Test13763_FieldPositionIteratorOffset();
      void Test13777_ParseLongNameNonCurrencyMode();
+    void Test13804_EmptyStringsWhenParsing();
  
   private:
      UBool testFormattableAsUFormattable(const char *file, int line, Formattable &f);
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/StaticUnicodeSets.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/StaticUnicodeSets.java

index b4543ef69e26c313f6907dcb5fb4a10b4fb9c767..63f250a012aca982699d07fe1d71c5458f22b2e9 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/StaticUnicodeSets.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/StaticUnicodeSets.java
@@ -2,6 +2,8 @@
  // License & terms of use: http://www.unicode.org/copyright.html#License
  package com.ibm.icu.impl;
  
+import static com.ibm.icu.impl.number.parse.ParsingUtils.safeContains;
+
  import java.util.EnumMap;
  import java.util.Map;
  
@@ -90,7 +92,7 @@ public class StaticUnicodeSets {
       * @return key1 if the set contains str, or COUNT if not.
       */
      public static Key chooseFrom(String str, Key key1) {
-        return get(key1).contains(str) ? key1 : null;
+        return safeContains(get(key1), str) ? key1 : null;
      }
  
      /**
@@ -108,7 +110,7 @@ public class StaticUnicodeSets {
       *         contains str.
       */
      public static Key chooseFrom(String str, Key key1, Key key2) {
-        return get(key1).contains(str) ? key1 : chooseFrom(str, key2);
+        return safeContains(get(key1), str) ? key1 : chooseFrom(str, key2);
      }
  
      /**
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/StringSegment.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/StringSegment.java

index 813355fbfb091d7812c74295b28b3ee9554af3e7..7ccf083ed61b48f9d24d1c54120820b1a1fe88b4 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/StringSegment.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/StringSegment.java
@@ -156,6 +156,9 @@ public class StringSegment implements CharSequence {
       *
       * <p>
       * This method will perform case folding if case folding was enabled in the constructor.
+     *
+     * <p>
+     * IMPORTANT: The given CharSequence must not be empty! It is the caller's responsibility to check.
       */
      public int getCommonPrefixLength(CharSequence other) {
          return getPrefixLengthInternal(other, foldCase);
@@ -170,8 +173,10 @@ public class StringSegment implements CharSequence {
      }
  
      private int getPrefixLengthInternal(CharSequence other, boolean foldCase) {
+        assert other.length() != 0;
          int offset = 0;
          for (; offset < Math.min(length(), other.length());) {
+            // TODO: case-fold code points, not chars
              int cp1 = Character.codePointAt(this, offset);
              int cp2 = Character.codePointAt(other, offset);
              if (!codePointsEqual(cp1, cp2, foldCase)) {
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CombinedCurrencyMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CombinedCurrencyMatcher.java

index a48be3544a7f0467280ba79dab02bfb51d1ed02f..c46db004603c9f982687fb10ac776d2a85c96766 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CombinedCurrencyMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CombinedCurrencyMatcher.java
@@ -97,7 +97,7 @@ public class CombinedCurrencyMatcher implements NumberParseMatcher {
          // Try to match a currency spacing separator.
          int initialOffset = segment.getOffset();
          boolean maybeMore = false;
-        if (result.seenNumber()) {
+        if (result.seenNumber() && !beforeSuffixInsert.isEmpty()) {
              int overlap = segment.getCommonPrefixLength(beforeSuffixInsert);
              if (overlap == beforeSuffixInsert.length()) {
                  segment.adjustOffset(overlap);
@@ -114,7 +114,7 @@ public class CombinedCurrencyMatcher implements NumberParseMatcher {
          }
  
          // Try to match a currency spacing separator.
-        if (!result.seenNumber()) {
+        if (!result.seenNumber() && !afterPrefixInsert.isEmpty()) {
              int overlap = segment.getCommonPrefixLength(afterPrefixInsert);
              if (overlap == afterPrefixInsert.length()) {
                  segment.adjustOffset(overlap);
@@ -130,7 +130,12 @@ public class CombinedCurrencyMatcher implements NumberParseMatcher {
      private boolean matchCurrency(StringSegment segment, ParsedNumber result) {
          boolean maybeMore = false;
  
-        int overlap1 = segment.getCaseSensitivePrefixLength(currency1);
+        int overlap1;
+        if (!currency1.isEmpty()) {
+            overlap1 = segment.getCaseSensitivePrefixLength(currency1);
+        } else {
+            overlap1 = -1;
+        }
          maybeMore = maybeMore || overlap1 == segment.length();
          if (overlap1 == currency1.length()) {
              result.currencyCode = isoCode;
@@ -139,7 +144,12 @@ public class CombinedCurrencyMatcher implements NumberParseMatcher {
              return maybeMore;
          }
  
-        int overlap2 = segment.getCaseSensitivePrefixLength(currency2);
+        int overlap2;
+        if (!currency2.isEmpty()) {
+            overlap2 = segment.getCaseSensitivePrefixLength(currency2);
+        } else {
+            overlap2 = -1;
+        }
          maybeMore = maybeMore || overlap2 == segment.length();
          if (overlap2 == currency2.length()) {
              result.currencyCode = isoCode;
@@ -169,6 +179,9 @@ public class CombinedCurrencyMatcher implements NumberParseMatcher {
              int longestFullMatch = 0;
              for (int i=0; i<StandardPlural.COUNT; i++) {
                  String name = localLongNames[i];
+                if (name.isEmpty()) {
+                    continue;
+                }
                  int overlap = segment.getCommonPrefixLength(name);
                  if (overlap == name.length() && name.length() > longestFullMatch) {
                      longestFullMatch = name.length();
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/DecimalMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/DecimalMatcher.java

index 17ef898f01ae653cfde8587c717106203b93d41f..4825b1ad4bd3d74e35ad5e2e7df3ffa3c12e3b7c 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/DecimalMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/DecimalMatcher.java
@@ -71,8 +71,10 @@ public class DecimalMatcher implements NumberParseMatcher {
                  strictSeparators ? Key.STRICT_PERIOD : Key.PERIOD);
          if (decimalKey != null) {
              decimalUniSet = StaticUnicodeSets.get(decimalKey);
-        } else {
+        } else if (!decimalSeparator.isEmpty()) {
              decimalUniSet = new UnicodeSet().add(decimalSeparator.codePointAt(0)).freeze();
+        } else {
+            decimalUniSet = UnicodeSet.EMPTY;
          }
  
          if (groupingKey != null && decimalKey != null) {
@@ -178,6 +180,9 @@ public class DecimalMatcher implements NumberParseMatcher {
              if (digit == -1 && digitStrings != null) {
                  for (int i = 0; i < digitStrings.length; i++) {
                      String str = digitStrings[i];
+                    if (str.isEmpty()) {
+                        continue;
+                    }
                      int overlap = segment.getCommonPrefixLength(str);
                      if (overlap == str.length()) {
                          segment.adjustOffset(overlap);
@@ -207,7 +212,7 @@ public class DecimalMatcher implements NumberParseMatcher {
  
              // 1) Attempt the decimal separator string literal.
              // if (we have not seen a decimal separator yet) { ... }
-            if (actualDecimalString == null) {
+            if (actualDecimalString == null && !decimalSeparator.isEmpty()) {
                  int overlap = segment.getCommonPrefixLength(decimalSeparator);
                  maybeMore = maybeMore || (overlap == segment.length());
                  if (overlap == decimalSeparator.length()) {
@@ -227,7 +232,10 @@ public class DecimalMatcher implements NumberParseMatcher {
  
              // 2.5) Attempt to match a new the grouping separator string literal.
              // if (we have not seen a grouping or decimal separator yet) { ... }
-            if (!groupingDisabled && actualGroupingString == null && actualDecimalString == null) {
+            if (!groupingDisabled
+                    && actualGroupingString == null
+                    && actualDecimalString == null
+                    && !groupingSeparator.isEmpty()) {
                  int overlap = segment.getCommonPrefixLength(groupingSeparator);
                  maybeMore = maybeMore || (overlap == segment.length());
                  if (overlap == groupingSeparator.length()) {
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/InfinityMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/InfinityMatcher.java

index 081112797b9d35f94ae29ba7043c571db2472057..0aa915aca63db764c6abbd99c18e72124e34f2a6 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/InfinityMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/InfinityMatcher.java
@@ -2,6 +2,8 @@
  // License & terms of use: http://www.unicode.org/copyright.html#License
  package com.ibm.icu.impl.number.parse;
  
+import static com.ibm.icu.impl.number.parse.ParsingUtils.safeContains;
+
  import com.ibm.icu.impl.StaticUnicodeSets;
  import com.ibm.icu.impl.StringSegment;
  import com.ibm.icu.text.DecimalFormatSymbols;
@@ -16,7 +18,7 @@ public class InfinityMatcher extends SymbolMatcher {
  
      public static InfinityMatcher getInstance(DecimalFormatSymbols symbols) {
          String symbolString = symbols.getInfinity();
-        if (DEFAULT.uniSet.contains(symbolString)) {
+        if (safeContains(DEFAULT.uniSet, symbolString)) {
              return DEFAULT;
          } else {
              return new InfinityMatcher(symbolString);
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/MinusSignMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/MinusSignMatcher.java

index c31c29a1cac5ceb33eef097f6d7758eb65efe542..126b9b44206c635586afe89b167424c0f2a3d4df 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/MinusSignMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/MinusSignMatcher.java
@@ -2,6 +2,8 @@
  // License & terms of use: http://www.unicode.org/copyright.html#License
  package com.ibm.icu.impl.number.parse;
  
+import static com.ibm.icu.impl.number.parse.ParsingUtils.safeContains;
+
  import com.ibm.icu.impl.StaticUnicodeSets;
  import com.ibm.icu.impl.StringSegment;
  import com.ibm.icu.text.DecimalFormatSymbols;
@@ -17,7 +19,7 @@ public class MinusSignMatcher extends SymbolMatcher {
  
      public static MinusSignMatcher getInstance(DecimalFormatSymbols symbols, boolean allowTrailing) {
          String symbolString = symbols.getMinusSignString();
-        if (DEFAULT.uniSet.contains(symbolString)) {
+        if (safeContains(DEFAULT.uniSet, symbolString)) {
              return allowTrailing ? DEFAULT_ALLOW_TRAILING : DEFAULT;
          } else {
              return new MinusSignMatcher(symbolString, allowTrailing);
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ParsingUtils.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ParsingUtils.java

index 7163dc6cf96debb44299c220efeb33d417ab6983..1fbab42b90090fb859bd00431bca9d6820bedd19 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ParsingUtils.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ParsingUtils.java
@@ -40,4 +40,9 @@ public class ParsingUtils {
          }
      }
  
+    // TODO: Remove this helper function (and update call sites) when #13805 is fixed
+    public static boolean safeContains(UnicodeSet uniset, CharSequence str) {
+        return str.length() != 0 && uniset.contains(str);
+    }
+
  }
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/PlusSignMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/PlusSignMatcher.java

index e65a0b07961090a70fb763bbcdf9b019021242e8..7887f04c78dec2f9874f9b20f5ec114db3d27f8b 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/PlusSignMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/PlusSignMatcher.java
@@ -2,6 +2,8 @@
  // License & terms of use: http://www.unicode.org/copyright.html#License
  package com.ibm.icu.impl.number.parse;
  
+import static com.ibm.icu.impl.number.parse.ParsingUtils.safeContains;
+
  import com.ibm.icu.impl.StaticUnicodeSets;
  import com.ibm.icu.impl.StringSegment;
  import com.ibm.icu.text.DecimalFormatSymbols;
@@ -17,7 +19,7 @@ public class PlusSignMatcher extends SymbolMatcher {
  
      public static PlusSignMatcher getInstance(DecimalFormatSymbols symbols, boolean allowTrailing) {
          String symbolString = symbols.getPlusSignString();
-        if (DEFAULT.uniSet.contains(symbolString)) {
+        if (safeContains(DEFAULT.uniSet, symbolString)) {
              return allowTrailing ? DEFAULT_ALLOW_TRAILING : DEFAULT;
          } else {
              return new PlusSignMatcher(symbolString, allowTrailing);
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ScientificMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ScientificMatcher.java

index 95dff257db85f59527e1f0812da421c6dcb9b4c6..1904ea414a21921910d042aed0f8b9084cfe69a7 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ScientificMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ScientificMatcher.java
@@ -2,6 +2,8 @@
  // License & terms of use: http://www.unicode.org/copyright.html#License
  package com.ibm.icu.impl.number.parse;
  
+import static com.ibm.icu.impl.number.parse.ParsingUtils.safeContains;
+
  import com.ibm.icu.impl.StaticUnicodeSets;
  import com.ibm.icu.impl.StringSegment;
  import com.ibm.icu.impl.number.DecimalQuantity_DualStorageBCD;
@@ -32,9 +34,9 @@ public class ScientificMatcher implements NumberParseMatcher {
                  ParsingUtils.PARSE_FLAG_INTEGER_ONLY | ParsingUtils.PARSE_FLAG_GROUPING_DISABLED);
  
          String minusSign = symbols.getMinusSignString();
-        customMinusSign = minusSignSet().contains(minusSign) ? null : minusSign;
+        customMinusSign = safeContains(minusSignSet(), minusSign) ? null : minusSign;
          String plusSign = symbols.getPlusSignString();
-        customPlusSign = plusSignSet().contains(plusSign) ? null : plusSign;
+        customPlusSign = safeContains(plusSignSet(), plusSign) ? null : plusSign;
      }
  
      private static UnicodeSet minusSignSet() {
@@ -53,6 +55,7 @@ public class ScientificMatcher implements NumberParseMatcher {
          }
  
          // First match the scientific separator, and then match another number after it.
+        // NOTE: This is guarded by the smoke test; no need to check exponentSeparatorString length again.
          int overlap1 = segment.getCommonPrefixLength(exponentSeparatorString);
          if (overlap1 == exponentSeparatorString.length()) {
              // Full exponent separator match.
@@ -71,6 +74,7 @@ public class ScientificMatcher implements NumberParseMatcher {
              } else if (segment.startsWith(plusSignSet())) {
                  segment.adjustOffsetByCodePoint();
              } else if (segment.startsWith(customMinusSign)) {
+                // Note: call site is guarded with startsWith, which returns false on empty string
                  int overlap2 = segment.getCommonPrefixLength(customMinusSign);
                  if (overlap2 != customMinusSign.length()) {
                      // Partial custom sign match; un-match the exponent separator.
@@ -80,6 +84,7 @@ public class ScientificMatcher implements NumberParseMatcher {
                  exponentSign = -1;
                  segment.adjustOffset(overlap2);
              } else if (segment.startsWith(customPlusSign)) {
+                // Note: call site is guarded with startsWith, which returns false on empty string
                  int overlap2 = segment.getCommonPrefixLength(customPlusSign);
                  if (overlap2 != customPlusSign.length()) {
                      // Partial custom sign match; un-match the exponent separator.
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTest.java

index 1210340ac244a4a259c45b3c2e906631eacffbcb..78bf6b56ebadd5932578dcbcfd8d045a205c331f 100644 (file)
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTest.java
@@ -6170,4 +6170,57 @@ public class NumberFormatTest extends TestFmwk {
          NumberFormat df = NumberFormat.getInstance(ULocale.US, NumberFormat.PLURALCURRENCYSTYLE);
          expect2(df, 1.5, "1.50 US dollars");
      }
+
+    @Test
+    public void test13804_EmptyStringsWhenParsing() {
+        DecimalFormatSymbols dfs = DecimalFormatSymbols.getInstance(ULocale.ENGLISH);
+        dfs.setCurrencySymbol("");
+        dfs.setDecimalSeparatorString("");
+        dfs.setDigitStrings(new String[] { "", "", "", "", "", "", "", "", "", "" });
+        dfs.setExponentMultiplicationSign("");
+        dfs.setExponentSeparator("");
+        dfs.setGroupingSeparatorString("");
+        dfs.setInfinity("");
+        dfs.setInternationalCurrencySymbol("");
+        dfs.setMinusSignString("");
+        dfs.setMonetaryDecimalSeparatorString("");
+        dfs.setMonetaryGroupingSeparatorString("");
+        dfs.setNaN("");
+        dfs.setPatternForCurrencySpacing(DecimalFormatSymbols.CURRENCY_SPC_INSERT, false, "");
+        dfs.setPatternForCurrencySpacing(DecimalFormatSymbols.CURRENCY_SPC_INSERT, true, "");
+        dfs.setPercentString("");
+        dfs.setPerMillString("");
+        dfs.setPlusSignString("");
+
+        DecimalFormat df = new DecimalFormat("0", dfs);
+        df.setGroupingUsed(true);
+        df.setScientificNotation(true);
+        df.setParseStrict(false); // enable all matchers
+        df.format(0); // should not throw or hit infinite loop
+        String[] samples = new String[] {
+                "",
+                "123",
+                "$123",
+                "-",
+                "+",
+                "44%",
+                "1E+2.3"
+        };
+        for (String sample : samples) {
+            logln("Attempting parse on: " + sample);
+            // We don't care about the results, only that we don't throw and don't loop.
+            ParsePosition ppos = new ParsePosition(0);
+            df.parse(sample, ppos);
+            ppos = new ParsePosition(0);
+            df.parseCurrency(sample, ppos);
+        }
+
+        // Test with a nonempty exponent separator symbol to cover more code
+        dfs.setExponentSeparator("E");
+        df.setDecimalFormatSymbols(dfs);
+        {
+            ParsePosition ppos = new ParsePosition(0);
+            df.parse("1E+2.3", ppos);
+        }
+    }
  }
author	Shane Carr <shane@unicode.org>
	Fri, 1 Jun 2018 00:31:54 +0000 (00:31 +0000)
committer	Shane Carr <shane@unicode.org>
	Fri, 1 Jun 2018 00:31:54 +0000 (00:31 +0000)
icu4c/source/i18n/numparse_currency.cpp		patch \| blob \| history
icu4c/source/i18n/numparse_decimal.cpp		patch \| blob \| history
icu4c/source/i18n/numparse_scientific.cpp		patch \| blob \| history
icu4c/source/i18n/numparse_stringsegment.cpp		patch \| blob \| history
icu4c/source/i18n/numparse_types.h		patch \| blob \| history
icu4c/source/test/intltest/numfmtst.cpp		patch \| blob \| history
icu4c/source/test/intltest/numfmtst.h		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/impl/StaticUnicodeSets.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/impl/StringSegment.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CombinedCurrencyMatcher.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/DecimalMatcher.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/InfinityMatcher.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/MinusSignMatcher.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ParsingUtils.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/PlusSignMatcher.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ScientificMatcher.java		patch \| blob \| history
icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTest.java		patch \| blob \| history