]> granicus.if.org Git - icu/commitdiff
ICU-13804 Making number parsing code more robust when given empty symbol strings.
authorShane Carr <shane@unicode.org>
Fri, 1 Jun 2018 00:31:54 +0000 (00:31 +0000)
committerShane Carr <shane@unicode.org>
Fri, 1 Jun 2018 00:31:54 +0000 (00:31 +0000)
X-SVN-Rev: 41497

17 files changed:
icu4c/source/i18n/numparse_currency.cpp
icu4c/source/i18n/numparse_decimal.cpp
icu4c/source/i18n/numparse_scientific.cpp
icu4c/source/i18n/numparse_stringsegment.cpp
icu4c/source/i18n/numparse_types.h
icu4c/source/test/intltest/numfmtst.cpp
icu4c/source/test/intltest/numfmtst.h
icu4j/main/classes/core/src/com/ibm/icu/impl/StaticUnicodeSets.java
icu4j/main/classes/core/src/com/ibm/icu/impl/StringSegment.java
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CombinedCurrencyMatcher.java
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/DecimalMatcher.java
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/InfinityMatcher.java
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/MinusSignMatcher.java
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ParsingUtils.java
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/PlusSignMatcher.java
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ScientificMatcher.java
icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTest.java

index 2ddafbe50fa564ef29d130a31a970bafd3442dab..ae8196ec483799f0ec1fd328bab1110e60d0d30d 100644 (file)
@@ -62,7 +62,7 @@ CombinedCurrencyMatcher::match(StringSegment& segment, ParsedNumber& result, UEr
     // Try to match a currency spacing separator.
     int32_t initialOffset = segment.getOffset();
     bool maybeMore = false;
-    if (result.seenNumber()) {
+    if (result.seenNumber() && !beforeSuffixInsert.isEmpty()) {
         int32_t overlap = segment.getCommonPrefixLength(beforeSuffixInsert);
         if (overlap == beforeSuffixInsert.length()) {
             segment.adjustOffset(overlap);
@@ -79,7 +79,7 @@ CombinedCurrencyMatcher::match(StringSegment& segment, ParsedNumber& result, UEr
     }
 
     // Try to match a currency spacing separator.
-    if (!result.seenNumber()) {
+    if (!result.seenNumber() && !afterPrefixInsert.isEmpty()) {
         int32_t overlap = segment.getCommonPrefixLength(afterPrefixInsert);
         if (overlap == afterPrefixInsert.length()) {
             segment.adjustOffset(overlap);
@@ -95,7 +95,12 @@ bool CombinedCurrencyMatcher::matchCurrency(StringSegment& segment, ParsedNumber
                                             UErrorCode& status) const {
     bool maybeMore = false;
 
-    int32_t overlap1 = segment.getCaseSensitivePrefixLength(fCurrency1);
+    int32_t overlap1;
+    if (!fCurrency1.isEmpty()) {
+        overlap1 = segment.getCaseSensitivePrefixLength(fCurrency1);
+    } else {
+        overlap1 = -1;
+    }
     maybeMore = maybeMore || overlap1 == segment.length();
     if (overlap1 == fCurrency1.length()) {
         utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);
@@ -104,7 +109,12 @@ bool CombinedCurrencyMatcher::matchCurrency(StringSegment& segment, ParsedNumber
         return maybeMore;
     }
 
-    int32_t overlap2 = segment.getCaseSensitivePrefixLength(fCurrency2);
+    int32_t overlap2;
+    if (!fCurrency2.isEmpty()) {
+        overlap2 = segment.getCaseSensitivePrefixLength(fCurrency2);
+    } else {
+        overlap2 = -1;
+    }
     maybeMore = maybeMore || overlap2 == segment.length();
     if (overlap2 == fCurrency2.length()) {
         utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);
index 886704386d93d42c806bbce345a0bac69ac34cf3..b120c5c6ad2f91f35ea1d40a98d6b72d0b5ba393 100644 (file)
@@ -44,12 +44,14 @@ DecimalMatcher::DecimalMatcher(const DecimalFormatSymbols& symbols, const Groupe
             strictSeparators ? unisets::STRICT_PERIOD : unisets::PERIOD);
     if (decimalKey >= 0) {
         decimalUniSet = unisets::get(decimalKey);
-    } else {
+    } else if (!decimalSeparator.isEmpty()) {
         auto* set = new UnicodeSet();
         set->add(decimalSeparator.char32At(0));
         set->freeze();
         decimalUniSet = set;
         fLocalDecimalUniSet.adoptInstead(set);
+    } else {
+        decimalUniSet = unisets::get(unisets::EMPTY);
     }
 
     if (groupingKey >= 0 && decimalKey >= 0) {
@@ -161,6 +163,9 @@ bool DecimalMatcher::match(StringSegment& segment, ParsedNumber& result, int8_t
         if (digit == -1 && !fLocalDigitStrings.isNull()) {
             for (int32_t i = 0; i < 10; i++) {
                 const UnicodeString& str = fLocalDigitStrings[i];
+                if (str.isEmpty()) {
+                    continue;
+                }
                 int32_t overlap = segment.getCommonPrefixLength(str);
                 if (overlap == str.length()) {
                     segment.adjustOffset(overlap);
@@ -191,7 +196,7 @@ bool DecimalMatcher::match(StringSegment& segment, ParsedNumber& result, int8_t
 
         // 1) Attempt the decimal separator string literal.
         // if (we have not seen a decimal separator yet) { ... }
-        if (actualDecimalString.isBogus()) {
+        if (actualDecimalString.isBogus() && !decimalSeparator.isEmpty()) {
             int32_t overlap = segment.getCommonPrefixLength(decimalSeparator);
             maybeMore = maybeMore || (overlap == segment.length());
             if (overlap == decimalSeparator.length()) {
@@ -211,7 +216,8 @@ bool DecimalMatcher::match(StringSegment& segment, ParsedNumber& result, int8_t
 
         // 2.5) Attempt to match a new the grouping separator string literal.
         // if (we have not seen a grouping or decimal separator yet) { ... }
-        if (!groupingDisabled && actualGroupingString.isBogus() && actualDecimalString.isBogus()) {
+        if (!groupingDisabled && actualGroupingString.isBogus() && actualDecimalString.isBogus() &&
+            !groupingSeparator.isEmpty()) {
             int32_t overlap = segment.getCommonPrefixLength(groupingSeparator);
             maybeMore = maybeMore || (overlap == segment.length());
             if (overlap == groupingSeparator.length()) {
index 3950392bf26956d323b32df0befaf3818b693978..611695e57d4743ccd7e117eda1bcbd662a151977 100644 (file)
@@ -57,6 +57,7 @@ bool ScientificMatcher::match(StringSegment& segment, ParsedNumber& result, UErr
     }
 
     // First match the scientific separator, and then match another number after it.
+    // NOTE: This is guarded by the smoke test; no need to check fExponentSeparatorString length again.
     int overlap1 = segment.getCommonPrefixLength(fExponentSeparatorString);
     if (overlap1 == fExponentSeparatorString.length()) {
         // Full exponent separator match.
@@ -75,6 +76,7 @@ bool ScientificMatcher::match(StringSegment& segment, ParsedNumber& result, UErr
         } else if (segment.startsWith(plusSignSet())) {
             segment.adjustOffsetByCodePoint();
         } else if (segment.startsWith(fCustomMinusSign)) {
+            // Note: call site is guarded with startsWith, which returns false on empty string
             int32_t overlap2 = segment.getCommonPrefixLength(fCustomMinusSign);
             if (overlap2 != fCustomMinusSign.length()) {
                 // Partial custom sign match; un-match the exponent separator.
@@ -84,6 +86,7 @@ bool ScientificMatcher::match(StringSegment& segment, ParsedNumber& result, UErr
             exponentSign = -1;
             segment.adjustOffset(overlap2);
         } else if (segment.startsWith(fCustomPlusSign)) {
+            // Note: call site is guarded with startsWith, which returns false on empty string
             int32_t overlap2 = segment.getCommonPrefixLength(fCustomPlusSign);
             if (overlap2 != fCustomPlusSign.length()) {
                 // Partial custom sign match; un-match the exponent separator.
index 0c5d4267dc526094ca7c0e959c1298628a17b001..3db4fe618a603a0043151c01af93793957c818e7 100644 (file)
@@ -112,6 +112,7 @@ int32_t StringSegment::getCaseSensitivePrefixLength(const UnicodeString& other)
 }
 
 int32_t StringSegment::getPrefixLengthInternal(const UnicodeString& other, bool foldCase) {
+    U_ASSERT(other.length() > 0);
     int32_t offset = 0;
     for (; offset < uprv_min(length(), other.length());) {
         // TODO: case-fold code points, not chars
index 50b98be7333a82be6e21fb24eb37e93c90dd0a94..ab591eaba83af52bd89f7247a0ba6edc9f9d3fa3 100644 (file)
@@ -247,7 +247,13 @@ class U_I18N_API StringSegment : public UMemory {
      * since the first 2 characters are the same.
      *
      * <p>
-     * This method will perform case folding if case folding is enabled for the parser.
+     * This method only returns offsets along code point boundaries.
+     *
+     * <p>
+     * This method will perform case folding if case folding was enabled in the constructor.
+     *
+     * <p>
+     * IMPORTANT: The given UnicodeString must not be empty! It is the caller's responsibility to check.
      */
     int32_t getCommonPrefixLength(const UnicodeString& other);
 
index 515f514484d62e0a37227b9807f39f6ab5d43710..12172fcf166755b2c83bbc642124ef7fa65601f7 100644 (file)
@@ -214,6 +214,7 @@ void NumberFormatTest::runIndexedTest( int32_t index, UBool exec, const char* &n
   TESTCASE_AUTO(Test11640_TripleCurrencySymbol);
   TESTCASE_AUTO(Test13763_FieldPositionIteratorOffset);
   TESTCASE_AUTO(Test13777_ParseLongNameNonCurrencyMode);
+  TESTCASE_AUTO(Test13804_EmptyStringsWhenParsing);
   TESTCASE_AUTO_END;
 }
 
@@ -9064,4 +9065,80 @@ void NumberFormatTest::Test13777_ParseLongNameNonCurrencyMode() {
     expect2(*df, 1.5, u"1.50 US dollars");
 }
 
+void NumberFormatTest::Test13804_EmptyStringsWhenParsing() {
+    IcuTestErrorCode status(*this, "Test13804_EmptyStringsWhenParsing");
+
+    DecimalFormatSymbols dfs("en", status);
+    if (status.errIfFailureAndReset()) {
+        return;
+    }
+    dfs.setSymbol(DecimalFormatSymbols::kCurrencySymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kZeroDigitSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kOneDigitSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kTwoDigitSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kThreeDigitSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kFourDigitSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kFiveDigitSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kSixDigitSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kSevenDigitSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kEightDigitSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kNineDigitSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kExponentMultiplicationSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kExponentialSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kGroupingSeparatorSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kInfinitySymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kIntlCurrencySymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kMinusSignSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kMonetarySeparatorSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kMonetaryGroupingSeparatorSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kNaNSymbol, u"", FALSE);
+    dfs.setPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, FALSE, u"");
+    dfs.setPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, TRUE, u"");
+    dfs.setSymbol(DecimalFormatSymbols::kPercentSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kPerMillSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kPlusSignSymbol, u"", FALSE);
+
+    DecimalFormat df("0", dfs, status);
+    if (status.errIfFailureAndReset()) {
+        return;
+    }
+    df.setGroupingUsed(TRUE);
+    df.setScientificNotation(TRUE);
+    df.setLenient(TRUE); // enable all matchers
+    {
+        UnicodeString result;
+        df.format(0, result); // should not crash or hit infinite loop
+    }
+    const char16_t* samples[] = {
+            u"",
+            u"123",
+            u"$123",
+            u"-",
+            u"+",
+            u"44%",
+            u"1E+2.3"
+    };
+    for (auto& sample : samples) {
+        logln(UnicodeString(u"Attempting parse on: ") + sample);
+        status.setScope(sample);
+        // We don't care about the results, only that we don't crash and don't loop.
+        Formattable result;
+        ParsePosition ppos(0);
+        df.parse(sample, result, ppos);
+        ppos = ParsePosition(0);
+        df.parseCurrency(sample, ppos);
+        status.errIfFailureAndReset();
+    }
+
+    // Test with a nonempty exponent separator symbol to cover more code
+    dfs.setSymbol(DecimalFormatSymbols::kExponentialSymbol, u"E", FALSE);
+    df.setDecimalFormatSymbols(dfs);
+    {
+        Formattable result;
+        ParsePosition ppos(0);
+        df.parse(u"1E+2.3", result, ppos);
+    }
+}
+
 #endif /* #if !UCONFIG_NO_FORMATTING */
index 60922c150e77d28a4ecabdfba281e35934f51493..1c7c86229649de49e826e3e908c1850b42639261 100644 (file)
@@ -278,6 +278,7 @@ class NumberFormatTest: public CalendarTimeZoneTest {
     void Test11640_TripleCurrencySymbol();
     void Test13763_FieldPositionIteratorOffset();
     void Test13777_ParseLongNameNonCurrencyMode();
+    void Test13804_EmptyStringsWhenParsing();
 
  private:
     UBool testFormattableAsUFormattable(const char *file, int line, Formattable &f);
index b4543ef69e26c313f6907dcb5fb4a10b4fb9c767..63f250a012aca982699d07fe1d71c5458f22b2e9 100644 (file)
@@ -2,6 +2,8 @@
 // License & terms of use: http://www.unicode.org/copyright.html#License
 package com.ibm.icu.impl;
 
+import static com.ibm.icu.impl.number.parse.ParsingUtils.safeContains;
+
 import java.util.EnumMap;
 import java.util.Map;
 
@@ -90,7 +92,7 @@ public class StaticUnicodeSets {
      * @return key1 if the set contains str, or COUNT if not.
      */
     public static Key chooseFrom(String str, Key key1) {
-        return get(key1).contains(str) ? key1 : null;
+        return safeContains(get(key1), str) ? key1 : null;
     }
 
     /**
@@ -108,7 +110,7 @@ public class StaticUnicodeSets {
      *         contains str.
      */
     public static Key chooseFrom(String str, Key key1, Key key2) {
-        return get(key1).contains(str) ? key1 : chooseFrom(str, key2);
+        return safeContains(get(key1), str) ? key1 : chooseFrom(str, key2);
     }
 
     /**
index 813355fbfb091d7812c74295b28b3ee9554af3e7..7ccf083ed61b48f9d24d1c54120820b1a1fe88b4 100644 (file)
@@ -156,6 +156,9 @@ public class StringSegment implements CharSequence {
      *
      * <p>
      * This method will perform case folding if case folding was enabled in the constructor.
+     *
+     * <p>
+     * IMPORTANT: The given CharSequence must not be empty! It is the caller's responsibility to check.
      */
     public int getCommonPrefixLength(CharSequence other) {
         return getPrefixLengthInternal(other, foldCase);
@@ -170,8 +173,10 @@ public class StringSegment implements CharSequence {
     }
 
     private int getPrefixLengthInternal(CharSequence other, boolean foldCase) {
+        assert other.length() != 0;
         int offset = 0;
         for (; offset < Math.min(length(), other.length());) {
+            // TODO: case-fold code points, not chars
             int cp1 = Character.codePointAt(this, offset);
             int cp2 = Character.codePointAt(other, offset);
             if (!codePointsEqual(cp1, cp2, foldCase)) {
index a48be3544a7f0467280ba79dab02bfb51d1ed02f..c46db004603c9f982687fb10ac776d2a85c96766 100644 (file)
@@ -97,7 +97,7 @@ public class CombinedCurrencyMatcher implements NumberParseMatcher {
         // Try to match a currency spacing separator.
         int initialOffset = segment.getOffset();
         boolean maybeMore = false;
-        if (result.seenNumber()) {
+        if (result.seenNumber() && !beforeSuffixInsert.isEmpty()) {
             int overlap = segment.getCommonPrefixLength(beforeSuffixInsert);
             if (overlap == beforeSuffixInsert.length()) {
                 segment.adjustOffset(overlap);
@@ -114,7 +114,7 @@ public class CombinedCurrencyMatcher implements NumberParseMatcher {
         }
 
         // Try to match a currency spacing separator.
-        if (!result.seenNumber()) {
+        if (!result.seenNumber() && !afterPrefixInsert.isEmpty()) {
             int overlap = segment.getCommonPrefixLength(afterPrefixInsert);
             if (overlap == afterPrefixInsert.length()) {
                 segment.adjustOffset(overlap);
@@ -130,7 +130,12 @@ public class CombinedCurrencyMatcher implements NumberParseMatcher {
     private boolean matchCurrency(StringSegment segment, ParsedNumber result) {
         boolean maybeMore = false;
 
-        int overlap1 = segment.getCaseSensitivePrefixLength(currency1);
+        int overlap1;
+        if (!currency1.isEmpty()) {
+            overlap1 = segment.getCaseSensitivePrefixLength(currency1);
+        } else {
+            overlap1 = -1;
+        }
         maybeMore = maybeMore || overlap1 == segment.length();
         if (overlap1 == currency1.length()) {
             result.currencyCode = isoCode;
@@ -139,7 +144,12 @@ public class CombinedCurrencyMatcher implements NumberParseMatcher {
             return maybeMore;
         }
 
-        int overlap2 = segment.getCaseSensitivePrefixLength(currency2);
+        int overlap2;
+        if (!currency2.isEmpty()) {
+            overlap2 = segment.getCaseSensitivePrefixLength(currency2);
+        } else {
+            overlap2 = -1;
+        }
         maybeMore = maybeMore || overlap2 == segment.length();
         if (overlap2 == currency2.length()) {
             result.currencyCode = isoCode;
@@ -169,6 +179,9 @@ public class CombinedCurrencyMatcher implements NumberParseMatcher {
             int longestFullMatch = 0;
             for (int i=0; i<StandardPlural.COUNT; i++) {
                 String name = localLongNames[i];
+                if (name.isEmpty()) {
+                    continue;
+                }
                 int overlap = segment.getCommonPrefixLength(name);
                 if (overlap == name.length() && name.length() > longestFullMatch) {
                     longestFullMatch = name.length();
index 17ef898f01ae653cfde8587c717106203b93d41f..4825b1ad4bd3d74e35ad5e2e7df3ffa3c12e3b7c 100644 (file)
@@ -71,8 +71,10 @@ public class DecimalMatcher implements NumberParseMatcher {
                 strictSeparators ? Key.STRICT_PERIOD : Key.PERIOD);
         if (decimalKey != null) {
             decimalUniSet = StaticUnicodeSets.get(decimalKey);
-        } else {
+        } else if (!decimalSeparator.isEmpty()) {
             decimalUniSet = new UnicodeSet().add(decimalSeparator.codePointAt(0)).freeze();
+        } else {
+            decimalUniSet = UnicodeSet.EMPTY;
         }
 
         if (groupingKey != null && decimalKey != null) {
@@ -178,6 +180,9 @@ public class DecimalMatcher implements NumberParseMatcher {
             if (digit == -1 && digitStrings != null) {
                 for (int i = 0; i < digitStrings.length; i++) {
                     String str = digitStrings[i];
+                    if (str.isEmpty()) {
+                        continue;
+                    }
                     int overlap = segment.getCommonPrefixLength(str);
                     if (overlap == str.length()) {
                         segment.adjustOffset(overlap);
@@ -207,7 +212,7 @@ public class DecimalMatcher implements NumberParseMatcher {
 
             // 1) Attempt the decimal separator string literal.
             // if (we have not seen a decimal separator yet) { ... }
-            if (actualDecimalString == null) {
+            if (actualDecimalString == null && !decimalSeparator.isEmpty()) {
                 int overlap = segment.getCommonPrefixLength(decimalSeparator);
                 maybeMore = maybeMore || (overlap == segment.length());
                 if (overlap == decimalSeparator.length()) {
@@ -227,7 +232,10 @@ public class DecimalMatcher implements NumberParseMatcher {
 
             // 2.5) Attempt to match a new the grouping separator string literal.
             // if (we have not seen a grouping or decimal separator yet) { ... }
-            if (!groupingDisabled && actualGroupingString == null && actualDecimalString == null) {
+            if (!groupingDisabled
+                    && actualGroupingString == null
+                    && actualDecimalString == null
+                    && !groupingSeparator.isEmpty()) {
                 int overlap = segment.getCommonPrefixLength(groupingSeparator);
                 maybeMore = maybeMore || (overlap == segment.length());
                 if (overlap == groupingSeparator.length()) {
index 081112797b9d35f94ae29ba7043c571db2472057..0aa915aca63db764c6abbd99c18e72124e34f2a6 100644 (file)
@@ -2,6 +2,8 @@
 // License & terms of use: http://www.unicode.org/copyright.html#License
 package com.ibm.icu.impl.number.parse;
 
+import static com.ibm.icu.impl.number.parse.ParsingUtils.safeContains;
+
 import com.ibm.icu.impl.StaticUnicodeSets;
 import com.ibm.icu.impl.StringSegment;
 import com.ibm.icu.text.DecimalFormatSymbols;
@@ -16,7 +18,7 @@ public class InfinityMatcher extends SymbolMatcher {
 
     public static InfinityMatcher getInstance(DecimalFormatSymbols symbols) {
         String symbolString = symbols.getInfinity();
-        if (DEFAULT.uniSet.contains(symbolString)) {
+        if (safeContains(DEFAULT.uniSet, symbolString)) {
             return DEFAULT;
         } else {
             return new InfinityMatcher(symbolString);
index c31c29a1cac5ceb33eef097f6d7758eb65efe542..126b9b44206c635586afe89b167424c0f2a3d4df 100644 (file)
@@ -2,6 +2,8 @@
 // License & terms of use: http://www.unicode.org/copyright.html#License
 package com.ibm.icu.impl.number.parse;
 
+import static com.ibm.icu.impl.number.parse.ParsingUtils.safeContains;
+
 import com.ibm.icu.impl.StaticUnicodeSets;
 import com.ibm.icu.impl.StringSegment;
 import com.ibm.icu.text.DecimalFormatSymbols;
@@ -17,7 +19,7 @@ public class MinusSignMatcher extends SymbolMatcher {
 
     public static MinusSignMatcher getInstance(DecimalFormatSymbols symbols, boolean allowTrailing) {
         String symbolString = symbols.getMinusSignString();
-        if (DEFAULT.uniSet.contains(symbolString)) {
+        if (safeContains(DEFAULT.uniSet, symbolString)) {
             return allowTrailing ? DEFAULT_ALLOW_TRAILING : DEFAULT;
         } else {
             return new MinusSignMatcher(symbolString, allowTrailing);
index 7163dc6cf96debb44299c220efeb33d417ab6983..1fbab42b90090fb859bd00431bca9d6820bedd19 100644 (file)
@@ -40,4 +40,9 @@ public class ParsingUtils {
         }
     }
 
+    // TODO: Remove this helper function (and update call sites) when #13805 is fixed
+    public static boolean safeContains(UnicodeSet uniset, CharSequence str) {
+        return str.length() != 0 && uniset.contains(str);
+    }
+
 }
index e65a0b07961090a70fb763bbcdf9b019021242e8..7887f04c78dec2f9874f9b20f5ec114db3d27f8b 100644 (file)
@@ -2,6 +2,8 @@
 // License & terms of use: http://www.unicode.org/copyright.html#License
 package com.ibm.icu.impl.number.parse;
 
+import static com.ibm.icu.impl.number.parse.ParsingUtils.safeContains;
+
 import com.ibm.icu.impl.StaticUnicodeSets;
 import com.ibm.icu.impl.StringSegment;
 import com.ibm.icu.text.DecimalFormatSymbols;
@@ -17,7 +19,7 @@ public class PlusSignMatcher extends SymbolMatcher {
 
     public static PlusSignMatcher getInstance(DecimalFormatSymbols symbols, boolean allowTrailing) {
         String symbolString = symbols.getPlusSignString();
-        if (DEFAULT.uniSet.contains(symbolString)) {
+        if (safeContains(DEFAULT.uniSet, symbolString)) {
             return allowTrailing ? DEFAULT_ALLOW_TRAILING : DEFAULT;
         } else {
             return new PlusSignMatcher(symbolString, allowTrailing);
index 95dff257db85f59527e1f0812da421c6dcb9b4c6..1904ea414a21921910d042aed0f8b9084cfe69a7 100644 (file)
@@ -2,6 +2,8 @@
 // License & terms of use: http://www.unicode.org/copyright.html#License
 package com.ibm.icu.impl.number.parse;
 
+import static com.ibm.icu.impl.number.parse.ParsingUtils.safeContains;
+
 import com.ibm.icu.impl.StaticUnicodeSets;
 import com.ibm.icu.impl.StringSegment;
 import com.ibm.icu.impl.number.DecimalQuantity_DualStorageBCD;
@@ -32,9 +34,9 @@ public class ScientificMatcher implements NumberParseMatcher {
                 ParsingUtils.PARSE_FLAG_INTEGER_ONLY | ParsingUtils.PARSE_FLAG_GROUPING_DISABLED);
 
         String minusSign = symbols.getMinusSignString();
-        customMinusSign = minusSignSet().contains(minusSign) ? null : minusSign;
+        customMinusSign = safeContains(minusSignSet(), minusSign) ? null : minusSign;
         String plusSign = symbols.getPlusSignString();
-        customPlusSign = plusSignSet().contains(plusSign) ? null : plusSign;
+        customPlusSign = safeContains(plusSignSet(), plusSign) ? null : plusSign;
     }
 
     private static UnicodeSet minusSignSet() {
@@ -53,6 +55,7 @@ public class ScientificMatcher implements NumberParseMatcher {
         }
 
         // First match the scientific separator, and then match another number after it.
+        // NOTE: This is guarded by the smoke test; no need to check exponentSeparatorString length again.
         int overlap1 = segment.getCommonPrefixLength(exponentSeparatorString);
         if (overlap1 == exponentSeparatorString.length()) {
             // Full exponent separator match.
@@ -71,6 +74,7 @@ public class ScientificMatcher implements NumberParseMatcher {
             } else if (segment.startsWith(plusSignSet())) {
                 segment.adjustOffsetByCodePoint();
             } else if (segment.startsWith(customMinusSign)) {
+                // Note: call site is guarded with startsWith, which returns false on empty string
                 int overlap2 = segment.getCommonPrefixLength(customMinusSign);
                 if (overlap2 != customMinusSign.length()) {
                     // Partial custom sign match; un-match the exponent separator.
@@ -80,6 +84,7 @@ public class ScientificMatcher implements NumberParseMatcher {
                 exponentSign = -1;
                 segment.adjustOffset(overlap2);
             } else if (segment.startsWith(customPlusSign)) {
+                // Note: call site is guarded with startsWith, which returns false on empty string
                 int overlap2 = segment.getCommonPrefixLength(customPlusSign);
                 if (overlap2 != customPlusSign.length()) {
                     // Partial custom sign match; un-match the exponent separator.
index 1210340ac244a4a259c45b3c2e906631eacffbcb..78bf6b56ebadd5932578dcbcfd8d045a205c331f 100644 (file)
@@ -6170,4 +6170,57 @@ public class NumberFormatTest extends TestFmwk {
         NumberFormat df = NumberFormat.getInstance(ULocale.US, NumberFormat.PLURALCURRENCYSTYLE);
         expect2(df, 1.5, "1.50 US dollars");
     }
+
+    @Test
+    public void test13804_EmptyStringsWhenParsing() {
+        DecimalFormatSymbols dfs = DecimalFormatSymbols.getInstance(ULocale.ENGLISH);
+        dfs.setCurrencySymbol("");
+        dfs.setDecimalSeparatorString("");
+        dfs.setDigitStrings(new String[] { "", "", "", "", "", "", "", "", "", "" });
+        dfs.setExponentMultiplicationSign("");
+        dfs.setExponentSeparator("");
+        dfs.setGroupingSeparatorString("");
+        dfs.setInfinity("");
+        dfs.setInternationalCurrencySymbol("");
+        dfs.setMinusSignString("");
+        dfs.setMonetaryDecimalSeparatorString("");
+        dfs.setMonetaryGroupingSeparatorString("");
+        dfs.setNaN("");
+        dfs.setPatternForCurrencySpacing(DecimalFormatSymbols.CURRENCY_SPC_INSERT, false, "");
+        dfs.setPatternForCurrencySpacing(DecimalFormatSymbols.CURRENCY_SPC_INSERT, true, "");
+        dfs.setPercentString("");
+        dfs.setPerMillString("");
+        dfs.setPlusSignString("");
+
+        DecimalFormat df = new DecimalFormat("0", dfs);
+        df.setGroupingUsed(true);
+        df.setScientificNotation(true);
+        df.setParseStrict(false); // enable all matchers
+        df.format(0); // should not throw or hit infinite loop
+        String[] samples = new String[] {
+                "",
+                "123",
+                "$123",
+                "-",
+                "+",
+                "44%",
+                "1E+2.3"
+        };
+        for (String sample : samples) {
+            logln("Attempting parse on: " + sample);
+            // We don't care about the results, only that we don't throw and don't loop.
+            ParsePosition ppos = new ParsePosition(0);
+            df.parse(sample, ppos);
+            ppos = new ParsePosition(0);
+            df.parseCurrency(sample, ppos);
+        }
+
+        // Test with a nonempty exponent separator symbol to cover more code
+        dfs.setExponentSeparator("E");
+        df.setDecimalFormatSymbols(dfs);
+        {
+            ParsePosition ppos = new ParsePosition(0);
+            df.parse("1E+2.3", ppos);
+        }
+    }
 }