From: Shane Carr Date: Fri, 1 Jun 2018 00:31:54 +0000 (+0000) Subject: ICU-13804 Making number parsing code more robust when given empty symbol strings. X-Git-Tag: release-62-rc~3 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=dd7667d330a4be76f559bc0e77f2ae7a033f56f1;p=icu ICU-13804 Making number parsing code more robust when given empty symbol strings. X-SVN-Rev: 41497 --- diff --git a/icu4c/source/i18n/numparse_currency.cpp b/icu4c/source/i18n/numparse_currency.cpp index 2ddafbe50fa..ae8196ec483 100644 --- a/icu4c/source/i18n/numparse_currency.cpp +++ b/icu4c/source/i18n/numparse_currency.cpp @@ -62,7 +62,7 @@ CombinedCurrencyMatcher::match(StringSegment& segment, ParsedNumber& result, UEr // Try to match a currency spacing separator. int32_t initialOffset = segment.getOffset(); bool maybeMore = false; - if (result.seenNumber()) { + if (result.seenNumber() && !beforeSuffixInsert.isEmpty()) { int32_t overlap = segment.getCommonPrefixLength(beforeSuffixInsert); if (overlap == beforeSuffixInsert.length()) { segment.adjustOffset(overlap); @@ -79,7 +79,7 @@ CombinedCurrencyMatcher::match(StringSegment& segment, ParsedNumber& result, UEr } // Try to match a currency spacing separator. - if (!result.seenNumber()) { + if (!result.seenNumber() && !afterPrefixInsert.isEmpty()) { int32_t overlap = segment.getCommonPrefixLength(afterPrefixInsert); if (overlap == afterPrefixInsert.length()) { segment.adjustOffset(overlap); @@ -95,7 +95,12 @@ bool CombinedCurrencyMatcher::matchCurrency(StringSegment& segment, ParsedNumber UErrorCode& status) const { bool maybeMore = false; - int32_t overlap1 = segment.getCaseSensitivePrefixLength(fCurrency1); + int32_t overlap1; + if (!fCurrency1.isEmpty()) { + overlap1 = segment.getCaseSensitivePrefixLength(fCurrency1); + } else { + overlap1 = -1; + } maybeMore = maybeMore || overlap1 == segment.length(); if (overlap1 == fCurrency1.length()) { utils::copyCurrencyCode(result.currencyCode, fCurrencyCode); @@ -104,7 +109,12 @@ bool CombinedCurrencyMatcher::matchCurrency(StringSegment& segment, ParsedNumber return maybeMore; } - int32_t overlap2 = segment.getCaseSensitivePrefixLength(fCurrency2); + int32_t overlap2; + if (!fCurrency2.isEmpty()) { + overlap2 = segment.getCaseSensitivePrefixLength(fCurrency2); + } else { + overlap2 = -1; + } maybeMore = maybeMore || overlap2 == segment.length(); if (overlap2 == fCurrency2.length()) { utils::copyCurrencyCode(result.currencyCode, fCurrencyCode); diff --git a/icu4c/source/i18n/numparse_decimal.cpp b/icu4c/source/i18n/numparse_decimal.cpp index 886704386d9..b120c5c6ad2 100644 --- a/icu4c/source/i18n/numparse_decimal.cpp +++ b/icu4c/source/i18n/numparse_decimal.cpp @@ -44,12 +44,14 @@ DecimalMatcher::DecimalMatcher(const DecimalFormatSymbols& symbols, const Groupe strictSeparators ? unisets::STRICT_PERIOD : unisets::PERIOD); if (decimalKey >= 0) { decimalUniSet = unisets::get(decimalKey); - } else { + } else if (!decimalSeparator.isEmpty()) { auto* set = new UnicodeSet(); set->add(decimalSeparator.char32At(0)); set->freeze(); decimalUniSet = set; fLocalDecimalUniSet.adoptInstead(set); + } else { + decimalUniSet = unisets::get(unisets::EMPTY); } if (groupingKey >= 0 && decimalKey >= 0) { @@ -161,6 +163,9 @@ bool DecimalMatcher::match(StringSegment& segment, ParsedNumber& result, int8_t if (digit == -1 && !fLocalDigitStrings.isNull()) { for (int32_t i = 0; i < 10; i++) { const UnicodeString& str = fLocalDigitStrings[i]; + if (str.isEmpty()) { + continue; + } int32_t overlap = segment.getCommonPrefixLength(str); if (overlap == str.length()) { segment.adjustOffset(overlap); @@ -191,7 +196,7 @@ bool DecimalMatcher::match(StringSegment& segment, ParsedNumber& result, int8_t // 1) Attempt the decimal separator string literal. // if (we have not seen a decimal separator yet) { ... } - if (actualDecimalString.isBogus()) { + if (actualDecimalString.isBogus() && !decimalSeparator.isEmpty()) { int32_t overlap = segment.getCommonPrefixLength(decimalSeparator); maybeMore = maybeMore || (overlap == segment.length()); if (overlap == decimalSeparator.length()) { @@ -211,7 +216,8 @@ bool DecimalMatcher::match(StringSegment& segment, ParsedNumber& result, int8_t // 2.5) Attempt to match a new the grouping separator string literal. // if (we have not seen a grouping or decimal separator yet) { ... } - if (!groupingDisabled && actualGroupingString.isBogus() && actualDecimalString.isBogus()) { + if (!groupingDisabled && actualGroupingString.isBogus() && actualDecimalString.isBogus() && + !groupingSeparator.isEmpty()) { int32_t overlap = segment.getCommonPrefixLength(groupingSeparator); maybeMore = maybeMore || (overlap == segment.length()); if (overlap == groupingSeparator.length()) { diff --git a/icu4c/source/i18n/numparse_scientific.cpp b/icu4c/source/i18n/numparse_scientific.cpp index 3950392bf26..611695e57d4 100644 --- a/icu4c/source/i18n/numparse_scientific.cpp +++ b/icu4c/source/i18n/numparse_scientific.cpp @@ -57,6 +57,7 @@ bool ScientificMatcher::match(StringSegment& segment, ParsedNumber& result, UErr } // First match the scientific separator, and then match another number after it. + // NOTE: This is guarded by the smoke test; no need to check fExponentSeparatorString length again. int overlap1 = segment.getCommonPrefixLength(fExponentSeparatorString); if (overlap1 == fExponentSeparatorString.length()) { // Full exponent separator match. @@ -75,6 +76,7 @@ bool ScientificMatcher::match(StringSegment& segment, ParsedNumber& result, UErr } else if (segment.startsWith(plusSignSet())) { segment.adjustOffsetByCodePoint(); } else if (segment.startsWith(fCustomMinusSign)) { + // Note: call site is guarded with startsWith, which returns false on empty string int32_t overlap2 = segment.getCommonPrefixLength(fCustomMinusSign); if (overlap2 != fCustomMinusSign.length()) { // Partial custom sign match; un-match the exponent separator. @@ -84,6 +86,7 @@ bool ScientificMatcher::match(StringSegment& segment, ParsedNumber& result, UErr exponentSign = -1; segment.adjustOffset(overlap2); } else if (segment.startsWith(fCustomPlusSign)) { + // Note: call site is guarded with startsWith, which returns false on empty string int32_t overlap2 = segment.getCommonPrefixLength(fCustomPlusSign); if (overlap2 != fCustomPlusSign.length()) { // Partial custom sign match; un-match the exponent separator. diff --git a/icu4c/source/i18n/numparse_stringsegment.cpp b/icu4c/source/i18n/numparse_stringsegment.cpp index 0c5d4267dc5..3db4fe618a6 100644 --- a/icu4c/source/i18n/numparse_stringsegment.cpp +++ b/icu4c/source/i18n/numparse_stringsegment.cpp @@ -112,6 +112,7 @@ int32_t StringSegment::getCaseSensitivePrefixLength(const UnicodeString& other) } int32_t StringSegment::getPrefixLengthInternal(const UnicodeString& other, bool foldCase) { + U_ASSERT(other.length() > 0); int32_t offset = 0; for (; offset < uprv_min(length(), other.length());) { // TODO: case-fold code points, not chars diff --git a/icu4c/source/i18n/numparse_types.h b/icu4c/source/i18n/numparse_types.h index 50b98be7333..ab591eaba83 100644 --- a/icu4c/source/i18n/numparse_types.h +++ b/icu4c/source/i18n/numparse_types.h @@ -247,7 +247,13 @@ class U_I18N_API StringSegment : public UMemory { * since the first 2 characters are the same. * *

- * This method will perform case folding if case folding is enabled for the parser. + * This method only returns offsets along code point boundaries. + * + *

+ * This method will perform case folding if case folding was enabled in the constructor. + * + *

+ * IMPORTANT: The given UnicodeString must not be empty! It is the caller's responsibility to check. */ int32_t getCommonPrefixLength(const UnicodeString& other); diff --git a/icu4c/source/test/intltest/numfmtst.cpp b/icu4c/source/test/intltest/numfmtst.cpp index 515f514484d..12172fcf166 100644 --- a/icu4c/source/test/intltest/numfmtst.cpp +++ b/icu4c/source/test/intltest/numfmtst.cpp @@ -214,6 +214,7 @@ void NumberFormatTest::runIndexedTest( int32_t index, UBool exec, const char* &n TESTCASE_AUTO(Test11640_TripleCurrencySymbol); TESTCASE_AUTO(Test13763_FieldPositionIteratorOffset); TESTCASE_AUTO(Test13777_ParseLongNameNonCurrencyMode); + TESTCASE_AUTO(Test13804_EmptyStringsWhenParsing); TESTCASE_AUTO_END; } @@ -9064,4 +9065,80 @@ void NumberFormatTest::Test13777_ParseLongNameNonCurrencyMode() { expect2(*df, 1.5, u"1.50 US dollars"); } +void NumberFormatTest::Test13804_EmptyStringsWhenParsing() { + IcuTestErrorCode status(*this, "Test13804_EmptyStringsWhenParsing"); + + DecimalFormatSymbols dfs("en", status); + if (status.errIfFailureAndReset()) { + return; + } + dfs.setSymbol(DecimalFormatSymbols::kCurrencySymbol, u"", FALSE); + dfs.setSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol, u"", FALSE); + dfs.setSymbol(DecimalFormatSymbols::kZeroDigitSymbol, u"", FALSE); + dfs.setSymbol(DecimalFormatSymbols::kOneDigitSymbol, u"", FALSE); + dfs.setSymbol(DecimalFormatSymbols::kTwoDigitSymbol, u"", FALSE); + dfs.setSymbol(DecimalFormatSymbols::kThreeDigitSymbol, u"", FALSE); + dfs.setSymbol(DecimalFormatSymbols::kFourDigitSymbol, u"", FALSE); + dfs.setSymbol(DecimalFormatSymbols::kFiveDigitSymbol, u"", FALSE); + dfs.setSymbol(DecimalFormatSymbols::kSixDigitSymbol, u"", FALSE); + dfs.setSymbol(DecimalFormatSymbols::kSevenDigitSymbol, u"", FALSE); + dfs.setSymbol(DecimalFormatSymbols::kEightDigitSymbol, u"", FALSE); + dfs.setSymbol(DecimalFormatSymbols::kNineDigitSymbol, u"", FALSE); + dfs.setSymbol(DecimalFormatSymbols::kExponentMultiplicationSymbol, u"", FALSE); + dfs.setSymbol(DecimalFormatSymbols::kExponentialSymbol, u"", FALSE); + dfs.setSymbol(DecimalFormatSymbols::kGroupingSeparatorSymbol, u"", FALSE); + dfs.setSymbol(DecimalFormatSymbols::kInfinitySymbol, u"", FALSE); + dfs.setSymbol(DecimalFormatSymbols::kIntlCurrencySymbol, u"", FALSE); + dfs.setSymbol(DecimalFormatSymbols::kMinusSignSymbol, u"", FALSE); + dfs.setSymbol(DecimalFormatSymbols::kMonetarySeparatorSymbol, u"", FALSE); + dfs.setSymbol(DecimalFormatSymbols::kMonetaryGroupingSeparatorSymbol, u"", FALSE); + dfs.setSymbol(DecimalFormatSymbols::kNaNSymbol, u"", FALSE); + dfs.setPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, FALSE, u""); + dfs.setPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, TRUE, u""); + dfs.setSymbol(DecimalFormatSymbols::kPercentSymbol, u"", FALSE); + dfs.setSymbol(DecimalFormatSymbols::kPerMillSymbol, u"", FALSE); + dfs.setSymbol(DecimalFormatSymbols::kPlusSignSymbol, u"", FALSE); + + DecimalFormat df("0", dfs, status); + if (status.errIfFailureAndReset()) { + return; + } + df.setGroupingUsed(TRUE); + df.setScientificNotation(TRUE); + df.setLenient(TRUE); // enable all matchers + { + UnicodeString result; + df.format(0, result); // should not crash or hit infinite loop + } + const char16_t* samples[] = { + u"", + u"123", + u"$123", + u"-", + u"+", + u"44%", + u"1E+2.3" + }; + for (auto& sample : samples) { + logln(UnicodeString(u"Attempting parse on: ") + sample); + status.setScope(sample); + // We don't care about the results, only that we don't crash and don't loop. + Formattable result; + ParsePosition ppos(0); + df.parse(sample, result, ppos); + ppos = ParsePosition(0); + df.parseCurrency(sample, ppos); + status.errIfFailureAndReset(); + } + + // Test with a nonempty exponent separator symbol to cover more code + dfs.setSymbol(DecimalFormatSymbols::kExponentialSymbol, u"E", FALSE); + df.setDecimalFormatSymbols(dfs); + { + Formattable result; + ParsePosition ppos(0); + df.parse(u"1E+2.3", result, ppos); + } +} + #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/test/intltest/numfmtst.h b/icu4c/source/test/intltest/numfmtst.h index 60922c150e7..1c7c8622964 100644 --- a/icu4c/source/test/intltest/numfmtst.h +++ b/icu4c/source/test/intltest/numfmtst.h @@ -278,6 +278,7 @@ class NumberFormatTest: public CalendarTimeZoneTest { void Test11640_TripleCurrencySymbol(); void Test13763_FieldPositionIteratorOffset(); void Test13777_ParseLongNameNonCurrencyMode(); + void Test13804_EmptyStringsWhenParsing(); private: UBool testFormattableAsUFormattable(const char *file, int line, Formattable &f); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/StaticUnicodeSets.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/StaticUnicodeSets.java index b4543ef69e2..63f250a012a 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/StaticUnicodeSets.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/StaticUnicodeSets.java @@ -2,6 +2,8 @@ // License & terms of use: http://www.unicode.org/copyright.html#License package com.ibm.icu.impl; +import static com.ibm.icu.impl.number.parse.ParsingUtils.safeContains; + import java.util.EnumMap; import java.util.Map; @@ -90,7 +92,7 @@ public class StaticUnicodeSets { * @return key1 if the set contains str, or COUNT if not. */ public static Key chooseFrom(String str, Key key1) { - return get(key1).contains(str) ? key1 : null; + return safeContains(get(key1), str) ? key1 : null; } /** @@ -108,7 +110,7 @@ public class StaticUnicodeSets { * contains str. */ public static Key chooseFrom(String str, Key key1, Key key2) { - return get(key1).contains(str) ? key1 : chooseFrom(str, key2); + return safeContains(get(key1), str) ? key1 : chooseFrom(str, key2); } /** diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/StringSegment.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/StringSegment.java index 813355fbfb0..7ccf083ed61 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/StringSegment.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/StringSegment.java @@ -156,6 +156,9 @@ public class StringSegment implements CharSequence { * *

* This method will perform case folding if case folding was enabled in the constructor. + * + *

+ * IMPORTANT: The given CharSequence must not be empty! It is the caller's responsibility to check. */ public int getCommonPrefixLength(CharSequence other) { return getPrefixLengthInternal(other, foldCase); @@ -170,8 +173,10 @@ public class StringSegment implements CharSequence { } private int getPrefixLengthInternal(CharSequence other, boolean foldCase) { + assert other.length() != 0; int offset = 0; for (; offset < Math.min(length(), other.length());) { + // TODO: case-fold code points, not chars int cp1 = Character.codePointAt(this, offset); int cp2 = Character.codePointAt(other, offset); if (!codePointsEqual(cp1, cp2, foldCase)) { diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CombinedCurrencyMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CombinedCurrencyMatcher.java index a48be3544a7..c46db004603 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CombinedCurrencyMatcher.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CombinedCurrencyMatcher.java @@ -97,7 +97,7 @@ public class CombinedCurrencyMatcher implements NumberParseMatcher { // Try to match a currency spacing separator. int initialOffset = segment.getOffset(); boolean maybeMore = false; - if (result.seenNumber()) { + if (result.seenNumber() && !beforeSuffixInsert.isEmpty()) { int overlap = segment.getCommonPrefixLength(beforeSuffixInsert); if (overlap == beforeSuffixInsert.length()) { segment.adjustOffset(overlap); @@ -114,7 +114,7 @@ public class CombinedCurrencyMatcher implements NumberParseMatcher { } // Try to match a currency spacing separator. - if (!result.seenNumber()) { + if (!result.seenNumber() && !afterPrefixInsert.isEmpty()) { int overlap = segment.getCommonPrefixLength(afterPrefixInsert); if (overlap == afterPrefixInsert.length()) { segment.adjustOffset(overlap); @@ -130,7 +130,12 @@ public class CombinedCurrencyMatcher implements NumberParseMatcher { private boolean matchCurrency(StringSegment segment, ParsedNumber result) { boolean maybeMore = false; - int overlap1 = segment.getCaseSensitivePrefixLength(currency1); + int overlap1; + if (!currency1.isEmpty()) { + overlap1 = segment.getCaseSensitivePrefixLength(currency1); + } else { + overlap1 = -1; + } maybeMore = maybeMore || overlap1 == segment.length(); if (overlap1 == currency1.length()) { result.currencyCode = isoCode; @@ -139,7 +144,12 @@ public class CombinedCurrencyMatcher implements NumberParseMatcher { return maybeMore; } - int overlap2 = segment.getCaseSensitivePrefixLength(currency2); + int overlap2; + if (!currency2.isEmpty()) { + overlap2 = segment.getCaseSensitivePrefixLength(currency2); + } else { + overlap2 = -1; + } maybeMore = maybeMore || overlap2 == segment.length(); if (overlap2 == currency2.length()) { result.currencyCode = isoCode; @@ -169,6 +179,9 @@ public class CombinedCurrencyMatcher implements NumberParseMatcher { int longestFullMatch = 0; for (int i=0; i longestFullMatch) { longestFullMatch = name.length(); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/DecimalMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/DecimalMatcher.java index 17ef898f01a..4825b1ad4bd 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/DecimalMatcher.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/DecimalMatcher.java @@ -71,8 +71,10 @@ public class DecimalMatcher implements NumberParseMatcher { strictSeparators ? Key.STRICT_PERIOD : Key.PERIOD); if (decimalKey != null) { decimalUniSet = StaticUnicodeSets.get(decimalKey); - } else { + } else if (!decimalSeparator.isEmpty()) { decimalUniSet = new UnicodeSet().add(decimalSeparator.codePointAt(0)).freeze(); + } else { + decimalUniSet = UnicodeSet.EMPTY; } if (groupingKey != null && decimalKey != null) { @@ -178,6 +180,9 @@ public class DecimalMatcher implements NumberParseMatcher { if (digit == -1 && digitStrings != null) { for (int i = 0; i < digitStrings.length; i++) { String str = digitStrings[i]; + if (str.isEmpty()) { + continue; + } int overlap = segment.getCommonPrefixLength(str); if (overlap == str.length()) { segment.adjustOffset(overlap); @@ -207,7 +212,7 @@ public class DecimalMatcher implements NumberParseMatcher { // 1) Attempt the decimal separator string literal. // if (we have not seen a decimal separator yet) { ... } - if (actualDecimalString == null) { + if (actualDecimalString == null && !decimalSeparator.isEmpty()) { int overlap = segment.getCommonPrefixLength(decimalSeparator); maybeMore = maybeMore || (overlap == segment.length()); if (overlap == decimalSeparator.length()) { @@ -227,7 +232,10 @@ public class DecimalMatcher implements NumberParseMatcher { // 2.5) Attempt to match a new the grouping separator string literal. // if (we have not seen a grouping or decimal separator yet) { ... } - if (!groupingDisabled && actualGroupingString == null && actualDecimalString == null) { + if (!groupingDisabled + && actualGroupingString == null + && actualDecimalString == null + && !groupingSeparator.isEmpty()) { int overlap = segment.getCommonPrefixLength(groupingSeparator); maybeMore = maybeMore || (overlap == segment.length()); if (overlap == groupingSeparator.length()) { diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/InfinityMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/InfinityMatcher.java index 081112797b9..0aa915aca63 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/InfinityMatcher.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/InfinityMatcher.java @@ -2,6 +2,8 @@ // License & terms of use: http://www.unicode.org/copyright.html#License package com.ibm.icu.impl.number.parse; +import static com.ibm.icu.impl.number.parse.ParsingUtils.safeContains; + import com.ibm.icu.impl.StaticUnicodeSets; import com.ibm.icu.impl.StringSegment; import com.ibm.icu.text.DecimalFormatSymbols; @@ -16,7 +18,7 @@ public class InfinityMatcher extends SymbolMatcher { public static InfinityMatcher getInstance(DecimalFormatSymbols symbols) { String symbolString = symbols.getInfinity(); - if (DEFAULT.uniSet.contains(symbolString)) { + if (safeContains(DEFAULT.uniSet, symbolString)) { return DEFAULT; } else { return new InfinityMatcher(symbolString); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/MinusSignMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/MinusSignMatcher.java index c31c29a1cac..126b9b44206 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/MinusSignMatcher.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/MinusSignMatcher.java @@ -2,6 +2,8 @@ // License & terms of use: http://www.unicode.org/copyright.html#License package com.ibm.icu.impl.number.parse; +import static com.ibm.icu.impl.number.parse.ParsingUtils.safeContains; + import com.ibm.icu.impl.StaticUnicodeSets; import com.ibm.icu.impl.StringSegment; import com.ibm.icu.text.DecimalFormatSymbols; @@ -17,7 +19,7 @@ public class MinusSignMatcher extends SymbolMatcher { public static MinusSignMatcher getInstance(DecimalFormatSymbols symbols, boolean allowTrailing) { String symbolString = symbols.getMinusSignString(); - if (DEFAULT.uniSet.contains(symbolString)) { + if (safeContains(DEFAULT.uniSet, symbolString)) { return allowTrailing ? DEFAULT_ALLOW_TRAILING : DEFAULT; } else { return new MinusSignMatcher(symbolString, allowTrailing); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ParsingUtils.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ParsingUtils.java index 7163dc6cf96..1fbab42b900 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ParsingUtils.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ParsingUtils.java @@ -40,4 +40,9 @@ public class ParsingUtils { } } + // TODO: Remove this helper function (and update call sites) when #13805 is fixed + public static boolean safeContains(UnicodeSet uniset, CharSequence str) { + return str.length() != 0 && uniset.contains(str); + } + } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/PlusSignMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/PlusSignMatcher.java index e65a0b07961..7887f04c78d 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/PlusSignMatcher.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/PlusSignMatcher.java @@ -2,6 +2,8 @@ // License & terms of use: http://www.unicode.org/copyright.html#License package com.ibm.icu.impl.number.parse; +import static com.ibm.icu.impl.number.parse.ParsingUtils.safeContains; + import com.ibm.icu.impl.StaticUnicodeSets; import com.ibm.icu.impl.StringSegment; import com.ibm.icu.text.DecimalFormatSymbols; @@ -17,7 +19,7 @@ public class PlusSignMatcher extends SymbolMatcher { public static PlusSignMatcher getInstance(DecimalFormatSymbols symbols, boolean allowTrailing) { String symbolString = symbols.getPlusSignString(); - if (DEFAULT.uniSet.contains(symbolString)) { + if (safeContains(DEFAULT.uniSet, symbolString)) { return allowTrailing ? DEFAULT_ALLOW_TRAILING : DEFAULT; } else { return new PlusSignMatcher(symbolString, allowTrailing); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ScientificMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ScientificMatcher.java index 95dff257db8..1904ea414a2 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ScientificMatcher.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ScientificMatcher.java @@ -2,6 +2,8 @@ // License & terms of use: http://www.unicode.org/copyright.html#License package com.ibm.icu.impl.number.parse; +import static com.ibm.icu.impl.number.parse.ParsingUtils.safeContains; + import com.ibm.icu.impl.StaticUnicodeSets; import com.ibm.icu.impl.StringSegment; import com.ibm.icu.impl.number.DecimalQuantity_DualStorageBCD; @@ -32,9 +34,9 @@ public class ScientificMatcher implements NumberParseMatcher { ParsingUtils.PARSE_FLAG_INTEGER_ONLY | ParsingUtils.PARSE_FLAG_GROUPING_DISABLED); String minusSign = symbols.getMinusSignString(); - customMinusSign = minusSignSet().contains(minusSign) ? null : minusSign; + customMinusSign = safeContains(minusSignSet(), minusSign) ? null : minusSign; String plusSign = symbols.getPlusSignString(); - customPlusSign = plusSignSet().contains(plusSign) ? null : plusSign; + customPlusSign = safeContains(plusSignSet(), plusSign) ? null : plusSign; } private static UnicodeSet minusSignSet() { @@ -53,6 +55,7 @@ public class ScientificMatcher implements NumberParseMatcher { } // First match the scientific separator, and then match another number after it. + // NOTE: This is guarded by the smoke test; no need to check exponentSeparatorString length again. int overlap1 = segment.getCommonPrefixLength(exponentSeparatorString); if (overlap1 == exponentSeparatorString.length()) { // Full exponent separator match. @@ -71,6 +74,7 @@ public class ScientificMatcher implements NumberParseMatcher { } else if (segment.startsWith(plusSignSet())) { segment.adjustOffsetByCodePoint(); } else if (segment.startsWith(customMinusSign)) { + // Note: call site is guarded with startsWith, which returns false on empty string int overlap2 = segment.getCommonPrefixLength(customMinusSign); if (overlap2 != customMinusSign.length()) { // Partial custom sign match; un-match the exponent separator. @@ -80,6 +84,7 @@ public class ScientificMatcher implements NumberParseMatcher { exponentSign = -1; segment.adjustOffset(overlap2); } else if (segment.startsWith(customPlusSign)) { + // Note: call site is guarded with startsWith, which returns false on empty string int overlap2 = segment.getCommonPrefixLength(customPlusSign); if (overlap2 != customPlusSign.length()) { // Partial custom sign match; un-match the exponent separator. diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTest.java index 1210340ac24..78bf6b56eba 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTest.java @@ -6170,4 +6170,57 @@ public class NumberFormatTest extends TestFmwk { NumberFormat df = NumberFormat.getInstance(ULocale.US, NumberFormat.PLURALCURRENCYSTYLE); expect2(df, 1.5, "1.50 US dollars"); } + + @Test + public void test13804_EmptyStringsWhenParsing() { + DecimalFormatSymbols dfs = DecimalFormatSymbols.getInstance(ULocale.ENGLISH); + dfs.setCurrencySymbol(""); + dfs.setDecimalSeparatorString(""); + dfs.setDigitStrings(new String[] { "", "", "", "", "", "", "", "", "", "" }); + dfs.setExponentMultiplicationSign(""); + dfs.setExponentSeparator(""); + dfs.setGroupingSeparatorString(""); + dfs.setInfinity(""); + dfs.setInternationalCurrencySymbol(""); + dfs.setMinusSignString(""); + dfs.setMonetaryDecimalSeparatorString(""); + dfs.setMonetaryGroupingSeparatorString(""); + dfs.setNaN(""); + dfs.setPatternForCurrencySpacing(DecimalFormatSymbols.CURRENCY_SPC_INSERT, false, ""); + dfs.setPatternForCurrencySpacing(DecimalFormatSymbols.CURRENCY_SPC_INSERT, true, ""); + dfs.setPercentString(""); + dfs.setPerMillString(""); + dfs.setPlusSignString(""); + + DecimalFormat df = new DecimalFormat("0", dfs); + df.setGroupingUsed(true); + df.setScientificNotation(true); + df.setParseStrict(false); // enable all matchers + df.format(0); // should not throw or hit infinite loop + String[] samples = new String[] { + "", + "123", + "$123", + "-", + "+", + "44%", + "1E+2.3" + }; + for (String sample : samples) { + logln("Attempting parse on: " + sample); + // We don't care about the results, only that we don't throw and don't loop. + ParsePosition ppos = new ParsePosition(0); + df.parse(sample, ppos); + ppos = new ParsePosition(0); + df.parseCurrency(sample, ppos); + } + + // Test with a nonempty exponent separator symbol to cover more code + dfs.setExponentSeparator("E"); + df.setDecimalFormatSymbols(dfs); + { + ParsePosition ppos = new ParsePosition(0); + df.parse("1E+2.3", ppos); + } + } }