// Try to match a currency spacing separator.
int32_t initialOffset = segment.getOffset();
bool maybeMore = false;
- if (result.seenNumber()) {
+ if (result.seenNumber() && !beforeSuffixInsert.isEmpty()) {
int32_t overlap = segment.getCommonPrefixLength(beforeSuffixInsert);
if (overlap == beforeSuffixInsert.length()) {
segment.adjustOffset(overlap);
}
// Try to match a currency spacing separator.
- if (!result.seenNumber()) {
+ if (!result.seenNumber() && !afterPrefixInsert.isEmpty()) {
int32_t overlap = segment.getCommonPrefixLength(afterPrefixInsert);
if (overlap == afterPrefixInsert.length()) {
segment.adjustOffset(overlap);
UErrorCode& status) const {
bool maybeMore = false;
- int32_t overlap1 = segment.getCaseSensitivePrefixLength(fCurrency1);
+ int32_t overlap1;
+ if (!fCurrency1.isEmpty()) {
+ overlap1 = segment.getCaseSensitivePrefixLength(fCurrency1);
+ } else {
+ overlap1 = -1;
+ }
maybeMore = maybeMore || overlap1 == segment.length();
if (overlap1 == fCurrency1.length()) {
utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);
return maybeMore;
}
- int32_t overlap2 = segment.getCaseSensitivePrefixLength(fCurrency2);
+ int32_t overlap2;
+ if (!fCurrency2.isEmpty()) {
+ overlap2 = segment.getCaseSensitivePrefixLength(fCurrency2);
+ } else {
+ overlap2 = -1;
+ }
maybeMore = maybeMore || overlap2 == segment.length();
if (overlap2 == fCurrency2.length()) {
utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);
strictSeparators ? unisets::STRICT_PERIOD : unisets::PERIOD);
if (decimalKey >= 0) {
decimalUniSet = unisets::get(decimalKey);
- } else {
+ } else if (!decimalSeparator.isEmpty()) {
auto* set = new UnicodeSet();
set->add(decimalSeparator.char32At(0));
set->freeze();
decimalUniSet = set;
fLocalDecimalUniSet.adoptInstead(set);
+ } else {
+ decimalUniSet = unisets::get(unisets::EMPTY);
}
if (groupingKey >= 0 && decimalKey >= 0) {
if (digit == -1 && !fLocalDigitStrings.isNull()) {
for (int32_t i = 0; i < 10; i++) {
const UnicodeString& str = fLocalDigitStrings[i];
+ if (str.isEmpty()) {
+ continue;
+ }
int32_t overlap = segment.getCommonPrefixLength(str);
if (overlap == str.length()) {
segment.adjustOffset(overlap);
// 1) Attempt the decimal separator string literal.
// if (we have not seen a decimal separator yet) { ... }
- if (actualDecimalString.isBogus()) {
+ if (actualDecimalString.isBogus() && !decimalSeparator.isEmpty()) {
int32_t overlap = segment.getCommonPrefixLength(decimalSeparator);
maybeMore = maybeMore || (overlap == segment.length());
if (overlap == decimalSeparator.length()) {
// 2.5) Attempt to match a new the grouping separator string literal.
// if (we have not seen a grouping or decimal separator yet) { ... }
- if (!groupingDisabled && actualGroupingString.isBogus() && actualDecimalString.isBogus()) {
+ if (!groupingDisabled && actualGroupingString.isBogus() && actualDecimalString.isBogus() &&
+ !groupingSeparator.isEmpty()) {
int32_t overlap = segment.getCommonPrefixLength(groupingSeparator);
maybeMore = maybeMore || (overlap == segment.length());
if (overlap == groupingSeparator.length()) {
}
// First match the scientific separator, and then match another number after it.
+ // NOTE: This is guarded by the smoke test; no need to check fExponentSeparatorString length again.
int overlap1 = segment.getCommonPrefixLength(fExponentSeparatorString);
if (overlap1 == fExponentSeparatorString.length()) {
// Full exponent separator match.
} else if (segment.startsWith(plusSignSet())) {
segment.adjustOffsetByCodePoint();
} else if (segment.startsWith(fCustomMinusSign)) {
+ // Note: call site is guarded with startsWith, which returns false on empty string
int32_t overlap2 = segment.getCommonPrefixLength(fCustomMinusSign);
if (overlap2 != fCustomMinusSign.length()) {
// Partial custom sign match; un-match the exponent separator.
exponentSign = -1;
segment.adjustOffset(overlap2);
} else if (segment.startsWith(fCustomPlusSign)) {
+ // Note: call site is guarded with startsWith, which returns false on empty string
int32_t overlap2 = segment.getCommonPrefixLength(fCustomPlusSign);
if (overlap2 != fCustomPlusSign.length()) {
// Partial custom sign match; un-match the exponent separator.
}
int32_t StringSegment::getPrefixLengthInternal(const UnicodeString& other, bool foldCase) {
+ U_ASSERT(other.length() > 0);
int32_t offset = 0;
for (; offset < uprv_min(length(), other.length());) {
// TODO: case-fold code points, not chars
* since the first 2 characters are the same.
*
* <p>
- * This method will perform case folding if case folding is enabled for the parser.
+ * This method only returns offsets along code point boundaries.
+ *
+ * <p>
+ * This method will perform case folding if case folding was enabled in the constructor.
+ *
+ * <p>
+ * IMPORTANT: The given UnicodeString must not be empty! It is the caller's responsibility to check.
*/
int32_t getCommonPrefixLength(const UnicodeString& other);
TESTCASE_AUTO(Test11640_TripleCurrencySymbol);
TESTCASE_AUTO(Test13763_FieldPositionIteratorOffset);
TESTCASE_AUTO(Test13777_ParseLongNameNonCurrencyMode);
+ TESTCASE_AUTO(Test13804_EmptyStringsWhenParsing);
TESTCASE_AUTO_END;
}
expect2(*df, 1.5, u"1.50 US dollars");
}
+// Smoke test for ticket 13804: formatting and parsing must neither crash nor loop forever when
+// the DecimalFormatSymbols strings are empty. Every symbol and both currency-spacing insert
+// patterns are set to the empty string, then a handful of sample inputs are pushed through
+// parse() and parseCurrency(). The parse results themselves are deliberately not checked.
+void NumberFormatTest::Test13804_EmptyStringsWhenParsing() {
+    IcuTestErrorCode status(*this, "Test13804_EmptyStringsWhenParsing");
+
+    DecimalFormatSymbols dfs("en", status);
+    if (status.errIfFailureAndReset()) {
+        return;
+    }
+    dfs.setSymbol(DecimalFormatSymbols::kCurrencySymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kZeroDigitSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kOneDigitSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kTwoDigitSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kThreeDigitSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kFourDigitSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kFiveDigitSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kSixDigitSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kSevenDigitSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kEightDigitSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kNineDigitSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kExponentMultiplicationSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kExponentialSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kGroupingSeparatorSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kInfinitySymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kIntlCurrencySymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kMinusSignSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kMonetarySeparatorSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kMonetaryGroupingSeparatorSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kNaNSymbol, u"", FALSE);
+    dfs.setPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, FALSE, u"");
+    dfs.setPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, TRUE, u"");
+    dfs.setSymbol(DecimalFormatSymbols::kPercentSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kPerMillSymbol, u"", FALSE);
+    dfs.setSymbol(DecimalFormatSymbols::kPlusSignSymbol, u"", FALSE);
+
+    DecimalFormat df("0", dfs, status);
+    if (status.errIfFailureAndReset()) {
+        return;
+    }
+    df.setGroupingUsed(TRUE);
+    df.setScientificNotation(TRUE);
+    df.setLenient(TRUE); // enable all matchers
+    {
+        UnicodeString result;
+        df.format(0, result); // should not crash or hit infinite loop
+    }
+    const char16_t* samples[] = {
+            u"",
+            u"123",
+            u"$123",
+            u"-",
+            u"+",
+            u"44%",
+            u"1E+2.3"
+    };
+    for (auto& sample : samples) {
+        logln(UnicodeString(u"Attempting parse on: ") + sample);
+        status.setScope(sample);
+        // We don't care about the results, only that we don't crash and don't loop.
+        Formattable result;
+        ParsePosition ppos(0);
+        df.parse(sample, result, ppos);
+        ppos = ParsePosition(0);
+        // parseCurrency() hands back a heap-allocated CurrencyAmount owned by the caller (or
+        // NULL on failure). Adopt it so it is deleted at the end of each iteration rather
+        // than leaked.
+        LocalPointer<CurrencyAmount> currencyAmount(df.parseCurrency(sample, ppos));
+        status.errIfFailureAndReset();
+    }
+
+    // Test with a nonempty exponent separator symbol to cover more code
+    dfs.setSymbol(DecimalFormatSymbols::kExponentialSymbol, u"E", FALSE);
+    df.setDecimalFormatSymbols(dfs);
+    {
+        Formattable result;
+        ParsePosition ppos(0);
+        df.parse(u"1E+2.3", result, ppos);
+    }
+}
+
#endif /* #if !UCONFIG_NO_FORMATTING */
void Test11640_TripleCurrencySymbol();
void Test13763_FieldPositionIteratorOffset();
void Test13777_ParseLongNameNonCurrencyMode();
+ void Test13804_EmptyStringsWhenParsing();
private:
UBool testFormattableAsUFormattable(const char *file, int line, Formattable &f);
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl;
+import static com.ibm.icu.impl.number.parse.ParsingUtils.safeContains;
+
import java.util.EnumMap;
import java.util.Map;
* @return key1 if the set contains str, or null if not.
*/
public static Key chooseFrom(String str, Key key1) {
- return get(key1).contains(str) ? key1 : null;
+ return safeContains(get(key1), str) ? key1 : null; // null (not a Key) signals "no match"; safeContains reports false for an empty str
}
/**
* contains str.
*/
public static Key chooseFrom(String str, Key key1, Key key2) {
- return get(key1).contains(str) ? key1 : chooseFrom(str, key2);
+ return safeContains(get(key1), str) ? key1 : chooseFrom(str, key2); // key1 is tried first; otherwise defer to the single-key overload for key2
}
/**
*
* <p>
* This method will perform case folding if case folding was enabled in the constructor.
+ *
+ * <p>
+ * IMPORTANT: The given CharSequence must not be empty! It is the caller's responsibility to check.
*/
public int getCommonPrefixLength(CharSequence other) {
return getPrefixLengthInternal(other, foldCase); // forwards this object's foldCase value; the callee asserts that other is non-empty
}
private int getPrefixLengthInternal(CharSequence other, boolean foldCase) {
+ assert other.length() != 0;
int offset = 0;
for (; offset < Math.min(length(), other.length());) {
+ // TODO: case-fold code points, not chars
int cp1 = Character.codePointAt(this, offset);
int cp2 = Character.codePointAt(other, offset);
if (!codePointsEqual(cp1, cp2, foldCase)) {
// Try to match a currency spacing separator.
int initialOffset = segment.getOffset();
boolean maybeMore = false;
- if (result.seenNumber()) {
+ if (result.seenNumber() && !beforeSuffixInsert.isEmpty()) {
int overlap = segment.getCommonPrefixLength(beforeSuffixInsert);
if (overlap == beforeSuffixInsert.length()) {
segment.adjustOffset(overlap);
}
// Try to match a currency spacing separator.
- if (!result.seenNumber()) {
+ if (!result.seenNumber() && !afterPrefixInsert.isEmpty()) {
int overlap = segment.getCommonPrefixLength(afterPrefixInsert);
if (overlap == afterPrefixInsert.length()) {
segment.adjustOffset(overlap);
private boolean matchCurrency(StringSegment segment, ParsedNumber result) {
boolean maybeMore = false;
- int overlap1 = segment.getCaseSensitivePrefixLength(currency1);
+ int overlap1;
+ if (!currency1.isEmpty()) {
+ overlap1 = segment.getCaseSensitivePrefixLength(currency1);
+ } else {
+ overlap1 = -1;
+ }
maybeMore = maybeMore || overlap1 == segment.length();
if (overlap1 == currency1.length()) {
result.currencyCode = isoCode;
return maybeMore;
}
- int overlap2 = segment.getCaseSensitivePrefixLength(currency2);
+ int overlap2;
+ if (!currency2.isEmpty()) {
+ overlap2 = segment.getCaseSensitivePrefixLength(currency2);
+ } else {
+ overlap2 = -1;
+ }
maybeMore = maybeMore || overlap2 == segment.length();
if (overlap2 == currency2.length()) {
result.currencyCode = isoCode;
int longestFullMatch = 0;
for (int i=0; i<StandardPlural.COUNT; i++) {
String name = localLongNames[i];
+ if (name.isEmpty()) {
+ continue;
+ }
int overlap = segment.getCommonPrefixLength(name);
if (overlap == name.length() && name.length() > longestFullMatch) {
longestFullMatch = name.length();
strictSeparators ? Key.STRICT_PERIOD : Key.PERIOD);
if (decimalKey != null) {
decimalUniSet = StaticUnicodeSets.get(decimalKey);
- } else {
+ } else if (!decimalSeparator.isEmpty()) {
decimalUniSet = new UnicodeSet().add(decimalSeparator.codePointAt(0)).freeze();
+ } else {
+ decimalUniSet = UnicodeSet.EMPTY;
}
if (groupingKey != null && decimalKey != null) {
if (digit == -1 && digitStrings != null) {
for (int i = 0; i < digitStrings.length; i++) {
String str = digitStrings[i];
+ if (str.isEmpty()) {
+ continue;
+ }
int overlap = segment.getCommonPrefixLength(str);
if (overlap == str.length()) {
segment.adjustOffset(overlap);
// 1) Attempt the decimal separator string literal.
// if (we have not seen a decimal separator yet) { ... }
- if (actualDecimalString == null) {
+ if (actualDecimalString == null && !decimalSeparator.isEmpty()) {
int overlap = segment.getCommonPrefixLength(decimalSeparator);
maybeMore = maybeMore || (overlap == segment.length());
if (overlap == decimalSeparator.length()) {
// 2.5) Attempt to match a new the grouping separator string literal.
// if (we have not seen a grouping or decimal separator yet) { ... }
- if (!groupingDisabled && actualGroupingString == null && actualDecimalString == null) {
+ if (!groupingDisabled
+ && actualGroupingString == null
+ && actualDecimalString == null
+ && !groupingSeparator.isEmpty()) {
int overlap = segment.getCommonPrefixLength(groupingSeparator);
maybeMore = maybeMore || (overlap == segment.length());
if (overlap == groupingSeparator.length()) {
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
+import static com.ibm.icu.impl.number.parse.ParsingUtils.safeContains;
+
import com.ibm.icu.impl.StaticUnicodeSets;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.text.DecimalFormatSymbols;
public static InfinityMatcher getInstance(DecimalFormatSymbols symbols) {
String symbolString = symbols.getInfinity();
- if (DEFAULT.uniSet.contains(symbolString)) {
+ if (safeContains(DEFAULT.uniSet, symbolString)) {
return DEFAULT;
} else {
return new InfinityMatcher(symbolString);
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
+import static com.ibm.icu.impl.number.parse.ParsingUtils.safeContains;
+
import com.ibm.icu.impl.StaticUnicodeSets;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.text.DecimalFormatSymbols;
public static MinusSignMatcher getInstance(DecimalFormatSymbols symbols, boolean allowTrailing) {
String symbolString = symbols.getMinusSignString();
- if (DEFAULT.uniSet.contains(symbolString)) {
+ if (safeContains(DEFAULT.uniSet, symbolString)) {
return allowTrailing ? DEFAULT_ALLOW_TRAILING : DEFAULT;
} else {
return new MinusSignMatcher(symbolString, allowTrailing);
}
}
+    // TODO: Remove this helper function (and update call sites) when #13805 is fixed
+    /**
+     * Membership test that treats an empty sequence as never contained.
+     *
+     * @param uniset the set to query; it is not consulted when str is empty.
+     * @param str the sequence to look up.
+     * @return false if str has length zero; otherwise the result of uniset.contains(str).
+     */
+    public static boolean safeContains(UnicodeSet uniset, CharSequence str) {
+        if (str.length() == 0) {
+            return false;
+        }
+        return uniset.contains(str);
+    }
+
}
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
+import static com.ibm.icu.impl.number.parse.ParsingUtils.safeContains;
+
import com.ibm.icu.impl.StaticUnicodeSets;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.text.DecimalFormatSymbols;
public static PlusSignMatcher getInstance(DecimalFormatSymbols symbols, boolean allowTrailing) {
String symbolString = symbols.getPlusSignString();
- if (DEFAULT.uniSet.contains(symbolString)) {
+ if (safeContains(DEFAULT.uniSet, symbolString)) {
return allowTrailing ? DEFAULT_ALLOW_TRAILING : DEFAULT;
} else {
return new PlusSignMatcher(symbolString, allowTrailing);
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
+import static com.ibm.icu.impl.number.parse.ParsingUtils.safeContains;
+
import com.ibm.icu.impl.StaticUnicodeSets;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.impl.number.DecimalQuantity_DualStorageBCD;
ParsingUtils.PARSE_FLAG_INTEGER_ONLY | ParsingUtils.PARSE_FLAG_GROUPING_DISABLED);
String minusSign = symbols.getMinusSignString();
- customMinusSign = minusSignSet().contains(minusSign) ? null : minusSign;
+ customMinusSign = safeContains(minusSignSet(), minusSign) ? null : minusSign;
String plusSign = symbols.getPlusSignString();
- customPlusSign = plusSignSet().contains(plusSign) ? null : plusSign;
+ customPlusSign = safeContains(plusSignSet(), plusSign) ? null : plusSign;
}
private static UnicodeSet minusSignSet() {
}
// First match the scientific separator, and then match another number after it.
+ // NOTE: This is guarded by the smoke test; no need to check exponentSeparatorString length again.
int overlap1 = segment.getCommonPrefixLength(exponentSeparatorString);
if (overlap1 == exponentSeparatorString.length()) {
// Full exponent separator match.
} else if (segment.startsWith(plusSignSet())) {
segment.adjustOffsetByCodePoint();
} else if (segment.startsWith(customMinusSign)) {
+ // Note: call site is guarded with startsWith, which returns false on empty string
int overlap2 = segment.getCommonPrefixLength(customMinusSign);
if (overlap2 != customMinusSign.length()) {
// Partial custom sign match; un-match the exponent separator.
exponentSign = -1;
segment.adjustOffset(overlap2);
} else if (segment.startsWith(customPlusSign)) {
+ // Note: call site is guarded with startsWith, which returns false on empty string
int overlap2 = segment.getCommonPrefixLength(customPlusSign);
if (overlap2 != customPlusSign.length()) {
// Partial custom sign match; un-match the exponent separator.
NumberFormat df = NumberFormat.getInstance(ULocale.US, NumberFormat.PLURALCURRENCYSTYLE);
expect2(df, 1.5, "1.50 US dollars");
}
+
+    /**
+     * Smoke test for ticket 13804: with every symbol string (and both currency-spacing insert
+     * patterns) set to the empty string, formatting and parsing must neither throw nor loop
+     * forever. Parse results are intentionally ignored.
+     */
+    @Test
+    public void test13804_EmptyStringsWhenParsing() {
+        DecimalFormatSymbols emptySymbols = DecimalFormatSymbols.getInstance(ULocale.ENGLISH);
+        emptySymbols.setCurrencySymbol("");
+        emptySymbols.setDecimalSeparatorString("");
+        emptySymbols.setDigitStrings(new String[] { "", "", "", "", "", "", "", "", "", "" });
+        emptySymbols.setExponentMultiplicationSign("");
+        emptySymbols.setExponentSeparator("");
+        emptySymbols.setGroupingSeparatorString("");
+        emptySymbols.setInfinity("");
+        emptySymbols.setInternationalCurrencySymbol("");
+        emptySymbols.setMinusSignString("");
+        emptySymbols.setMonetaryDecimalSeparatorString("");
+        emptySymbols.setMonetaryGroupingSeparatorString("");
+        emptySymbols.setNaN("");
+        emptySymbols.setPatternForCurrencySpacing(DecimalFormatSymbols.CURRENCY_SPC_INSERT, false, "");
+        emptySymbols.setPatternForCurrencySpacing(DecimalFormatSymbols.CURRENCY_SPC_INSERT, true, "");
+        emptySymbols.setPercentString("");
+        emptySymbols.setPerMillString("");
+        emptySymbols.setPlusSignString("");
+
+        DecimalFormat formatter = new DecimalFormat("0", emptySymbols);
+        formatter.setGroupingUsed(true);
+        formatter.setScientificNotation(true);
+        formatter.setParseStrict(false); // enable all matchers
+        formatter.format(0); // should not throw or hit infinite loop
+
+        String[] inputs = { "", "123", "$123", "-", "+", "44%", "1E+2.3" };
+        for (int i = 0; i < inputs.length; i++) {
+            String input = inputs[i];
+            logln("Attempting parse on: " + input);
+            // Only termination without an exception matters here; the parsed values are discarded.
+            formatter.parse(input, new ParsePosition(0));
+            formatter.parseCurrency(input, new ParsePosition(0));
+        }
+
+        // Repeat one parse with a nonempty exponent separator symbol to cover more code
+        emptySymbols.setExponentSeparator("E");
+        formatter.setDecimalFormatSymbols(emptySymbols);
+        formatter.parse("1E+2.3", new ParsePosition(0));
+    }
}