From: Shane Carr Date: Wed, 24 Jan 2018 08:59:27 +0000 (+0000) Subject: ICU-13513 Re-working separator logic to be most highly compatible with previous ICU. X-Git-Tag: release-61-rc~138^2~10 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=49d920d0984bfd1f93f590b901eec8c772aaa5f1;p=icu ICU-13513 Re-working separator logic to be most highly compatible with previous ICU. X-SVN-Rev: 40801 --- diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/DecimalMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/DecimalMatcher.java index e804fe55843..10dbc14265a 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/DecimalMatcher.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/DecimalMatcher.java @@ -30,6 +30,9 @@ public class DecimalMatcher implements NumberParseMatcher { private final int grouping1; private final int grouping2; + private final String groupingSeparator; + private final String decimalSeparator; + // Assumption: these sets all consist of single code points. If this assumption needs to be broken, // fix getLeadCodePoints() as well as matching logic. Be careful of the performance impact. 
private final UnicodeSet groupingUniSet; @@ -47,8 +50,6 @@ public class DecimalMatcher implements NumberParseMatcher { } private DecimalMatcher(DecimalFormatSymbols symbols, Grouper grouper, int parseFlags) { - Key groupingKey, decimalKey; - String groupingSeparator, decimalSeparator; if (0 != (parseFlags & ParsingUtils.PARSE_FLAG_MONETARY_SEPARATORS)) { groupingSeparator = symbols.getMonetaryGroupingSeparatorString(); decimalSeparator = symbols.getMonetaryDecimalSeparatorString(); @@ -56,68 +57,31 @@ public class DecimalMatcher implements NumberParseMatcher { groupingSeparator = symbols.getGroupingSeparatorString(); decimalSeparator = symbols.getDecimalSeparatorString(); } + boolean strictSeparators = 0 != (parseFlags & ParsingUtils.PARSE_FLAG_STRICT_SEPARATORS); + Key groupingKey = strictSeparators ? Key.STRICT_ALL_SEPARATORS : Key.ALL_SEPARATORS; - // Attempt to find values in the static cache - if (0 != (parseFlags & ParsingUtils.PARSE_FLAG_STRICT_SEPARATORS)) { - decimalKey = UnicodeSetStaticCache - .chooseFrom(decimalSeparator, Key.STRICT_COMMA, Key.STRICT_PERIOD); - if (decimalKey == Key.STRICT_COMMA) { - // Decimal is comma; grouping should be period or custom - groupingKey = UnicodeSetStaticCache.chooseFrom(groupingSeparator, - Key.STRICT_PERIOD_OR_OTHER); - } else if (decimalKey == Key.STRICT_PERIOD) { - // Decimal is period; grouping should be comma or custom - groupingKey = UnicodeSetStaticCache.chooseFrom(groupingSeparator, - Key.STRICT_COMMA_OR_OTHER); - } else { - // Decimal is custom; grouping can be either comma or period or custom - groupingKey = UnicodeSetStaticCache.chooseFrom(groupingSeparator, - Key.STRICT_COMMA_OR_OTHER, - Key.STRICT_PERIOD_OR_OTHER); - } + // Attempt to find separators in the static cache + + groupingUniSet = UnicodeSetStaticCache.get(groupingKey); + Key decimalKey = UnicodeSetStaticCache.chooseFrom(decimalSeparator, + strictSeparators ? Key.STRICT_COMMA : Key.COMMA, + strictSeparators ? 
Key.STRICT_PERIOD : Key.PERIOD); + if (decimalKey != null) { + decimalUniSet = UnicodeSetStaticCache.get(decimalKey); } else { - decimalKey = UnicodeSetStaticCache.chooseFrom(decimalSeparator, Key.COMMA, Key.PERIOD); - if (decimalKey == Key.COMMA) { - // Decimal is comma; grouping should be period or custom - groupingKey = UnicodeSetStaticCache.chooseFrom(groupingSeparator, Key.PERIOD_OR_OTHER); - } else if (decimalKey == Key.PERIOD) { - // Decimal is period; grouping should be comma or custom - groupingKey = UnicodeSetStaticCache.chooseFrom(groupingSeparator, Key.COMMA_OR_OTHER); - } else { - // Decimal is custom; grouping can be either comma or period or custom - groupingKey = UnicodeSetStaticCache - .chooseFrom(groupingSeparator, Key.COMMA_OR_OTHER, Key.PERIOD_OR_OTHER); - } + decimalUniSet = new UnicodeSet().add(decimalSeparator.codePointAt(0)).freeze(); } - // Get the sets from the static cache if they were found - UnicodeSet _groupingUniSet = null, _decimalUniSet = null, _separatorSet = null, _leadSet = null; if (groupingKey != null && decimalKey != null) { - _groupingUniSet = UnicodeSetStaticCache.get(groupingKey); - _decimalUniSet = UnicodeSetStaticCache.get(decimalKey); - Key separatorKey = UnicodeSetStaticCache.unionOf(groupingKey, decimalKey); - if (separatorKey != null) { - _separatorSet = UnicodeSetStaticCache.get(separatorKey); - Key leadKey = UnicodeSetStaticCache.unionOf(Key.DIGITS, separatorKey); - if (leadKey != null) { - _leadSet = UnicodeSetStaticCache.get(leadKey); - } - } - } else if (groupingKey != null) { - _groupingUniSet = UnicodeSetStaticCache.get(groupingKey); - } else if (decimalKey != null) { - _decimalUniSet = UnicodeSetStaticCache.get(decimalKey); + // Everything is available in the static cache + separatorSet = groupingUniSet; + leadSet = UnicodeSetStaticCache.get(strictSeparators ? 
Key.DIGITS_OR_ALL_SEPARATORS + : Key.DIGITS_OR_STRICT_ALL_SEPARATORS); + } else { + separatorSet = new UnicodeSet().addAll(groupingUniSet).addAll(decimalUniSet).freeze(); + leadSet = null; } - // Finish resolving fallbacks - groupingUniSet = _groupingUniSet != null ? _groupingUniSet - : new UnicodeSet().add(groupingSeparator.codePointAt(0)).freeze(); - decimalUniSet = _decimalUniSet != null ? _decimalUniSet - : new UnicodeSet().add(decimalSeparator.codePointAt(0)).freeze(); - separatorSet = _separatorSet != null ? _separatorSet - : new UnicodeSet().addAll(groupingUniSet).addAll(decimalUniSet).freeze(); - leadSet = _leadSet; // null if not available - int cpZero = symbols.getCodePointZero(); if (cpZero == -1 || !UCharacter.isDigit(cpZero) || UCharacter.digit(cpZero) != 0) { digitStrings = symbols.getDigitStringsLocal(); @@ -144,21 +108,25 @@ public class DecimalMatcher implements NumberParseMatcher { return false; } - ParsedNumber backup = null; + ParsedNumber backupResult = null; if (requireGroupingMatch) { - backup = new ParsedNumber(); - backup.copyFrom(result); + backupResult = new ParsedNumber(); + backupResult.copyFrom(result); } - int firstGroup = 0; - int prevGroup = 0; - int currGroup = 0; - int separator = -1; + // strict parsing + boolean strictFail = false; // did we exit with a strict parse failure? 
+ String actualGroupingString = groupingSeparator; + String actualDecimalString = decimalSeparator; + int groupedDigitCount = 0; // tracking count of digits delimited by grouping separator + int backupOffset = -1; // used for preserving the last confirmed position + boolean afterFirstGrouping = false; + boolean seenGrouping = false; + boolean seenDecimal = false; + int digitsAfterDecimal = 0; int initialOffset = segment.getOffset(); int exponent = 0; boolean hasPartialPrefix = false; - boolean seenBothSeparators = false; - boolean illegalGrouping = false; while (segment.length() > 0) { hasPartialPrefix = false; @@ -187,8 +155,27 @@ public class DecimalMatcher implements NumberParseMatcher { } } - // If found, save it in the DecimalQuantity or scientific adjustment. if (digit >= 0) { + // Digit was found. + // Check for grouping size violation + if (backupOffset != -1) { + if (requireGroupingMatch) { + // comma followed by digit, so group before comma is a secondary + // group. If there was a group separator before that, the group + // must == the secondary group length, else it can be <= the + // secondary group length. + if ((afterFirstGrouping && groupedDigitCount != grouping2) + || (!afterFirstGrouping && groupedDigitCount > grouping2)) { + strictFail = true; + break; + } + } + afterFirstGrouping = true; + backupOffset = -1; + groupedDigitCount = 0; + } + + // Save the digit in the DecimalQuantity or scientific adjustment. if (isScientific) { int nextExponent = digit + exponent * 10; if (nextExponent < exponent) { @@ -204,82 +191,109 @@ public class DecimalMatcher implements NumberParseMatcher { result.quantity.appendDigit(digit, 0, true); } result.setCharsConsumed(segment); - currGroup++; + groupedDigitCount++; + if (seenDecimal) { + digitsAfterDecimal++; + } continue; } - // Attempt to match a separator. - if (!seenBothSeparators && cp != -1 && separatorSet.contains(cp)) { - if (separator == -1) { - // First separator; could be either grouping or decimal. 
- if (groupingDisabled && !decimalUniSet.contains(cp)) { + // Attempt to match a literal grouping or decimal separator + int decimalOverlap = segment.getCommonPrefixLength(actualDecimalString); + boolean decimalStringMatch = decimalOverlap == actualDecimalString.length(); + int groupingOverlap = segment.getCommonPrefixLength(actualGroupingString); + boolean groupingStringMatch = groupingOverlap == actualGroupingString.length(); + + hasPartialPrefix = (decimalOverlap == segment.length()) + || (groupingOverlap == segment.length()); + + if (!seenDecimal + && !groupingStringMatch + && (decimalStringMatch || (!seenDecimal && decimalUniSet.contains(cp)))) { + // matched a decimal separator + if (requireGroupingMatch) { + if (backupOffset != -1 || (seenGrouping && groupedDigitCount != grouping1)) { + strictFail = true; break; } - if (integerOnly && !groupingUniSet.contains(cp)) { + } + + // If we're only parsing integers, then don't parse this one. + if (integerOnly) { + break; + } + + seenDecimal = true; + if (!decimalStringMatch) { + actualDecimalString = UCharacter.toString(cp); + } + segment.adjustOffset(actualDecimalString.length()); + result.setCharsConsumed(segment); + result.flags |= ParsedNumber.FLAG_HAS_DECIMAL_SEPARATOR; + continue; + } + + if (!groupingDisabled + && !decimalStringMatch + && (groupingStringMatch || (!seenGrouping && groupingUniSet.contains(cp)))) { + // matched a grouping separator + if (requireGroupingMatch) { + if (groupedDigitCount == 0) { + // leading group + strictFail = true; break; - } - separator = cp; - firstGroup = currGroup; - if (requireGroupingMatch && currGroup == 0 && !decimalUniSet.contains(cp)) { - illegalGrouping = true; - } - } else if (!groupingDisabled && separator == cp && groupingUniSet.contains(cp)) { - // Second or later grouping separator. 
- prevGroup = currGroup; - if (requireGroupingMatch && currGroup == 0) { + } else if (backupOffset != -1) { + // two group separators in a row break; } - if (requireGroupingMatch && currGroup != grouping2) { - if (currGroup == grouping1) { - break; - } else { - illegalGrouping = true; - break; - } - } - } else if (!integerOnly && separator != cp && decimalUniSet.contains(cp)) { - // Decimal separator after a grouping separator. - if (requireGroupingMatch && currGroup != grouping1) { - illegalGrouping = true; - } - seenBothSeparators = true; - } else { - // Invalid separator. - break; } - currGroup = 0; - segment.adjustOffset(Character.charCount(cp)); + + seenGrouping = true; + if (!groupingStringMatch) { + actualGroupingString = UCharacter.toString(cp); + } + backupOffset = segment.getOffset(); + segment.adjustOffset(actualGroupingString.length()); + // Note: do NOT set charsConsumed continue; } + // Not a digit and not a separator break; } - // Unless the first group directly precedes the grouping separator, check it for validity - if (seenBothSeparators || (separator != -1 && !decimalUniSet.contains(separator))) { - if (currGroup > 0 && firstGroup > grouping2) { - illegalGrouping = true; - } - } + // if (backupOffset != -1) { + // segment.setOffset(backupOffset); + // hasPartialPrefix = true; + // } - // Check the final grouping size for validity + // Check the final grouping for validity if (requireGroupingMatch - && separator != -1 - && !seenBothSeparators - && !decimalUniSet.contains(separator)) { - if (currGroup > 0 && currGroup != grouping1) { - illegalGrouping = true; - } - if (currGroup == 0 && prevGroup > 0 && prevGroup != grouping1) { - illegalGrouping = true; - } + && !seenDecimal + && seenGrouping + && afterFirstGrouping + && groupedDigitCount != grouping1) { + strictFail = true; } - if (requireGroupingMatch && illegalGrouping) { - result.copyFrom(backup); + if (requireGroupingMatch && strictFail) { + result.copyFrom(backupResult); 
segment.setOffset(initialOffset); + } - } else if (isScientific) { + if (result.quantity == null && segment.getOffset() != initialOffset) { + // Strings that start with a separator but have no digits. + // We don't need a backup of ParsedNumber because no changes could have been made to it. + segment.setOffset(initialOffset); + hasPartialPrefix = true; + } + + if (result.quantity != null) { + // The final separator was a decimal separator. + result.quantity.adjustMagnitude(-digitsAfterDecimal); + } + + if (isScientific && segment.getOffset() != initialOffset) { boolean overflow = (exponent == Integer.MAX_VALUE); if (!overflow) { try { @@ -298,18 +312,6 @@ public class DecimalMatcher implements NumberParseMatcher { result.flags |= ParsedNumber.FLAG_INFINITY; } } - - } else if (result.quantity == null && segment.getOffset() != initialOffset) { - // Strings that start with a separator but have no digits. - // We don't need a backup of ParsedNumber because no changes could have been made to it. - segment.setOffset(initialOffset); - hasPartialPrefix = true; - - } else if (seenBothSeparators || (separator != -1 && decimalUniSet.contains(separator))) { - // The final separator was a decimal separator. - result.quantity.adjustMagnitude(-currGroup); - result.flags |= ParsedNumber.FLAG_HAS_DECIMAL_SEPARATOR; - } return segment.length() == 0 || hasPartialPrefix; diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java index 38c3afbbbda..7206c2fbf40 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java @@ -135,12 +135,16 @@ public class NumberParserImpl { ? 
(properties.getDecimalSeparatorAlwaysShown() || properties.getMaximumFractionDigits() != 0) : false; + boolean decimalSeparatorForbidden = properties.getDecimalPatternMatchRequired() + ? (!properties.getDecimalSeparatorAlwaysShown() + && properties.getMaximumFractionDigits() == 0) + : false; Grouper grouper = Grouper.defaults().withProperties(properties); int parseFlags = 0; if (!properties.getParseCaseSensitive()) { parseFlags |= ParsingUtils.PARSE_FLAG_IGNORE_CASE; } - if (properties.getParseIntegerOnly()) { + if (properties.getParseIntegerOnly() || decimalSeparatorForbidden) { parseFlags |= ParsingUtils.PARSE_FLAG_INTEGER_ONLY; } if (isStrict) { diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/UnicodeSetStaticCache.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/UnicodeSetStaticCache.java index 3da729f80a1..bf0593e1230 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/UnicodeSetStaticCache.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/UnicodeSetStaticCache.java @@ -26,17 +26,18 @@ public class UnicodeSetStaticCache { STRICT_IGNORABLES, // Separators + // Notes: + // - COMMA is a superset of STRICT_COMMA + // - PERIOD is a superset of STRICT_PERIOD + // - ALL_SEPARATORS is the union of COMMA, PERIOD, and OTHER_GROUPING_SEPARATORS + // - STRICT_ALL_SEPARATORS is the union of STRICT_COMMA, STRICT_PERIOD, and OTHER_GRP_SEPARATORS COMMA, PERIOD, - OTHER_GROUPING_SEPARATORS, - COMMA_OR_OTHER, - PERIOD_OR_OTHER, - COMMA_OR_PERIOD_OR_OTHER, STRICT_COMMA, STRICT_PERIOD, - STRICT_COMMA_OR_OTHER, - STRICT_PERIOD_OR_OTHER, - STRICT_COMMA_OR_PERIOD_OR_OTHER, + OTHER_GROUPING_SEPARATORS, + ALL_SEPARATORS, + STRICT_ALL_SEPARATORS, // Symbols // TODO: NaN? 
@@ -53,12 +54,8 @@ public class UnicodeSetStaticCache { CWCF, // Combined Separators with Digits (for lead code points) - DIGITS_OR_COMMA_OR_OTHER, - DIGITS_OR_PERIOD_OR_OTHER, - DIGITS_OR_COMMA_OR_PERIOD_OR_OTHER, - DIGITS_OR_STRICT_COMMA_OR_OTHER, - DIGITS_OR_STRICT_PERIOD_OR_OTHER, - DIGITS_OR_STRICT_COMMA_OR_PERIOD_OR_OTHER, + DIGITS_OR_ALL_SEPARATORS, + DIGITS_OR_STRICT_ALL_SEPARATORS, }; private static final Map unicodeSets = new EnumMap(Key.class); @@ -79,68 +76,6 @@ public class UnicodeSetStaticCache { return get(key1).contains(str) ? key1 : chooseFrom(str, key2, key3); } - public static Key unionOf(Key key1, Key key2) { - // Make sure key1 < key2 - if (key2.ordinal() < key1.ordinal()) { - Key temp = key1; - key1 = key2; - key2 = temp; - } - - if (key1 == Key.COMMA && key2 == Key.PERIOD_OR_OTHER) { - // 1.234,567 - return Key.COMMA_OR_PERIOD_OR_OTHER; - - } else if (key1 == Key.COMMA && key2 == Key.OTHER_GROUPING_SEPARATORS) { - // 1'234,567 - return Key.COMMA_OR_OTHER; - - } else if (key1 == Key.PERIOD && key2 == Key.COMMA_OR_OTHER) { - // 1,234.567 - return Key.COMMA_OR_PERIOD_OR_OTHER; - - } else if (key1 == Key.PERIOD && key2 == Key.OTHER_GROUPING_SEPARATORS) { - // 1'234.567 - return Key.PERIOD_OR_OTHER; - - } else if (key1 == Key.STRICT_COMMA && key2 == Key.STRICT_PERIOD_OR_OTHER) { - // Strict 1.234,567 - return Key.STRICT_COMMA_OR_PERIOD_OR_OTHER; - - } else if (key1 == Key.STRICT_COMMA && key2 == Key.OTHER_GROUPING_SEPARATORS) { - // Strict 1'234,567 - return Key.STRICT_COMMA_OR_OTHER; - - } else if (key1 == Key.STRICT_PERIOD && key2 == Key.STRICT_COMMA_OR_OTHER) { - // Strict 1,234.567 - return Key.STRICT_COMMA_OR_PERIOD_OR_OTHER; - - } else if (key1 == Key.STRICT_PERIOD && key2 == Key.OTHER_GROUPING_SEPARATORS) { - // Strict 1'234.567 - return Key.STRICT_PERIOD_OR_OTHER; - - } else if (key1 == Key.COMMA_OR_OTHER && key2 == Key.DIGITS) { - return Key.DIGITS_OR_COMMA_OR_OTHER; - - } else if (key1 == Key.PERIOD_OR_OTHER && key2 == Key.DIGITS) { - 
return Key.DIGITS_OR_PERIOD_OR_OTHER; - - } else if (key1 == Key.COMMA_OR_PERIOD_OR_OTHER && key2 == Key.DIGITS) { - return Key.DIGITS_OR_COMMA_OR_PERIOD_OR_OTHER; - - } else if (key1 == Key.STRICT_COMMA_OR_OTHER && key2 == Key.DIGITS) { - return Key.DIGITS_OR_STRICT_COMMA_OR_OTHER; - - } else if (key1 == Key.STRICT_PERIOD_OR_OTHER && key2 == Key.DIGITS) { - return Key.DIGITS_OR_STRICT_PERIOD_OR_OTHER; - - } else if (key1 == Key.STRICT_COMMA_OR_PERIOD_OR_OTHER && key2 == Key.DIGITS) { - return Key.DIGITS_OR_STRICT_COMMA_OR_PERIOD_OR_OTHER; - } - - return null; - } - private static UnicodeSet computeUnion(Key k1, Key k2) { return new UnicodeSet().addAll(get(k1)).addAll(get(k2)).freeze(); } @@ -167,16 +102,9 @@ public class UnicodeSetStaticCache { unicodeSets.put(Key.STRICT_PERIOD, new UnicodeSet("[.․﹒.。]").freeze()); unicodeSets.put(Key.OTHER_GROUPING_SEPARATORS, new UnicodeSet("['٬‘’'\\u0020\\u00A0\\u2000-\\u200A\\u202F\\u205F\\u3000]").freeze()); - - unicodeSets.put(Key.COMMA_OR_OTHER, computeUnion(Key.COMMA, Key.OTHER_GROUPING_SEPARATORS)); - unicodeSets.put(Key.PERIOD_OR_OTHER, computeUnion(Key.PERIOD, Key.OTHER_GROUPING_SEPARATORS)); - unicodeSets.put(Key.COMMA_OR_PERIOD_OR_OTHER, + unicodeSets.put(Key.ALL_SEPARATORS, computeUnion(Key.COMMA, Key.PERIOD, Key.OTHER_GROUPING_SEPARATORS)); - unicodeSets.put(Key.STRICT_COMMA_OR_OTHER, - computeUnion(Key.STRICT_COMMA, Key.OTHER_GROUPING_SEPARATORS)); - unicodeSets.put(Key.STRICT_PERIOD_OR_OTHER, - computeUnion(Key.STRICT_PERIOD, Key.OTHER_GROUPING_SEPARATORS)); - unicodeSets.put(Key.STRICT_COMMA_OR_PERIOD_OR_OTHER, + unicodeSets.put(Key.STRICT_ALL_SEPARATORS, computeUnion(Key.STRICT_COMMA, Key.STRICT_PERIOD, Key.OTHER_GROUPING_SEPARATORS)); unicodeSets.put(Key.MINUS_SIGN, new UnicodeSet("[-⁻₋−➖﹣-]").freeze()); @@ -188,22 +116,14 @@ public class UnicodeSetStaticCache { unicodeSets.put(Key.INFINITY, new UnicodeSet("[∞]").freeze()); unicodeSets.put(Key.DIGITS, new UnicodeSet("[:digit:]").freeze()); - // Note: locale fi 
translation of NaN starts with 'e' (conflicts with scientific?) unicodeSets.put(Key.NAN_LEAD, new UnicodeSet("[NnТтmeՈոс¤НнчTtsҳ\u975e\u1002\u0e9a\u10d0\u0f68\u0644\u0646]") .freeze()); unicodeSets.put(Key.SCIENTIFIC_LEAD, new UnicodeSet("[Ee×·е\u0627]").freeze()); unicodeSets.put(Key.CWCF, new UnicodeSet("[:CWCF:]").freeze()); - unicodeSets.put(Key.DIGITS_OR_COMMA_OR_OTHER, computeUnion(Key.DIGITS, Key.COMMA_OR_OTHER)); - unicodeSets.put(Key.DIGITS_OR_PERIOD_OR_OTHER, computeUnion(Key.DIGITS, Key.PERIOD_OR_OTHER)); - unicodeSets.put(Key.DIGITS_OR_COMMA_OR_PERIOD_OR_OTHER, - computeUnion(Key.DIGITS, Key.COMMA_OR_PERIOD_OR_OTHER)); - unicodeSets.put(Key.DIGITS_OR_STRICT_COMMA_OR_OTHER, - computeUnion(Key.DIGITS, Key.STRICT_COMMA_OR_OTHER)); - unicodeSets.put(Key.DIGITS_OR_STRICT_PERIOD_OR_OTHER, - computeUnion(Key.DIGITS, Key.STRICT_PERIOD_OR_OTHER)); - unicodeSets.put(Key.DIGITS_OR_STRICT_COMMA_OR_PERIOD_OR_OTHER, - computeUnion(Key.DIGITS, Key.STRICT_COMMA_OR_PERIOD_OR_OTHER)); + unicodeSets.put(Key.DIGITS_OR_ALL_SEPARATORS, computeUnion(Key.DIGITS, Key.ALL_SEPARATORS)); + unicodeSets.put(Key.DIGITS_OR_STRICT_ALL_SEPARATORS, + computeUnion(Key.DIGITS, Key.STRICT_ALL_SEPARATORS)); } } diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/data/numberformattestspecification.txt b/icu4j/main/tests/core/src/com/ibm/icu/dev/data/numberformattestspecification.txt index 2e3c634bb5f..28099187281 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/data/numberformattestspecification.txt +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/data/numberformattestspecification.txt @@ -754,9 +754,10 @@ parse output breaks +3.52EE4 3.52 +1,234,567.8901 1234567.8901 +1,23,4567.8901 1234567.8901 -+1,23,4567.89,01 1234567.89 +// P supports grouping separators in the fraction; none of the others do. 
++1,23,4567.89,01 1234567.8901 CJK +1,23,456.78.9 123456.78 -+12.34,56 12.34 ++12.34,56 12.3456 CJK +79,,20,3 79203 +79 20 3 79203 K // Parsing stops at comma as it is different from other separators @@ -852,17 +853,17 @@ parse output breaks // have no separators at all. +12,345.67 12345.67 // JDK doesn't require separators to be in the right place. -// In some, but not all, cases, P stops early. +1,23,4567.8901 fail K -+1,234,567.8901 fail KP ++1,234,567.8901 fail K +1234,567.8901 fail K +1,234567.8901 fail K +1234567.8901 1234567.8901 // Minimum grouping is not satisfied below, but that's ok // because minimum grouping is optional. +1,234.5 1234.5 -// Comma after decimal means parse to a comma -+1,23,456.78,9 123456.78 +// Comma after decimal means a fractional grouping separator +// P fails since it finds an invalid grouping size ++1,23,456.78,9 123456.789 P // C and J fail upon seeing the second decimal point +1,23,456.78.9 123456.78 CJ +79 79 @@ -996,8 +997,9 @@ begin parse output breaks 123.456 123456 123,456 123.456 -987,654.321 987.654 -987,654 321 987.654 +// The separator after the comma can be interpreted as a fractional grouping +987,654.321 987.654321 CJK +987,654 321 987.654321 CJK 987.654,321 987654.321 test select diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTest.java index 0cabbe8c9f8..3b9a452cebb 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTest.java @@ -2807,7 +2807,7 @@ public class NumberFormatTest extends TestFmwk { "0,", // single zero before comma (not group separator) is not leading "0.0", // single zero before decimal followed by digit is not leading "0. 
", // same as above before period (or decimal) is not leading - "0.100,5", // comma stops parse of decimal (no grouping) + "0.100,,5", // two commas stop parse of decimal ".00", // leading decimal is ok, even with zeros "1234567", // group separators are not required "12345, ", // comma not followed by digit is not a group separator, but end of number @@ -2828,7 +2828,6 @@ public class NumberFormatTest extends TestFmwk { ",1", // leading group separator before digit ",.02", // leading group separator before decimal "1,.02", // group separator before decimal - //"1,,200", // multiple group separators "1,45", // wrong number of digits in primary group "1,45 that", // wrong number of digits in primary group "1,45.34", // wrong number of digits in primary group @@ -4784,13 +4783,12 @@ public class NumberFormatTest extends TestFmwk { fmt.applyPattern("@@@E0"); expect2(fmt, 1230000, "(1).(2)(3)E(6)"); - // Grouping and decimal with multiple code points are not supported during parsing. + // Grouping and decimal with multiple code points (supported in parsing since ICU 61) symbols.setDecimalSeparatorString("~~"); symbols.setGroupingSeparatorString("^^"); fmt.setDecimalFormatSymbols(symbols); fmt.applyPattern("#,##0.0#"); - assertEquals("Custom decimal and grouping separator string with multiple characters", - "(1)^^(2)(3)(4)^^(5)(6)(7)~~(8)(9)", fmt.format(1234567.89)); + expect2(fmt, 1234567.89, "(1)^^(2)(3)(4)^^(5)(6)(7)~~(8)(9)"); // Digits starting at U+1D7CE MATHEMATICAL BOLD DIGIT ZERO // These are all single code points, so parsing will work. 
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java index 847fa771948..c4bfc5fc793 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java @@ -83,7 +83,7 @@ public class NumberParserTest { { 3, "📺1.23", "📺0;📻0", 6, 1.23 }, { 3, "📻1.23", "📺0;📻0", 6, -1.23 }, { 3, ".00", "0", 3, 0.0 }, - { 3, " 0", "a0", 31, 0.0}, // should not hang + { 3, " 0", "a0", 31, 0.0 }, // should not hang { 3, "0", "0", 1, 0.0 } }; for (Object[] cas : cases) { @@ -101,8 +101,13 @@ public class NumberParserTest { ParsedNumber resultObject = new ParsedNumber(); parser.parse(input, true, resultObject); assertNotNull("Greedy Parse failed: " + message, resultObject.quantity); - assertEquals(message, expectedCharsConsumed, resultObject.charsConsumed); - assertEquals(message, resultDouble, resultObject.getNumber().doubleValue(), 0.0); + assertEquals("Greedy Parse failed: " + message, + expectedCharsConsumed, + resultObject.charsConsumed); + assertEquals("Greedy Parse failed: " + message, + resultDouble, + resultObject.getNumber().doubleValue(), + 0.0); } if (0 != (flags & 0x02)) { @@ -110,8 +115,13 @@ public class NumberParserTest { ParsedNumber resultObject = new ParsedNumber(); parser.parse(input, false, resultObject); assertNotNull("Non-Greedy Parse failed: " + message, resultObject.quantity); - assertEquals(message, expectedCharsConsumed, resultObject.charsConsumed); - assertEquals(message, resultDouble, resultObject.getNumber().doubleValue(), 0.0); + assertEquals("Non-Greedy Parse failed: " + message, + expectedCharsConsumed, + resultObject.charsConsumed); + assertEquals("Non-Greedy Parse failed: " + message, + resultDouble, + resultObject.getNumber().doubleValue(), + 0.0); } if (0 != (flags & 0x04)) { @@ -120,8 +130,13 @@ public class NumberParserTest { ParsedNumber 
resultObject = new ParsedNumber(); parser.parse(input, true, resultObject); assertNotNull("Strict Parse failed: " + message, resultObject.quantity); - assertEquals(message, expectedCharsConsumed, resultObject.charsConsumed); - assertEquals(message, resultDouble, resultObject.getNumber().doubleValue(), 0.0); + assertEquals("Strict Parse failed: " + message, + expectedCharsConsumed, + resultObject.charsConsumed); + assertEquals("Strict Parse failed: " + message, + resultDouble, + resultObject.getNumber().doubleValue(), + 0.0); } } } @@ -138,9 +153,9 @@ public class NumberParserTest { assertEquals(Double.NaN, resultObject.getNumber().doubleValue(), 0.0); resultObject = new ParsedNumber(); - parser.parse("1.2e3", false, resultObject); + parser.parse("1,2e3", false, resultObject); assertTrue(resultObject.success()); - assertEquals(12000.0, resultObject.getNumber().doubleValue(), 0.0); + assertEquals(1200.0, resultObject.getNumber().doubleValue(), 0.0); } @Test