From d63528f43774d68a9d602178dcd9be1fafcf1c1a Mon Sep 17 00:00:00 2001 From: Shane Carr Date: Sat, 16 Dec 2017 02:54:58 +0000 Subject: [PATCH] ICU-13513 More work on affix matchers. Refactoring code for grouping and decimal separators. X-SVN-Rev: 40739 --- .../icu/impl/number/AffixPatternProvider.java | 6 + .../com/ibm/icu/impl/number/AffixUtils.java | 22 ++- .../icu/impl/number/parse/AffixMatcher.java | 182 ++++++------------ .../icu/impl/number/parse/DecimalMatcher.java | 89 +++------ .../impl/number/parse/IgnorablesMatcher.java | 68 +++++++ .../ibm/icu/impl/number/parse/NanMatcher.java | 39 ++++ .../impl/number/parse/NumberParserImpl.java | 109 +++++------ .../icu/impl/number/parse/ParsedNumber.java | 9 + .../parse/RequireDecimalSeparatorMatcher.java | 27 +++ .../number/parse/RequireNumberMatcher.java | 2 +- .../impl/number/parse/ScientificMatcher.java | 8 +- .../impl/number/parse/SeparatorSetUtils.java | 109 +++++++++++ .../impl/number/parse/WhitespaceMatcher.java | 48 ----- .../data/numberformattestspecification.txt | 98 +++++----- .../icu/dev/test/number/AffixUtilsTest.java | 9 +- .../icu/dev/test/number/NumberParserTest.java | 15 +- 16 files changed, 489 insertions(+), 351 deletions(-) create mode 100644 icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/IgnorablesMatcher.java create mode 100644 icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NanMatcher.java create mode 100644 icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/RequireDecimalSeparatorMatcher.java create mode 100644 icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/SeparatorSetUtils.java delete mode 100644 icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/WhitespaceMatcher.java diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/AffixPatternProvider.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/AffixPatternProvider.java index 6052bb18e51..daf22c29051 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/AffixPatternProvider.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/AffixPatternProvider.java @@ -10,6 +10,12 @@ public interface AffixPatternProvider { public static final int PADDING = 0x400; } + // Convenience compound flags + public static final int FLAG_POS_PREFIX = Flags.PREFIX; + public static final int FLAG_POS_SUFFIX = 0; + public static final int FLAG_NEG_PREFIX = Flags.PREFIX | Flags.NEGATIVE_SUBPATTERN; + public static final int FLAG_NEG_SUFFIX = Flags.NEGATIVE_SUBPATTERN; + public char charAt(int flags, int i); public int length(int flags); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/AffixUtils.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/AffixUtils.java index 0e5f36cf4e7..43288478d2b 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/AffixUtils.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/AffixUtils.java @@ -3,6 +3,7 @@ package com.ibm.icu.impl.number; import com.ibm.icu.text.NumberFormat; +import com.ibm.icu.text.UnicodeSet; /** * Performs manipulations on affix patterns: the prefix and suffix strings associated with a decimal @@ -386,19 +387,30 @@ public class AffixUtils { } /** - * Appends a new affix pattern with all symbols removed. Like calling unescape with a symbol provider that always - * returns the empty string. + * Appends a new affix pattern with all symbols and code points in the given "ignorables" UnicodeSet stripped out. + * Similar to calling unescape with a symbol provider that always returns the empty string. + * + *

+ * Accepts and returns a StringBuilder, allocating it only if necessary. */ - public static void removeSymbols(CharSequence affixPattern, StringBuilder output) { + public static StringBuilder withoutSymbolsOrIgnorables( + CharSequence affixPattern, + UnicodeSet ignorables, + StringBuilder sb) { assert affixPattern != null; long tag = 0L; while (hasNext(tag, affixPattern)) { tag = nextToken(tag, affixPattern); int typeOrCp = getTypeOrCp(tag); - if (typeOrCp >= 0) { - output.appendCodePoint(typeOrCp); + if (typeOrCp >= 0 && !ignorables.contains(typeOrCp)) { + if (sb == null) { + // Lazy-initialize the StringBuilder + sb = new StringBuilder(); + } + sb.appendCodePoint(typeOrCp); } } + return sb; } /** diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixMatcher.java index 70b7b4c3bd8..c1aaf9c5df3 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixMatcher.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixMatcher.java @@ -2,16 +2,13 @@ // License & terms of use: http://www.unicode.org/copyright.html#License package com.ibm.icu.impl.number.parse; -import java.util.Collection; +import java.util.ArrayList; +import java.util.Collections; import java.util.Comparator; -import java.util.Set; -import java.util.TreeSet; -import com.ibm.icu.impl.StandardPlural; import com.ibm.icu.impl.number.AffixPatternProvider; import com.ibm.icu.impl.number.AffixUtils; -import com.ibm.icu.impl.number.MutablePatternModifier; -import com.ibm.icu.impl.number.NumberStringBuilder; +import com.ibm.icu.text.UnicodeSet; /** * @author sffc @@ -43,132 +40,78 @@ public class AffixMatcher implements NumberParseMatcher { } }; - /** - * Creates multiple AffixMatchers, enough to cover the requirements for the given pattern modifier, appending them - * in order to the NumberParserImpl. - */ - public static void generateFromPatternModifier( - MutablePatternModifier patternModifier, - int flags, - boolean includeUnpaired, - NumberParserImpl output) { - - // Store the matchers in a TreeSet to ensure both uniqueness and order. - Set matchers = new TreeSet(COMPARATOR); - - // Construct one matcher per isNegative/plural combination. Most of the time, plurals aren't needed, so only - // two matchers will be created, one for positive and one for negative. - NumberStringBuilder nsb = new NumberStringBuilder(); - boolean isNegative = false; - while (true) { - if (isNegative) { - flags |= ParsedNumber.FLAG_NEGATIVE; + public static void generateFromAffixPatternProvider( + AffixPatternProvider patternInfo, + NumberParserImpl output, + UnicodeSet ignorables, + boolean includeUnpaired) { + // Lazy-initialize the StringBuilder. + StringBuilder sb = null; + + // Use initial capacity of 6, the highest possible number of AffixMatchers. + // TODO: Lazy-initialize? + ArrayList matchers = new ArrayList(6); + + sb = getCleanAffix(patternInfo, AffixPatternProvider.FLAG_POS_PREFIX, ignorables, sb); + String posPrefix = toStringOrEmpty(sb); + sb = getCleanAffix(patternInfo, AffixPatternProvider.FLAG_POS_SUFFIX, ignorables, sb); + String posSuffix = toStringOrEmpty(sb); + + if (!posPrefix.isEmpty() || !posSuffix.isEmpty()) { + matchers.add(getInstance(posPrefix, posSuffix, 0)); + if (includeUnpaired && !posPrefix.isEmpty() && !posSuffix.isEmpty()) { + matchers.add(getInstance(posPrefix, "", 0)); + matchers.add(getInstance("", posSuffix, 0)); } + } - if (patternModifier.needsPlurals()) { - for (StandardPlural plural : StandardPlural.VALUES) { - patternModifier.setNumberProperties(isNegative, plural); - AffixMatcher.createAndAppendTo(matchers, patternModifier, flags, nsb, includeUnpaired); + if (patternInfo.hasNegativeSubpattern()) { + sb = getCleanAffix(patternInfo, AffixPatternProvider.FLAG_NEG_PREFIX, ignorables, sb); + String negPrefix = toStringOrEmpty(sb); + sb = getCleanAffix(patternInfo, AffixPatternProvider.FLAG_NEG_SUFFIX, ignorables, sb); + String negSuffix = toStringOrEmpty(sb); + + if (negPrefix.equals(posPrefix) && negSuffix.equals(posSuffix)) { + // No-op: favor the positive AffixMatcher + } else if (!negPrefix.isEmpty() || !negSuffix.isEmpty()) { + matchers.add(getInstance(negPrefix, negSuffix, ParsedNumber.FLAG_NEGATIVE)); + if (includeUnpaired && !negPrefix.isEmpty() && !negSuffix.isEmpty()) { + if (!negPrefix.equals(posPrefix)) { + matchers.add(getInstance(negPrefix, "", ParsedNumber.FLAG_NEGATIVE)); + } + if (!negSuffix.equals(posSuffix)) { + matchers.add(getInstance("", negSuffix, ParsedNumber.FLAG_NEGATIVE)); + } } - } else { - patternModifier.setNumberProperties(isNegative, null); - AffixMatcher.createAndAppendTo(matchers, patternModifier, flags, nsb, includeUnpaired); - } - - if (isNegative) { - break; - } else { - isNegative = true; } } - for (AffixMatcher matcher : matchers) { - output.addMatcher(matcher); - } + // Put the AffixMatchers in order, and then add them to the output. + Collections.sort(matchers, COMPARATOR); + output.addMatchers(matchers); } - public static void generateFromAffixPatternProvider(AffixPatternProvider patternInfo, - NumberParserImpl output, - boolean includeUnpaired) { - AffixMatcher positive = null; - AffixMatcher negative = null; - - StringBuilder sb = new StringBuilder(); - AffixUtils.removeSymbols(patternInfo.getString(AffixPatternProvider.Flags.PREFIX), sb); - String prefix = sb.toString(); - sb.setLength(0); - AffixUtils.removeSymbols(patternInfo.getString(/* suffix */ 0), sb); - String suffix = sb.toString(); - if (!prefix.isEmpty() || !suffix.isEmpty()) { - positive = new AffixMatcher(prefix, suffix, 0); - } - - if (patternInfo.hasNegativeSubpattern()) { + private static StringBuilder getCleanAffix( + AffixPatternProvider patternInfo, + int flag, + UnicodeSet ignorables, + StringBuilder sb) { + if (sb != null) { sb.setLength(0); - AffixUtils.removeSymbols(patternInfo - .getString(AffixPatternProvider.Flags.PREFIX | AffixPatternProvider.Flags.NEGATIVE_SUBPATTERN), sb); - prefix = sb.toString(); - sb.setLength(0); - AffixUtils.removeSymbols(patternInfo.getString(AffixPatternProvider.Flags.NEGATIVE_SUBPATTERN), sb); - suffix = sb.toString(); - if (!prefix.isEmpty() || !suffix.isEmpty()) { - negative = new AffixMatcher(prefix, suffix, ParsedNumber.FLAG_NEGATIVE); - } } - - if (positive != null && negative != null) { - int comparison = COMPARATOR.compare(positive, negative); - if (comparison > 0) { - appendTo(negative, output, includeUnpaired); - appendTo(positive, output, includeUnpaired); - } else if (comparison < 0) { - appendTo(positive, output, includeUnpaired); - appendTo(negative, output, includeUnpaired); - } else { - // The two candidates are equal; favor the positive one - appendTo(positive, output, includeUnpaired); - } - } else if (positive != null) { - appendTo(positive, output, includeUnpaired); - } else if (negative != null) { - appendTo(negative, output, includeUnpaired); - } else { - // No affixes to append this time + if (patternInfo.length(flag) > 0) { + sb = AffixUtils.withoutSymbolsOrIgnorables(patternInfo.getString(flag), ignorables, sb); } + return sb; } - private static void appendTo(AffixMatcher matcher, NumberParserImpl output, boolean includeUnpaired) { - output.addMatcher(matcher); - if (includeUnpaired && !matcher.prefix.isEmpty() && !matcher.suffix.isEmpty()) { - output.addMatcher(new AffixMatcher(matcher.prefix, "", matcher.flags)); - output.addMatcher(new AffixMatcher("", matcher.suffix, matcher.flags)); - } + private static String toStringOrEmpty(StringBuilder sb) { + return (sb == null || sb.length() == 0) ? "" : sb.toString(); } - /** - * Constructs one or more AffixMatchers from the given MutablePatternModifier and flags, appending them to the given - * collection. The NumberStringBuilder is used as a temporary object only. - * - * @param includeUnpaired If true, create additional AffixMatchers with an unpaired prefix or suffix. - */ - private static void createAndAppendTo( - Collection appendTo, - MutablePatternModifier patternModifier, - int flags, - NumberStringBuilder nsb, - boolean includeUnpaired) { - // TODO: Make this more efficient (avoid the substrings and things) - nsb.clear(); - patternModifier.apply(nsb, 0, 0); - int prefixLength = patternModifier.getPrefixLength(); - String full = nsb.toString(); - String prefix = full.substring(0, prefixLength); - String suffix = full.substring(prefixLength); - appendTo.add(new AffixMatcher(prefix, suffix, flags)); - if (includeUnpaired && !prefix.isEmpty() && !suffix.isEmpty()) { - appendTo.add(new AffixMatcher(prefix, "", flags)); - appendTo.add(new AffixMatcher("", suffix, flags)); - } + private static final AffixMatcher getInstance(String prefix, String suffix, int flags) { + // TODO: Special handling for common cases like both strings empty. + return new AffixMatcher(prefix, suffix, flags); } private AffixMatcher(String prefix, String suffix, int flags) { @@ -179,7 +122,7 @@ public class AffixMatcher implements NumberParseMatcher { @Override public boolean match(StringSegment segment, ParsedNumber result) { - if (result.quantity == null) { + if (!result.seenNumber()) { // Prefix if (result.prefix != null || prefix.length() == 0) { return false; @@ -255,6 +198,7 @@ public class AffixMatcher implements NumberParseMatcher { @Override public String toString() { - return ""; + boolean isNegative = 0 != (flags & ParsedNumber.FLAG_NEGATIVE); + return ""; } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/DecimalMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/DecimalMatcher.java index 09165b43fec..129519fa028 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/DecimalMatcher.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/DecimalMatcher.java @@ -13,73 +13,39 @@ import com.ibm.icu.text.UnicodeSet; */ public class DecimalMatcher implements NumberParseMatcher { - // TODO: Re-generate these sets from the database. They probably haven't been updated in a while. - private static final UnicodeSet UNISET_PERIOD_LIKE = new UnicodeSet("[.\\u2024\\u3002\\uFE12\\uFE52\\uFF0E\\uFF61]") - .freeze(); - private static final UnicodeSet UNISET_STRICT_PERIOD_LIKE = new UnicodeSet("[.\\u2024\\uFE52\\uFF0E\\uFF61]") - .freeze(); - private static final UnicodeSet UNISET_COMMA_LIKE = new UnicodeSet( - "[,\\u060C\\u066B\\u3001\\uFE10\\uFE11\\uFE50\\uFE51\\uFF0C\\uFF64]").freeze(); - private static final UnicodeSet UNISET_STRICT_COMMA_LIKE = new UnicodeSet("[,\\u066B\\uFE10\\uFE50\\uFF0C]") - .freeze(); - private static final UnicodeSet UNISET_OTHER_GROUPING_SEPARATORS = new UnicodeSet( - "[\\ '\\u00A0\\u066C\\u2000-\\u200A\\u2018\\u2019\\u202F\\u205F\\u3000\\uFF07]").freeze(); - - public static DecimalMatcher getInstance(DecimalFormatSymbols symbols) { - String groupingSeparator = symbols.getGroupingSeparatorString(); - UnicodeSet groupingSet = UNISET_COMMA_LIKE.contains(groupingSeparator) - ? UNISET_COMMA_LIKE.cloneAsThawed().addAll(UNISET_OTHER_GROUPING_SEPARATORS).freeze() - : UNISET_PERIOD_LIKE.contains(groupingSeparator) - ? UNISET_PERIOD_LIKE.cloneAsThawed().addAll(UNISET_OTHER_GROUPING_SEPARATORS).freeze() - : UNISET_OTHER_GROUPING_SEPARATORS.contains(groupingSeparator) - ? UNISET_OTHER_GROUPING_SEPARATORS - : new UnicodeSet().addAll(groupingSeparator).freeze(); - - String decimalSeparator = symbols.getDecimalSeparatorString(); - UnicodeSet decimalSet = UNISET_COMMA_LIKE.contains(decimalSeparator) ? UNISET_COMMA_LIKE - : UNISET_PERIOD_LIKE.contains(decimalSeparator) ? UNISET_PERIOD_LIKE - : new UnicodeSet().addAll(decimalSeparator).freeze(); - - return new DecimalMatcher(symbols.getDigitStrings(), groupingSet, decimalSet, false); - } - - public static DecimalMatcher getExponentInstance(DecimalFormatSymbols symbols) { - return new DecimalMatcher(symbols.getDigitStrings(), - new UnicodeSet("[,]").freeze(), - new UnicodeSet("[.]").freeze(), - true); - } - private final String[] digitStrings; - private final UnicodeSet groupingUniSet; - private final UnicodeSet decimalUniSet; - private final UnicodeSet separatorSet; public boolean requireGroupingMatch = false; + public boolean decimalEnabled = true; public boolean groupingEnabled = true; public int grouping1 = 3; public int grouping2 = 3; public boolean integerOnly = false; - private final boolean isScientific; - - private DecimalMatcher( - String[] digitStrings, - UnicodeSet groupingUniSet, - UnicodeSet decimalUniSet, - boolean isScientific) { - this.digitStrings = digitStrings; - this.groupingUniSet = groupingUniSet; - this.decimalUniSet = decimalUniSet; - if (groupingEnabled) { - separatorSet = groupingUniSet.cloneAsThawed().addAll(decimalUniSet).freeze(); - } else { - separatorSet = decimalUniSet; - } - this.isScientific = isScientific; + public boolean isScientific = false; + + private UnicodeSet groupingUniSet; + private UnicodeSet decimalUniSet; + private UnicodeSet separatorSet; + private String[] digitStrings; + private boolean frozen; + + public DecimalMatcher() { + frozen = false; + } + + public void freeze(DecimalFormatSymbols symbols, boolean isStrict) { + assert !frozen; + frozen = true; + + groupingUniSet = SeparatorSetUtils.getGroupingUnicodeSet(symbols, isStrict); + decimalUniSet = SeparatorSetUtils.getDecimalUnicodeSet(symbols, isStrict); + separatorSet = SeparatorSetUtils.unionUnicodeSets(groupingUniSet, decimalUniSet); + digitStrings = symbols.getDigitStringsLocal(); } @Override public boolean match(StringSegment segment, ParsedNumber result) { - if (result.quantity != null && !isScientific) { + assert frozen; + if (result.seenNumber() && !isScientific) { // A number has already been consumed. return false; } @@ -167,8 +133,11 @@ public class DecimalMatcher implements NumberParseMatcher { if (isScientific) { result.quantity.adjustMagnitude(exponent); + } else if (result.quantity == null) { + // No-op: strings that start with a separator without any other digits } else if (seenBothSeparators || (separator != -1 && decimalUniSet.contains(separator))) { // The final separator was a decimal separator. + result.flags |= ParsedNumber.FLAG_HAS_DECIMAL_SEPARATOR; result.quantity.adjustMagnitude(-currGroup); if (integerOnly) { result.quantity.truncate(); @@ -188,8 +157,8 @@ public class DecimalMatcher implements NumberParseMatcher { result.quantity.adjustMagnitude(-currGroup); result.quantity.truncate(); segment.setOffset(lastSeparatorOffset); -// result.quantity = null; -// segment.setOffset(initialOffset); + // result.quantity = null; + // segment.setOffset(initialOffset); } return segment.length() == 0 || hasPartialPrefix || segment.isLeadingSurrogate(); @@ -202,6 +171,6 @@ public class DecimalMatcher implements NumberParseMatcher { @Override public String toString() { - return ""; + return ""; } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/IgnorablesMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/IgnorablesMatcher.java new file mode 100644 index 00000000000..76d98b20cce --- /dev/null +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/IgnorablesMatcher.java @@ -0,0 +1,68 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html#License +package com.ibm.icu.impl.number.parse; + +import com.ibm.icu.text.UnicodeSet; + +/** + * @author sffc + * + */ +public class IgnorablesMatcher implements NumberParseMatcher { + + // BiDi characters are skipped over and ignored at any point in the string, even in strict mode. + static final UnicodeSet UNISET_BIDI = new UnicodeSet("[[\\u200E\\u200F\\u061C]]").freeze(); + + // This set was decided after discussion with icu-design@. See ticket #13309. + // Zs+TAB is "horizontal whitespace" according to UTS #18 (blank property). + static final UnicodeSet UNISET_WHITESPACE = new UnicodeSet("[[:Zs:][\\u0009]]").freeze(); + + /** The default set of ignorables. */ + static final UnicodeSet DEFAULT_UNISET = UNISET_BIDI.cloneAsThawed().addAll(UNISET_WHITESPACE).freeze(); + + /** The default set of ignorables for strict mode. */ + static final UnicodeSet STRICT_UNISET = UNISET_BIDI; + + private static final IgnorablesMatcher DEFAULT_INSTANCE = new IgnorablesMatcher(DEFAULT_UNISET); + private static final IgnorablesMatcher STRICT_INSTANCE = new IgnorablesMatcher(STRICT_UNISET); + + public static IgnorablesMatcher getInstance(UnicodeSet ignorables) { + assert ignorables.isFrozen(); + if (ignorables == DEFAULT_UNISET || ignorables.equals(DEFAULT_UNISET)) { + return DEFAULT_INSTANCE; + } else if (ignorables == STRICT_UNISET || ignorables.equals(STRICT_UNISET)) { + return STRICT_INSTANCE; + } else { + return new IgnorablesMatcher(ignorables); + } + } + + private final UnicodeSet ignorables; + + private IgnorablesMatcher(UnicodeSet ignorables) { + this.ignorables = ignorables; + } + + @Override + public boolean match(StringSegment segment, ParsedNumber result) { + while (segment.length() > 0) { + int cp = segment.getCodePoint(); + if (cp == -1 || !ignorables.contains(cp)) { + break; + } + segment.adjustOffset(Character.charCount(cp)); + // Note: Do not touch the charsConsumed. + } + return segment.length() == 0 || segment.isLeadingSurrogate(); + } + + @Override + public void postProcess(ParsedNumber result) { + // No-op + } + + @Override + public String toString() { + return ""; + } +} diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NanMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NanMatcher.java new file mode 100644 index 00000000000..795b7299687 --- /dev/null +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NanMatcher.java @@ -0,0 +1,39 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html#License +package com.ibm.icu.impl.number.parse; + +import com.ibm.icu.text.DecimalFormatSymbols; + +/** + * @author sffc + * + */ +public class NanMatcher implements NumberParseMatcher { + + private final String nanString; + + public NanMatcher(DecimalFormatSymbols symbols) { + nanString = symbols.getNaN(); + } + + @Override + public boolean match(StringSegment segment, ParsedNumber result) { + int overlap = segment.getCommonPrefixLength(nanString); + if (overlap == nanString.length()) { + result.flags |= ParsedNumber.FLAG_NAN; + segment.adjustOffset(overlap); + result.setCharsConsumed(segment); + return false; + } else if (overlap == segment.length()) { + return true; + } else { + return false; + } + } + + @Override + public void postProcess(ParsedNumber result) { + // No-op + } + +} diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java index a6a01789044..96d4dc6a2b0 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java @@ -4,6 +4,7 @@ package com.ibm.icu.impl.number.parse; import java.text.ParsePosition; import java.util.ArrayList; +import java.util.Collection; import java.util.Comparator; import java.util.List; @@ -11,13 +12,11 @@ import com.ibm.icu.impl.number.AffixPatternProvider; import com.ibm.icu.impl.number.AffixUtils; import com.ibm.icu.impl.number.CustomSymbolCurrency; import com.ibm.icu.impl.number.DecimalFormatProperties; -import com.ibm.icu.impl.number.MutablePatternModifier; import com.ibm.icu.impl.number.Parse.ParseMode; import com.ibm.icu.impl.number.PatternStringParser; import com.ibm.icu.impl.number.PropertiesAffixPatternProvider; -import com.ibm.icu.number.NumberFormatter.SignDisplay; -import com.ibm.icu.number.NumberFormatter.UnitWidth; import com.ibm.icu.text.DecimalFormatSymbols; +import com.ibm.icu.text.UnicodeSet; import com.ibm.icu.util.Currency; import com.ibm.icu.util.CurrencyAmount; import com.ibm.icu.util.ULocale; @@ -36,25 +35,15 @@ public class NumberParserImpl { ULocale locale = new ULocale("en_IN"); DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(locale); - MutablePatternModifier mod = new MutablePatternModifier(false); - AffixPatternProvider provider = PatternStringParser.parseToPatternInfo(pattern); - mod.setPatternInfo(provider); - mod.setPatternAttributes(SignDisplay.AUTO, false); - mod.setSymbols(symbols, Currency.getInstance("USD"), UnitWidth.FULL_NAME, null); - int flags = 0; - if (provider.containsSymbolType(AffixUtils.TYPE_PERCENT)) { - flags |= ParsedNumber.FLAG_PERCENT; - } - if (provider.containsSymbolType(AffixUtils.TYPE_PERMILLE)) { - flags |= ParsedNumber.FLAG_PERMILLE; - } - AffixMatcher.generateFromPatternModifier(mod, flags, true, parser); + AffixPatternProvider patternInfo = PatternStringParser.parseToPatternInfo(pattern); + AffixMatcher.generateFromAffixPatternProvider(patternInfo, parser, new UnicodeSet(), true); - parser.addMatcher(WhitespaceMatcher.getInstance()); - DecimalMatcher decimalMatcher = DecimalMatcher.getInstance(symbols); + parser.addMatcher(IgnorablesMatcher.getInstance(IgnorablesMatcher.DEFAULT_UNISET)); + DecimalMatcher decimalMatcher = new DecimalMatcher(); decimalMatcher.requireGroupingMatch = strictGrouping; decimalMatcher.grouping1 = 3; decimalMatcher.grouping2 = 2; + decimalMatcher.freeze(symbols, false); parser.addMatcher(decimalMatcher); parser.addMatcher(new MinusSignMatcher()); parser.addMatcher(new ScientificMatcher(symbols)); @@ -65,10 +54,11 @@ public class NumberParserImpl { return parser; } - public static Number parseStatic(String input, - ParsePosition ppos, - DecimalFormatProperties properties, - DecimalFormatSymbols symbols) { + public static Number parseStatic( + String input, + ParsePosition ppos, + DecimalFormatProperties properties, + DecimalFormatSymbols symbols) { NumberParserImpl parser = createParserFromProperties(properties, symbols, false); ParsedNumber result = new ParsedNumber(); parser.parse(input, true, result); @@ -80,10 +70,11 @@ public class NumberParserImpl { } } - public static CurrencyAmount parseStaticCurrency(String input, - ParsePosition ppos, - DecimalFormatProperties properties, - DecimalFormatSymbols symbols) { + public static CurrencyAmount parseStaticCurrency( + String input, + ParsePosition ppos, + DecimalFormatProperties properties, + DecimalFormatSymbols symbols) { NumberParserImpl parser = createParserFromProperties(properties, symbols, true); ParsedNumber result = new ParsedNumber(); parser.parse(input, true, result); @@ -111,62 +102,49 @@ public class NumberParserImpl { ULocale locale = symbols.getULocale(); Currency currency = CustomSymbolCurrency.resolve(properties.getCurrency(), locale, symbols); boolean isStrict = properties.getParseMode() == ParseMode.STRICT; + UnicodeSet ignorables = isStrict ? IgnorablesMatcher.STRICT_UNISET : IgnorablesMatcher.DEFAULT_UNISET; - //////////////////////// - /// CURRENCY MATCHER /// - //////////////////////// - - if (parseCurrency) { - parser.addMatcher(new CurrencyMatcher(locale)); - } + boolean decimalSeparatorRequired = properties.getDecimalPatternMatchRequired() + ? (properties.getDecimalSeparatorAlwaysShown() || properties.getMaximumFractionDigits() != 0) + : false; ////////////////////// /// AFFIX MATCHERS /// ////////////////////// // Set up a pattern modifier with mostly defaults to generate AffixMatchers. - MutablePatternModifier mod = new MutablePatternModifier(false); AffixPatternProvider patternInfo = new PropertiesAffixPatternProvider(properties); -// mod.setPatternInfo(patternInfo); -// mod.setPatternAttributes(SignDisplay.AUTO, false); -// mod.setSymbols(symbols, currency, UnitWidth.SHORT, null); -// -// // Figure out which flags correspond to this pattern modifier. Note: negatives are taken care of in the -// // generateFromPatternModifier function. -// int flags = 0; -// if (patternInfo.containsSymbolType(AffixUtils.TYPE_PERCENT)) { -// flags |= ParsedNumber.FLAG_PERCENT; -// } -// if (patternInfo.containsSymbolType(AffixUtils.TYPE_PERMILLE)) { -// flags |= ParsedNumber.FLAG_PERMILLE; -// } -// if (patternInfo.hasCurrencySign()) { -// flags |= ParsedNumber.FLAG_HAS_DEFAULT_CURRENCY; -// } -// -// parseCurrency = parseCurrency || patternInfo.hasCurrencySign(); -// -// AffixMatcher.generateFromPatternModifier(mod, flags, !isStrict && !parseCurrency, parser); - - AffixMatcher.generateFromAffixPatternProvider(patternInfo, parser, !isStrict); + AffixMatcher.generateFromAffixPatternProvider(patternInfo, parser, ignorables, !isStrict); + + //////////////////////// + /// CURRENCY MATCHER /// + //////////////////////// + + parseCurrency = parseCurrency || patternInfo.hasCurrencySign(); + if (parseCurrency) { + parser.addMatcher(new CurrencyMatcher(locale)); + } /////////////////////////////// /// OTHER STANDARD MATCHERS /// /////////////////////////////// if (!isStrict) { - parser.addMatcher(WhitespaceMatcher.getInstance()); + parser.addMatcher(IgnorablesMatcher.getInstance(ignorables)); } if (!isStrict || patternInfo.containsSymbolType(AffixUtils.TYPE_PLUS_SIGN)) { parser.addMatcher(new PlusSignMatcher()); } parser.addMatcher(new MinusSignMatcher()); - DecimalMatcher decimalMatcher = DecimalMatcher.getInstance(symbols); - decimalMatcher.groupingEnabled = properties.getGroupingSize() > 0; + parser.addMatcher(new NanMatcher(symbols)); + DecimalMatcher decimalMatcher = new DecimalMatcher(); decimalMatcher.requireGroupingMatch = isStrict; + decimalMatcher.groupingEnabled = properties.getGroupingSize() > 0; + decimalMatcher.decimalEnabled = properties.getDecimalPatternMatchRequired() ? decimalSeparatorRequired : true; decimalMatcher.grouping1 = properties.getGroupingSize(); decimalMatcher.grouping2 = properties.getSecondaryGroupingSize(); decimalMatcher.integerOnly = properties.getParseIntegerOnly(); + decimalMatcher.freeze(symbols, isStrict); parser.addMatcher(decimalMatcher); if (!properties.getParseNoExponent()) { parser.addMatcher(new ScientificMatcher(symbols)); @@ -186,6 +164,9 @@ public class NumberParserImpl { if (parseCurrency) { parser.addMatcher(new RequireCurrencyMatcher()); } + if (decimalSeparatorRequired) { + parser.addMatcher(new RequireDecimalSeparatorMatcher()); + } //////////////////////// /// OTHER ATTRIBUTES /// @@ -212,14 +193,22 @@ public class NumberParserImpl { } public void addMatcher(NumberParseMatcher matcher) { - matchers.add(matcher); + assert !frozen; + this.matchers.add(matcher); + } + + public void addMatchers(Collection matchers) { + assert !frozen; + this.matchers.addAll(matchers); } public void setComparator(Comparator comparator) { + assert !frozen; this.comparator = comparator; } public void setIgnoreCase(boolean ignoreCase) { + assert !frozen; this.ignoreCase = ignoreCase; } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ParsedNumber.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ParsedNumber.java index d9e945c063f..02555f0778d 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ParsedNumber.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ParsedNumber.java @@ -46,6 +46,8 @@ public class ParsedNumber { public static final int FLAG_PERMILLE = 0x0004; public static final int FLAG_HAS_EXPONENT = 0x0008; public static final int FLAG_HAS_DEFAULT_CURRENCY = 0x0010; + public static final int FLAG_HAS_DECIMAL_SEPARATOR = 0x0020; + public static final int FLAG_NAN = 0x0040; /** A Comparator that favors ParsedNumbers with the most chars consumed. */ public static final Comparator COMPARATOR = new Comparator() { @@ -84,7 +86,14 @@ public class ParsedNumber { charsConsumed = segment.getOffset(); } + public boolean seenNumber() { + return quantity != null || 0 != (flags & FLAG_NAN); + } + public double getDouble() { + if (0 != (flags & FLAG_NAN)) { + return Double.NaN; + } double d = quantity.toDouble(); if (0 != (flags & FLAG_NEGATIVE)) { d = -d; diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/RequireDecimalSeparatorMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/RequireDecimalSeparatorMatcher.java new file mode 100644 index 00000000000..2348e48b607 --- /dev/null +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/RequireDecimalSeparatorMatcher.java @@ -0,0 +1,27 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html#License +package com.ibm.icu.impl.number.parse; + +/** + * @author sffc + * + */ +public class RequireDecimalSeparatorMatcher implements NumberParseMatcher { + + @Override + public boolean match(StringSegment segment, ParsedNumber result) { + return false; + } + + @Override + public void postProcess(ParsedNumber result) { + if (0 == (result.flags & ParsedNumber.FLAG_HAS_DECIMAL_SEPARATOR)) { + result.clear(); + } + } + + @Override + public String toString() { + return ""; + } +} diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/RequireNumberMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/RequireNumberMatcher.java index 2477a652d9f..c7a168ce4da 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/RequireNumberMatcher.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/RequireNumberMatcher.java @@ -16,7 +16,7 @@ public class RequireNumberMatcher implements NumberParseMatcher { @Override public void postProcess(ParsedNumber result) { // Require that a number is matched. - if (result.quantity == null) { + if (!result.seenNumber()) { result.clear(); } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ScientificMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ScientificMatcher.java index bf58e976403..48032a59df1 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ScientificMatcher.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ScientificMatcher.java @@ -17,13 +17,17 @@ public class ScientificMatcher implements NumberParseMatcher { public ScientificMatcher(DecimalFormatSymbols symbols) { exponentSeparatorString = symbols.getExponentSeparator(); minusSignString = symbols.getMinusSignString(); - exponentMatcher = DecimalMatcher.getExponentInstance(symbols); + exponentMatcher = new DecimalMatcher(); + exponentMatcher.isScientific = true; + exponentMatcher.groupingEnabled = false; + exponentMatcher.decimalEnabled = false; + exponentMatcher.freeze(symbols, false); } @Override public boolean match(StringSegment segment, ParsedNumber result) { // Only accept scientific notation after the mantissa. - if (result.quantity == null) { + if (!result.seenNumber()) { return false; } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/SeparatorSetUtils.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/SeparatorSetUtils.java new file mode 100644 index 00000000000..16b2be1bbc6 --- /dev/null +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/SeparatorSetUtils.java @@ -0,0 +1,109 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html#License +package com.ibm.icu.impl.number.parse; + +import com.ibm.icu.text.DecimalFormatSymbols; +import com.ibm.icu.text.UnicodeSet; + +/** + * @author sffc + * + */ +public class SeparatorSetUtils { + + // TODO: Re-generate these sets from the database. They probably haven't been updated in a while. + + static final UnicodeSet COMMA_LIKE = new UnicodeSet( + "[,\\u060C\\u066B\\u3001\\uFE10\\uFE11\\uFE50\\uFE51\\uFF0C\\uFF64]").freeze(); + + static final UnicodeSet STRICT_COMMA_LIKE = new UnicodeSet("[,\\u066B\\uFE10\\uFE50\\uFF0C]").freeze(); + + static final UnicodeSet PERIOD_LIKE = new UnicodeSet("[.\\u2024\\u3002\\uFE12\\uFE52\\uFF0E\\uFF61]").freeze(); + + static final UnicodeSet STRICT_PERIOD_LIKE = new UnicodeSet("[.\\u2024\\uFE52\\uFF0E\\uFF61]").freeze(); + + static final UnicodeSet OTHER_GROUPING_SEPARATORS = new UnicodeSet( + "[\\ '\\u00A0\\u066C\\u2000-\\u200A\\u2018\\u2019\\u202F\\u205F\\u3000\\uFF07]").freeze(); + + static final UnicodeSet COMMA_OR_PERIOD_LIKE = new UnicodeSet().addAll(COMMA_LIKE).addAll(PERIOD_LIKE).freeze(); + + static final UnicodeSet STRICT_COMMA_OR_PERIOD_LIKE = new UnicodeSet().addAll(STRICT_COMMA_LIKE) + .addAll(STRICT_PERIOD_LIKE).freeze(); + + static final UnicodeSet COMMA_LIKE_OR_OTHER = new UnicodeSet().addAll(COMMA_LIKE).addAll(OTHER_GROUPING_SEPARATORS) + .freeze(); + + static final UnicodeSet STRICT_COMMA_LIKE_OR_OTHER = new UnicodeSet().addAll(STRICT_COMMA_LIKE) + .addAll(OTHER_GROUPING_SEPARATORS).freeze(); + + static final UnicodeSet PERIOD_LIKE_OR_OTHER = new UnicodeSet().addAll(PERIOD_LIKE) + .addAll(OTHER_GROUPING_SEPARATORS).freeze(); + + static final UnicodeSet STRICT_PERIOD_LIKE_OR_OTHER = new UnicodeSet().addAll(STRICT_PERIOD_LIKE) + .addAll(OTHER_GROUPING_SEPARATORS).freeze(); + + static final UnicodeSet COMMA_OR_PERIOD_LIKE_OR_OTHER = new UnicodeSet().addAll(COMMA_LIKE).addAll(PERIOD_LIKE) + .addAll(OTHER_GROUPING_SEPARATORS).freeze(); + + static final UnicodeSet STRICT_COMMA_OR_PERIOD_LIKE_OR_OTHER = new UnicodeSet().addAll(STRICT_COMMA_LIKE) + .addAll(STRICT_PERIOD_LIKE).addAll(OTHER_GROUPING_SEPARATORS).freeze(); + + public static UnicodeSet getGroupingUnicodeSet(DecimalFormatSymbols symbols, boolean isStrict) { + if (isStrict) { + return chooseUnicodeSet(symbols.getGroupingSeparatorString(), + STRICT_COMMA_LIKE_OR_OTHER, + STRICT_PERIOD_LIKE_OR_OTHER, + OTHER_GROUPING_SEPARATORS); + } else { + return chooseUnicodeSet(symbols.getGroupingSeparatorString(), + COMMA_LIKE_OR_OTHER, + PERIOD_LIKE_OR_OTHER, + OTHER_GROUPING_SEPARATORS); + } + } + + public static UnicodeSet getDecimalUnicodeSet(DecimalFormatSymbols symbols, boolean isStrict) { + if (isStrict) { + return chooseUnicodeSet(symbols.getDecimalSeparatorString(), STRICT_COMMA_LIKE, STRICT_PERIOD_LIKE); + } else { + return chooseUnicodeSet(symbols.getDecimalSeparatorString(), COMMA_LIKE, PERIOD_LIKE); + } + } + + private static UnicodeSet chooseUnicodeSet(String str, UnicodeSet set1) { + return set1.contains(str) ? set1 : new UnicodeSet().add(str).freeze(); + } + + private static UnicodeSet chooseUnicodeSet(String str, UnicodeSet set1, UnicodeSet set2) { + return set1.contains(str) ? set1 : chooseUnicodeSet(str, set2); + } + + private static UnicodeSet chooseUnicodeSet(String str, UnicodeSet set1, UnicodeSet set2, UnicodeSet set3) { + return set1.contains(str) ? set1 : chooseUnicodeSet(str, set2, set3); + } + + public static UnicodeSet unionUnicodeSets(UnicodeSet set1, UnicodeSet set2) { + // Note: == operators should be okay here since non-static UnicodeSets happen only in fallback cases. + if (set1 == UnicodeSet.EMPTY && set2 == UnicodeSet.EMPTY) { + return UnicodeSet.EMPTY; + } else if (set1 == COMMA_LIKE_OR_OTHER && set2 == PERIOD_LIKE_OR_OTHER) { + return COMMA_OR_PERIOD_LIKE_OR_OTHER; + } else if (set1 == PERIOD_LIKE_OR_OTHER && set2 == COMMA_LIKE_OR_OTHER) { + return COMMA_OR_PERIOD_LIKE_OR_OTHER; + } else if (set1 == STRICT_COMMA_LIKE_OR_OTHER && set2 == STRICT_PERIOD_LIKE_OR_OTHER) { + return STRICT_COMMA_OR_PERIOD_LIKE_OR_OTHER; + } else if (set1 == STRICT_PERIOD_LIKE_OR_OTHER && set2 == STRICT_COMMA_LIKE_OR_OTHER) { + return STRICT_COMMA_OR_PERIOD_LIKE_OR_OTHER; + } else if (set1 == COMMA_LIKE && set2 == PERIOD_LIKE) { + return COMMA_OR_PERIOD_LIKE; + } else if (set1 == PERIOD_LIKE && set2 == COMMA_LIKE) { + return COMMA_OR_PERIOD_LIKE; + } else if (set1 == STRICT_COMMA_LIKE && set2 == STRICT_PERIOD_LIKE) { + return STRICT_COMMA_OR_PERIOD_LIKE; + } else if (set1 == STRICT_PERIOD_LIKE && set2 == STRICT_COMMA_LIKE) { + return STRICT_COMMA_OR_PERIOD_LIKE; + } else { + return set1.cloneAsThawed().addAll(set2).freeze(); + } + } +} diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/WhitespaceMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/WhitespaceMatcher.java deleted file mode 100644 index 51ed99c2ea7..00000000000 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/WhitespaceMatcher.java +++ /dev/null @@ -1,48 +0,0 @@ -// © 2017 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html#License -package com.ibm.icu.impl.number.parse; - -import com.ibm.icu.text.UnicodeSet; - -/** - * @author sffc - * - */ -public class WhitespaceMatcher implements NumberParseMatcher { - - // This set was decided after discussion with icu-design@. See ticket #13309. - // Zs+TAB is "horizontal whitespace" according to UTS #18 (blank property). - private static final UnicodeSet UNISET_WHITESPACE = new UnicodeSet("[[:Zs:][\\u0009]]").freeze(); - - private static final WhitespaceMatcher INSTANCE = new WhitespaceMatcher(); - - public static WhitespaceMatcher getInstance() { - return INSTANCE; - } - - private WhitespaceMatcher() { - } - - @Override - public boolean match(StringSegment segment, ParsedNumber result) { - while (segment.length() > 0) { - int cp = segment.getCodePoint(); - if (cp == -1 || !UNISET_WHITESPACE.contains(cp)) { - break; - } - segment.adjustOffset(Character.charCount(cp)); - // Note: Do not touch the charsConsumed. - } - return segment.length() == 0 || segment.isLeadingSurrogate(); - } - - @Override - public void postProcess(ParsedNumber result) { - // No-op - } - - @Override - public String toString() { - return ""; - } -} diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/data/numberformattestspecification.txt b/icu4j/main/tests/core/src/com/ibm/icu/dev/data/numberformattestspecification.txt index 682c7adc730..ae7294b55f7 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/data/numberformattestspecification.txt +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/data/numberformattestspecification.txt @@ -975,8 +975,7 @@ set locale en set pattern '-'#y begin parse output breaks -// FIXME --45y 45 P +-45y 45 test parse with locale symbols // The grouping separator in it_CH is an apostrophe @@ -1039,7 +1038,7 @@ USD53.45 53.45 USD (USD 7.926) -7.926 USD CJ USD (7.926) -7.926 USD CJ USD (7.92) -7.92 USD CJ -(7.92)USD -7.92 USD CJP +(7.92)USD -7.92 USD CJ USD(7.92) -7.92 USD CJ (8) USD -8 USD -8 USD -8 USD C @@ -1060,10 +1059,10 @@ US Dollars (53.45) -53.45 USD CJ US Dollar (53.45) -53.45 USD CJ (53.45) US Dollar -53.45 USD US Dollars(53.45) -53.45 USD CJ -(53.45)US Dollars -53.45 USD CJP +(53.45)US Dollars -53.45 USD CJ US Dollar(53.45) -53.45 USD CJ US Dollat(53.45) fail USD -(53.45)US Dollar -53.45 USD CJP +(53.45)US Dollar -53.45 USD CJ test parse currency ISO negative @@ -1074,9 +1073,8 @@ parse output outputCurrency breaks 53.45 fail GBP £53.45 53.45 GBP $53.45 fail USD J -// FIXME: Fix the failures in this section. Positive/negative mixup. -53.45 USD 53.45 USD P -53.45 GBP 53.45 GBP P +53.45 USD 53.45 USD +53.45 GBP 53.45 GBP USD 53.45 53.45 USD J 53.45USD 53.45 USD CJ USD53.45 53.45 USD @@ -1087,12 +1085,12 @@ USD -7.926 -7.926 USD CJ -7.92USD -7.92 USD CJ USD-7.92 -7.92 USD CJ -8 USD -8 USD -67 USD 67 USD P +67 USD 67 USD 53.45$ fail USD US Dollars 53.45 53.45 USD J -53.45 US Dollars 53.45 USD P +53.45 US Dollars 53.45 USD US Dollar 53.45 53.45 USD J -53.45 US Dollar 53.45 USD P +53.45 US Dollar 53.45 USD US Dollars53.45 53.45 USD 53.45US Dollars 53.45 USD CJ US Dollar53.45 53.45 USD @@ -1114,16 +1112,16 @@ $53.45 fail USD J USD 53.45 53.45 USD J 53.45USD 53.45 USD CJ USD53.45 53.45 USD -(7.92) USD -7.92 USD P -(7.92) GBP -7.92 GBP P -(7.926) USD -7.926 USD P -(7.926 USD) -7.926 USD CJP -(USD 7.926) -7.926 USD CJP -USD (7.926) -7.926 USD CJP -USD (7.92) -7.92 USD CJP -(7.92)USD -7.92 USD CJP -USD(7.92) -7.92 USD CJP -(8) USD -8 USD P +(7.92) USD -7.92 USD +(7.92) GBP -7.92 GBP +(7.926) USD -7.926 USD +(7.926 USD) -7.926 USD CJ +(USD 7.926) -7.926 USD CJ +USD (7.926) -7.926 USD CJ +USD (7.92) -7.92 USD CJ +(7.92)USD -7.92 USD CJ +USD(7.92) -7.92 USD CJ +(8) USD -8 USD -8 USD -8 USD C 67 USD 67 USD // J throws a NullPointerException on the next case @@ -1152,16 +1150,16 @@ $53.45 fail USD J USD 53.45 53.45 USD J 53.45USD 53.45 USD CJ USD53.45 53.45 USD -(7.92) USD -7.92 USD P -(7.92) GBP -7.92 GBP P -(7.926) USD -7.926 USD P -(7.926 USD) -7.926 USD CJP -(USD 7.926) -7.926 USD CJP -USD (7.926) -7.926 USD CJP -USD (7.92) -7.92 USD CJP -(7.92)USD -7.92 USD CJP -USD(7.92) -7.92 USD CJP -(8) USD -8 USD P +(7.92) USD -7.92 USD +(7.92) GBP -7.92 GBP +(7.926) USD -7.926 USD +(7.926 USD) -7.926 USD CJ +(USD 7.926) -7.926 USD CJ +USD (7.926) -7.926 USD CJ +USD (7.92) -7.92 USD CJ +(7.92)USD -7.92 USD CJ +USD(7.92) -7.92 USD CJ +(8) USD -8 USD -8 USD -8 USD C 67 USD 67 USD 53.45$ fail USD @@ -1190,16 +1188,16 @@ USD 53.45 53.45 USD J 53.45USD 53.45 USD CJ USD53.45 53.45 USD // S fails these because '(' is an incomplete prefix. -(7.92) USD -7.92 USD CJSP -(7.92) GBP -7.92 GBP CJSP -(7.926) USD -7.926 USD CJSP -(7.926 USD) -7.926 USD CJSP -(USD 7.926) -7.926 USD JP -USD (7.926) -7.926 USD CJSP -USD (7.92) -7.92 USD CJSP -(7.92)USD -7.92 USD CJSP -USD(7.92) -7.92 USD CJSP -(8) USD -8 USD CJSP +(7.92) USD -7.92 USD CJS +(7.92) GBP -7.92 GBP CJS +(7.926) USD -7.926 USD CJS +(7.926 USD) -7.926 USD CJS +(USD 7.926) -7.926 USD J +USD (7.926) -7.926 USD CJS +USD (7.92) -7.92 USD CJS +(7.92)USD -7.92 USD CJS +USD(7.92) -7.92 USD CJS +(8) USD -8 USD CJS -8 USD -8 USD C 67 USD 67 USD C 53.45$ fail USD @@ -1290,16 +1288,17 @@ Euros 7.82 7.82 EUR test parse currency without currency mode // Should accept a symbol associated with the currency specified by the API, // but should not traverse the full currency data. +// P always traverses full currency data. set locale en_US set pattern \u00a4#,##0.00 begin parse currency output breaks $52.41 USD 52.41 USD52.41 USD 52.41 K -\u20ac52.41 USD fail -EUR52.41 USD fail -$52.41 EUR fail -USD52.41 EUR fail +\u20ac52.41 USD fail P +EUR52.41 USD fail P +$52.41 EUR fail P +USD52.41 EUR fail P \u20ac52.41 EUR 52.41 K EUR52.41 EUR 52.41 @@ -1361,12 +1360,13 @@ set decimalPatternMatchRequired 1 begin pattern parse output breaks // K doesn't support this feature. +// P stops parsing when it sees the decimal separator, but doesn't fail. 0 123 123 -0 123. fail CJK -0 1.23 fail CJK +0 123. fail CJKP +0 1.23 fail CJKP 0 -513 -513 -0 -513. fail CJK -0 -5.13 fail CJK +0 -513. fail CJKP +0 -5.13 fail CJKP 0.0 123 fail K 0.0 123. 123 C 0.0 1.23 1.23 C diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/AffixUtilsTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/AffixUtilsTest.java index bbe70cc0853..15ae14a7d1f 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/AffixUtilsTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/AffixUtilsTest.java @@ -10,6 +10,7 @@ import org.junit.Test; import com.ibm.icu.impl.number.AffixUtils; import com.ibm.icu.impl.number.AffixUtils.SymbolProvider; import com.ibm.icu.impl.number.NumberStringBuilder; +import com.ibm.icu.text.UnicodeSet; public class AffixUtilsTest { @@ -218,20 +219,22 @@ public class AffixUtilsTest { } @Test - public void testRemoveSymbols() { + public void testWithoutSymbolsOrIgnorables() { String[][] cases = { {"", ""}, {"-", ""}, + {" ", ""}, {"'-'", "-"}, - {"-a+b%c‰d¤e¤¤f¤¤¤g¤¤¤¤h¤¤¤¤¤", "abcdefgh"}, + {"-a+b%c‰d¤e¤¤f¤¤¤g¤¤¤¤h¤¤¤¤¤i\tj", "abcdefghij"}, }; + UnicodeSet ignorables = new UnicodeSet("[:whitespace:]"); StringBuilder sb = new StringBuilder(); for (String[] cas : cases) { String input = cas[0]; String expected = cas[1]; sb.setLength(0); - AffixUtils.removeSymbols(input, sb); + AffixUtils.withoutSymbolsOrIgnorables(input, ignorables, sb); assertEquals("Removing symbols from: " + input, expected, sb.toString()); } } diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java index 60d86aea14a..ee7a73cbc9b 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java @@ -52,10 +52,17 @@ public class NumberParserTest { { 3, "a 𝟱𝟭𝟰𝟮𝟯 b", "a0b", 14, 51423. }, { 3, "-a 𝟱𝟭𝟰𝟮𝟯 b", "a0b", 15, -51423. }, { 3, "a -𝟱𝟭𝟰𝟮𝟯 b", "a0b", 15, -51423. }, - { 3, "𝟱𝟭𝟰𝟮𝟯", "0;(0)", 10, 51423. }, - { 3, "(𝟱𝟭𝟰𝟮𝟯)", "0;(0)", 12, -51423. }, - { 3, "𝟱𝟭𝟰𝟮𝟯)", "0;(0)", 11, -51423. }, - { 3, "(𝟱𝟭𝟰𝟮𝟯", "0;(0)", 11, -51423. }, + { 3, "𝟱𝟭𝟰𝟮𝟯", "[0];(0)", 10, 51423. }, + { 3, "[𝟱𝟭𝟰𝟮𝟯", "[0];(0)", 11, 51423. }, + { 3, "𝟱𝟭𝟰𝟮𝟯]", "[0];(0)", 11, 51423. }, + { 3, "[𝟱𝟭𝟰𝟮𝟯]", "[0];(0)", 12, 51423. }, + { 3, "(𝟱𝟭𝟰𝟮𝟯", "[0];(0)", 11, -51423. }, + { 3, "𝟱𝟭𝟰𝟮𝟯)", "[0];(0)", 11, -51423. }, + { 3, "(𝟱𝟭𝟰𝟮𝟯)", "[0];(0)", 12, -51423. }, + { 3, "𝟱𝟭𝟰𝟮𝟯", "{0};{0}", 10, 51423. }, + { 3, "{𝟱𝟭𝟰𝟮𝟯", "{0};{0}", 11, 51423. }, + { 3, "𝟱𝟭𝟰𝟮𝟯}", "{0};{0}", 11, 51423. }, + { 3, "{𝟱𝟭𝟰𝟮𝟯}", "{0};{0}", 12, 51423. }, { 1, "a40b", "a0'0b'", 3, 40. }, // greedy code path thinks "40" is the number { 2, "a40b", "a0'0b'", 4, 4. }, // slow code path find the suffix "0b" { 3, "𝟱.𝟭𝟰𝟮E𝟯", "0", 12, 5142. }, -- 2.40.0