From a6a18243b1d63a0a1fda72ebe90aee1c64933fe0 Mon Sep 17 00:00:00 2001 From: Shane Carr Date: Wed, 24 Jan 2018 02:32:03 +0000 Subject: [PATCH] ICU-13513 Assorted fixes to AffixPatternMatcher and other classes. Found and fixed exponential loop in non-greedy parser. X-SVN-Rev: 40797 --- .../icu/impl/number/parse/AffixMatcher.java | 6 -- .../number/parse/AffixPatternMatcher.java | 28 ++++---- .../ibm/icu/impl/number/parse/AnyMatcher.java | 38 ++++------- .../impl/number/parse/CodePointMatcher.java | 5 -- .../impl/number/parse/CurrencyMatcher.java | 5 -- .../number/parse/CurrencyTrieMatcher.java | 5 -- .../icu/impl/number/parse/DecimalMatcher.java | 5 -- .../impl/number/parse/IgnorablesMatcher.java | 15 +---- .../impl/number/parse/NumberParseMatcher.java | 28 +++++--- .../impl/number/parse/NumberParserImpl.java | 7 +- .../icu/impl/number/parse/PaddingMatcher.java | 15 +++-- .../icu/impl/number/parse/RangeMatcher.java | 66 ------------------- .../impl/number/parse/ScientificMatcher.java | 5 -- .../icu/impl/number/parse/SeriesMatcher.java | 53 ++++++--------- .../icu/impl/number/parse/SymbolMatcher.java | 11 ++-- .../impl/number/parse/ValidationMatcher.java | 5 -- .../data/numberformattestspecification.txt | 7 +- .../icu/dev/test/format/NumberFormatTest.java | 3 +- .../icu/dev/test/number/NumberParserTest.java | 15 ++--- 19 files changed, 96 insertions(+), 226 deletions(-) delete mode 100644 icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/RangeMatcher.java diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixMatcher.java index 10d7e07a880..e57894084f0 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixMatcher.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixMatcher.java @@ -176,12 +176,6 @@ public class AffixMatcher implements NumberParseMatcher { return leadCodePoints.freeze(); } - @Override - public boolean matchesEmpty() { - // This is a stub implementation. - throw new AssertionError(); - } - @Override public void postProcess(ParsedNumber result) { // Check to see if our affix is the one that was matched. If so, set the flags in the result. diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixPatternMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixPatternMatcher.java index 94b3fd74011..69416baf158 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixPatternMatcher.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixPatternMatcher.java @@ -59,12 +59,17 @@ public class AffixPatternMatcher extends SeriesMatcher implements AffixUtils.Tok @Override public void consumeToken(int typeOrCp) { // This is called by AffixUtils.iterateWithConsumer() for each token. + + // Add an ignorables matcher between tokens except between two literals, and don't put two + // ignorables matchers in a row. + if (ignorables != null + && length() > 0 + && (lastTypeOrCp < 0 || !ignorables.getSet().contains(lastTypeOrCp))) { + addMatcher(ignorables); + } + if (typeOrCp < 0) { - // Don't add more than two ignorables matchers in a row - if (ignorables != null - && (lastTypeOrCp < 0 || !ignorables.getSet().contains(lastTypeOrCp))) { - addMatcher(ignorables); - } + // Case 1: the token is a symbol. switch (typeOrCp) { case AffixUtils.TYPE_MINUS_SIGN: addMatcher(factory.minusSign()); @@ -89,16 +94,13 @@ public class AffixPatternMatcher extends SeriesMatcher implements AffixUtils.Tok default: throw new AssertionError(); } + } else if (ignorables != null && ignorables.getSet().contains(typeOrCp)) { - // Don't add more than two ignorables matchers in a row - if (lastTypeOrCp < 0 || !ignorables.getSet().contains(lastTypeOrCp)) { - addMatcher(ignorables); - } + // Case 2: the token is an ignorable literal. + // No action necessary: the ignorables matcher has already been added. + } else { - // Start of a literal: add ignorables matcher if the previous token was a symbol - if (ignorables != null && lastTypeOrCp < 0) { - addMatcher(ignorables); - } + // Case 3: the token is a non-ignorable literal. addMatcher(CodePointMatcher.getInstance(typeOrCp)); } lastTypeOrCp = typeOrCp; diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AnyMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AnyMatcher.java index 18a9bf67943..3470fa22b73 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AnyMatcher.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AnyMatcher.java @@ -8,7 +8,8 @@ import java.util.List; import com.ibm.icu.text.UnicodeSet; /** - * Composes a number of matchers, and succeeds if any of the matchers succeed. + * Composes a number of matchers, and succeeds if any of the matchers succeed. Always greedily chooses + * the first matcher in the list to succeed. * * @author sffc * @see SeriesMatcher @@ -37,20 +38,17 @@ public class AnyMatcher implements NumberParseMatcher { return false; } - // TODO: Give a nice way to reset ParsedNumber to avoid the copy here. - ParsedNumber backup = new ParsedNumber(); - backup.copyFrom(result); - int initialOffset = segment.getOffset(); boolean maybeMore = false; for (int i = 0; i < matchers.size(); i++) { NumberParseMatcher matcher = matchers.get(i); maybeMore = maybeMore || matcher.match(segment, result); if (segment.getOffset() != initialOffset) { - // Match succeeded. Return true here to be safe. - // TODO: Better would be to run each matcher and return true only if at least one of the - // matchers returned true. - return true; + // Match succeeded. + // NOTE: Except for a couple edge cases, if a matcher accepted string A, then it will + // accept any string starting with A. Therefore, there is no possibility that matchers + // later in the list may be evaluated on longer strings, and we can exit the loop here. + break; } } @@ -65,6 +63,10 @@ public class AnyMatcher implements NumberParseMatcher { return UnicodeSet.EMPTY; } + if (matchers.size() == 1) { + return matchers.get(0).getLeadCodePoints(); + } + UnicodeSet leadCodePoints = new UnicodeSet(); for (int i = 0; i < matchers.size(); i++) { NumberParseMatcher matcher = matchers.get(i); @@ -73,22 +75,6 @@ public class AnyMatcher implements NumberParseMatcher { return leadCodePoints.freeze(); } - @Override - public boolean matchesEmpty() { - assert frozen; - if (matchers == null) { - return true; - } - - for (int i = 0; i < matchers.size(); i++) { - NumberParseMatcher matcher = matchers.get(i); - if (matcher.matchesEmpty()) { - return true; - } - } - return false; - } - @Override public void postProcess(ParsedNumber result) { assert frozen; @@ -104,7 +90,7 @@ public class AnyMatcher implements NumberParseMatcher { @Override public String toString() { - return ""; + return ""; } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CodePointMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CodePointMatcher.java index 048692978d7..385e73a5d89 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CodePointMatcher.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CodePointMatcher.java @@ -36,11 +36,6 @@ public class CodePointMatcher implements NumberParseMatcher { return new UnicodeSet().add(cp).freeze(); } - @Override - public boolean matchesEmpty() { - return false; - } - @Override public void postProcess(ParsedNumber result) { // No-op diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CurrencyMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CurrencyMatcher.java index 33820d57a8c..e760a0142b8 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CurrencyMatcher.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CurrencyMatcher.java @@ -58,11 +58,6 @@ public class CurrencyMatcher implements NumberParseMatcher { return leadCodePoints.freeze(); } - @Override - public boolean matchesEmpty() { - return false; - } - @Override public void postProcess(ParsedNumber result) { // No-op diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CurrencyTrieMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CurrencyTrieMatcher.java index e57adf9e4d9..239949ec13d 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CurrencyTrieMatcher.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CurrencyTrieMatcher.java @@ -58,11 +58,6 @@ public class CurrencyTrieMatcher implements NumberParseMatcher { return leadCodePoints.freeze(); } - @Override - public boolean matchesEmpty() { - return false; - } - @Override public void postProcess(ParsedNumber result) { // No-op diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/DecimalMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/DecimalMatcher.java index 20fd10da73e..e804fe55843 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/DecimalMatcher.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/DecimalMatcher.java @@ -333,11 +333,6 @@ public class DecimalMatcher implements NumberParseMatcher { return leadCodePoints.freeze(); } - @Override - public boolean matchesEmpty() { - return false; - } - @Override public void postProcess(ParsedNumber result) { // No-op diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/IgnorablesMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/IgnorablesMatcher.java index 854af3cde6b..125dd1078fe 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/IgnorablesMatcher.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/IgnorablesMatcher.java @@ -8,7 +8,7 @@ import com.ibm.icu.text.UnicodeSet; * @author sffc * */ -public class IgnorablesMatcher extends RangeMatcher { +public class IgnorablesMatcher extends SymbolMatcher implements NumberParseMatcher.Flexible { public static final IgnorablesMatcher DEFAULT = new IgnorablesMatcher( UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.DEFAULT_IGNORABLES)); @@ -22,18 +22,7 @@ public class IgnorablesMatcher extends RangeMatcher { } private IgnorablesMatcher(UnicodeSet ignorables) { - super(ignorables); - } - - @Override - public UnicodeSet getLeadCodePoints() { - if (this == DEFAULT) { - return UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.DEFAULT_IGNORABLES); - } else if (this == STRICT) { - return UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.STRICT_IGNORABLES); - } else { - return super.getLeadCodePoints(); - } + super("", ignorables); } @Override diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParseMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParseMatcher.java index 28f99975b26..1d576cee2cc 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParseMatcher.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParseMatcher.java @@ -5,10 +5,27 @@ package com.ibm.icu.impl.number.parse; import com.ibm.icu.text.UnicodeSet; /** - * @author sffc + * Given a string, there should NOT be more than one way to consume the string with the same matcher + * applied multiple times. If there is, the non-greedy parsing algorithm will be unhappy and may enter an + * exponential-time loop. For example, consider the "A Matcher" that accepts "any number of As". Given + * the string "AAAA", there are 2^N = 8 ways to apply the A Matcher to this string: you could have the A + * Matcher apply 4 times to each character; you could have it apply just once to all the characters; you + * could have it apply to the first 2 characters and the second 2 characters; and so on. A better version + * of the "A Matcher" would be for it to accept exactly one A, and allow the algorithm to run it + * repeatedly to consume a string of multiple As. The A Matcher can implement the Flexible interface + * below to signal that it can be applied multiple times in a row. * + * @author sffc */ public interface NumberParseMatcher { + + /** + * Matchers can implement the Flexible interface to indicate that they are optional and can be run + * repeatedly. Used by SeriesMatcher, primarily in the context of IgnorablesMatcher. + */ + public interface Flexible { + } + /** * Runs this matcher starting at the beginning of the given StringSegment. If this matcher finds * something interesting in the StringSegment, it should update the offset of the StringSegment @@ -32,15 +49,6 @@ public interface NumberParseMatcher { */ public UnicodeSet getLeadCodePoints(); - /** - * Whether this matcher is well-defined for the empty string. Matchers that are looking for specific - * symbols should return false here. Matchers that are looking for any number of copies of a certain - * code point or string, like RangeMatcher and IgnorablesMatcher, should return true. - * - * @return Whether this matcher can accept the empty string. - */ - public boolean matchesEmpty(); - /** * Method called at the end of a parse, after all matchers have failed to consume any more chars. * Allows a matcher to make final modifications to the result given the knowledge that no more diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java index 1d2a81f08a2..38c3afbbbda 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java @@ -33,7 +33,9 @@ import com.ibm.icu.util.ULocale; public class NumberParserImpl { @Deprecated public static NumberParserImpl createParserFromPattern( - ULocale locale, String pattern, boolean strictGrouping) { + ULocale locale, + String pattern, + boolean strictGrouping) { // Temporary frontend for testing. int parseFlags = ParsingUtils.PARSE_FLAG_IGNORE_CASE @@ -196,7 +198,7 @@ public class NumberParserImpl { parser.addMatcher(InfinityMatcher.getInstance(symbols)); String padString = properties.getPadString(); if (padString != null && !ignorables.getSet().contains(padString)) { - parser.addMatcher(new PaddingMatcher(padString)); + parser.addMatcher(PaddingMatcher.getInstance(padString)); } parser.addMatcher(ignorables); parser.addMatcher(DecimalMatcher.getInstance(symbols, grouper, parseFlags)); @@ -366,6 +368,7 @@ public class NumberParserImpl { int initialOffset = segment.getOffset(); for (int i = 0; i < matchers.size(); i++) { NumberParseMatcher matcher = matchers.get(i); + // In a non-greedy parse, we attempt all possible matches and pick the best. for (int charsToConsume = 0; charsToConsume < segment.length();) { charsToConsume += Character.charCount(Character.codePointAt(segment, charsToConsume)); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/PaddingMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/PaddingMatcher.java index 21ebaf4dd27..64c17d79b6a 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/PaddingMatcher.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/PaddingMatcher.java @@ -8,13 +8,14 @@ import com.ibm.icu.text.UnicodeSet; * @author sffc * */ -public class PaddingMatcher extends RangeMatcher { +public class PaddingMatcher extends SymbolMatcher implements NumberParseMatcher.Flexible { - /** - * @param uniSet - */ - protected PaddingMatcher(String padString) { - super(new UnicodeSet().add(padString).freeze()); + public static PaddingMatcher getInstance(String padString) { + return new PaddingMatcher(padString); + } + + private PaddingMatcher(String symbolString) { + super(symbolString, UnicodeSet.EMPTY); } @Override @@ -29,6 +30,6 @@ public class PaddingMatcher extends RangeMatcher { @Override public String toString() { - return ""; + return ""; } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/RangeMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/RangeMatcher.java deleted file mode 100644 index 129780c871d..00000000000 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/RangeMatcher.java +++ /dev/null @@ -1,66 +0,0 @@ -// © 2017 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html#License -package com.ibm.icu.impl.number.parse; - -import com.ibm.icu.text.UnicodeSet; - -/** - * @author sffc - * - */ -public abstract class RangeMatcher implements NumberParseMatcher { - protected final UnicodeSet uniSet; - - protected RangeMatcher(UnicodeSet uniSet) { - this.uniSet = uniSet; - } - - public UnicodeSet getSet() { - return uniSet; - } - - @Override - public boolean match(StringSegment segment, ParsedNumber result) { - // Smoke test first; this matcher might be disabled. - if (isDisabled(result)) { - return false; - } - - while (segment.length() > 0) { - int cp = segment.getCodePoint(); - if (cp != -1 && uniSet.contains(cp)) { - segment.adjustOffset(Character.charCount(cp)); - accept(segment, result); - continue; - } - - // If we get here, the code point didn't match the uniSet. - return false; - } - - // If we get here, we consumed the entire string segment. - return true; - } - - @Override - public UnicodeSet getLeadCodePoints() { - UnicodeSet leadCodePoints = new UnicodeSet(); - ParsingUtils.putLeadCodePoints(uniSet, leadCodePoints); - return leadCodePoints.freeze(); - } - - @Override - public boolean matchesEmpty() { - return true; - } - - @Override - public void postProcess(ParsedNumber result) { - // No-op - } - - protected abstract boolean isDisabled(ParsedNumber result); - - protected abstract void accept(StringSegment segment, ParsedNumber result); - -} diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ScientificMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ScientificMatcher.java index 2559e59ab8e..c05e75fa80e 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ScientificMatcher.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ScientificMatcher.java @@ -93,11 +93,6 @@ public class ScientificMatcher implements NumberParseMatcher { } } - @Override - public boolean matchesEmpty() { - return false; - } - @Override public void postProcess(ParsedNumber result) { // No-op diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/SeriesMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/SeriesMatcher.java index fcbe7546092..7f8f6704b07 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/SeriesMatcher.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/SeriesMatcher.java @@ -31,6 +31,10 @@ public class SeriesMatcher implements NumberParseMatcher { frozen = true; } + public int length() { + return matchers == null ? 0 : matchers.size(); + } + @Override public boolean match(StringSegment segment, ParsedNumber result) { assert frozen; @@ -44,7 +48,7 @@ public class SeriesMatcher implements NumberParseMatcher { int initialOffset = segment.getOffset(); boolean maybeMore = true; - for (int i = 0; i < matchers.size(); i++) { + for (int i = 0; i < matchers.size();) { NumberParseMatcher matcher = matchers.get(i); int matcherOffset = segment.getOffset(); if (segment.length() != 0) { @@ -53,8 +57,19 @@ public class SeriesMatcher implements NumberParseMatcher { // Nothing for this matcher to match; ask for more. maybeMore = true; } - if (segment.getOffset() == matcherOffset && !matcher.matchesEmpty()) { - // Match failed. + + boolean success = (segment.getOffset() != matcherOffset); + boolean isFlexible = matcher instanceof NumberParseMatcher.Flexible; + if (success && isFlexible) { + // Match succeeded, and this is a flexible matcher. Re-run it. + } else if (success) { + // Match succeeded, and this is NOT a flexible matcher. Proceed to the next matcher. + i++; + } else if (isFlexible) { + // Match failed, and this is a flexible matcher. Try again with the next matcher. + i++; + } else { + // Match failed, and this is NOT a flexible matcher. Exit. segment.setOffset(initialOffset); result.copyFrom(backup); return maybeMore; @@ -72,35 +87,9 @@ public class SeriesMatcher implements NumberParseMatcher { return UnicodeSet.EMPTY; } - if (!matchers.get(0).matchesEmpty()) { - return matchers.get(0).getLeadCodePoints(); - } - - UnicodeSet leadCodePoints = new UnicodeSet(); - for (int i = 0; i < matchers.size(); i++) { - NumberParseMatcher matcher = matchers.get(i); - leadCodePoints.addAll(matcher.getLeadCodePoints()); - if (!matcher.matchesEmpty()) { - break; - } - } - return leadCodePoints.freeze(); - } - - @Override - public boolean matchesEmpty() { - assert frozen; - if (matchers == null) { - return true; - } - - for (int i = 0; i < matchers.size(); i++) { - NumberParseMatcher matcher = matchers.get(i); - if (!matcher.matchesEmpty()) { - return false; - } - } - return true; + // SeriesMatchers are never allowed to start with a Flexible matcher. + assert !(matchers.get(0) instanceof NumberParseMatcher.Flexible); + return matchers.get(0).getLeadCodePoints(); } @Override diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/SymbolMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/SymbolMatcher.java index d483d3d565d..e31841e6872 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/SymbolMatcher.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/SymbolMatcher.java @@ -25,6 +25,10 @@ public abstract class SymbolMatcher implements NumberParseMatcher { uniSet = UnicodeSetStaticCache.get(key); } + public UnicodeSet getSet() { + return uniSet; + } + @Override public boolean match(StringSegment segment, ParsedNumber result) { // Smoke test first; this matcher might be disabled. @@ -55,7 +59,7 @@ public abstract class SymbolMatcher implements NumberParseMatcher { @Override public UnicodeSet getLeadCodePoints() { - if (string == null || string.isEmpty()) { + if (string.isEmpty()) { // Assumption: for sets from UnicodeSetStaticCache, uniSet == leadCodePoints. return uniSet; } @@ -66,11 +70,6 @@ public abstract class SymbolMatcher implements NumberParseMatcher { return leadCodePoints.freeze(); } - @Override - public boolean matchesEmpty() { - return false; - } - @Override public void postProcess(ParsedNumber result) { // No-op diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ValidationMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ValidationMatcher.java index cde7292d0e6..bfe5a6b5491 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ValidationMatcher.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ValidationMatcher.java @@ -14,11 +14,6 @@ public abstract class ValidationMatcher implements NumberParseMatcher { return false; } - @Override - public boolean matchesEmpty() { - return false; - } - @Override public UnicodeSet getLeadCodePoints() { return UnicodeSet.EMPTY; diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/data/numberformattestspecification.txt b/icu4j/main/tests/core/src/com/ibm/icu/dev/data/numberformattestspecification.txt index 80ad592cf6c..2e3c634bb5f 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/data/numberformattestspecification.txt +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/data/numberformattestspecification.txt @@ -1492,13 +1492,12 @@ y gh56 -56 JK y g h56 -56 JK // S stops parsing after the 'i' for these and returns -56 // C stops before the 'i' and gets 56 -// P does not allow ignorables between the 'j' and the 'k' 56ijk -56 CJK 56i jk -56 CJK -56ij k -56 CJKP -56i‎j‎k -56 CJKP +56ij k -56 CJK +56i‎j‎k -56 CJK 56ijk -56 CJK -56i j‎k -56 CJKP +56i j‎k -56 CJK 56‎i jk -56 CJK // S and C get 56 (accepts ' ' gs grouping); J and K get null 5 6 fail CSP diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTest.java index 50bef581f9a..0cabbe8c9f8 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTest.java @@ -5547,8 +5547,7 @@ public class NumberFormatTest extends TestFmwk { ParsePosition ppos = new ParsePosition(0); Number result = df.parse("42\u200E%\u200E ", ppos); assertEquals("Should parse as percentage", new BigDecimal("0.42"), result); - // TODO: The following line breaks in ICU 61. - //assertEquals("Should consume the trailing bidi since it is in the symbol", 5, ppos.getIndex()); + assertEquals("Should consume the trailing bidi since it is in the symbol", 5, ppos.getIndex()); ppos.setIndex(0); result = df.parse("-42a\u200E ", ppos); assertEquals("Should not parse as percent", new Long(-42), result); diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java index 9adc7856513..847fa771948 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java @@ -3,7 +3,6 @@ package com.ibm.icu.dev.test.number; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; @@ -84,6 +83,7 @@ public class NumberParserTest { { 3, "📺1.23", "📺0;📻0", 6, 1.23 }, { 3, "📻1.23", "📺0;📻0", 6, -1.23 }, { 3, ".00", "0", 3, 0.0 }, + { 3, " 0", "a0", 31, 0.0}, // should not hang { 3, "0", "0", 1, 0.0 } }; for (Object[] cas : cases) { @@ -147,7 +147,6 @@ public class NumberParserTest { public void testSeriesMatcher() { DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(ULocale.ENGLISH); SeriesMatcher series = new SeriesMatcher(); - series.addMatcher(IgnorablesMatcher.DEFAULT); series.addMatcher(PlusSignMatcher.getInstance(symbols)); series.addMatcher(MinusSignMatcher.getInstance(symbols)); series.addMatcher(IgnorablesMatcher.DEFAULT); @@ -155,23 +154,21 @@ public class NumberParserTest { series.addMatcher(IgnorablesMatcher.DEFAULT); series.freeze(); - assertEquals(UnicodeSetStaticCache.get(Key.DEFAULT_IGNORABLES).cloneAsThawed() - .addAll(UnicodeSetStaticCache.get(Key.PLUS_SIGN)), series.getLeadCodePoints()); - assertFalse(series.matchesEmpty()); + assertEquals(UnicodeSetStaticCache.get(Key.PLUS_SIGN), series.getLeadCodePoints()); Object[][] cases = new Object[][] { { "", 0, true }, - { " ", 0, true }, + { " ", 0, false }, { "$", 0, false }, { "+", 0, true }, - { " +", 0, true }, - { " + ", 0, false }, + { " +", 0, false }, { "+-", 0, true }, { "+ -", 0, false }, { "+- ", 0, true }, { "+- $", 0, false }, { "+-%", 3, true }, - { " +- % ", 9, true }, + { " +- % ", 0, false }, + { "+- % ", 7, true }, { "+-%$", 3, false } }; for (Object[] cas : cases) { String input = (String) cas[0]; -- 2.40.0