ICU-13513 Assorted fixes to AffixPatternMatcher and other classes. Found and fixed...

author Shane Carr <shane@unicode.org>

Wed, 24 Jan 2018 02:32:03 +0000 (02:32 +0000)

committer Shane Carr <shane@unicode.org>

Wed, 24 Jan 2018 02:32:03 +0000 (02:32 +0000)
author Shane Carr <shane@unicode.org>
Wed, 24 Jan 2018 02:32:03 +0000 (02:32 +0000)
committer Shane Carr <shane@unicode.org>
Wed, 24 Jan 2018 02:32:03 +0000 (02:32 +0000)
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixMatcher.java

index 10d7e07a880a1228cfbad8db5669a576345c89a7..e57894084f09532256a060c829a5420e15482f08 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixMatcher.java
@@ -176,12 +176,6 @@ public class AffixMatcher implements NumberParseMatcher {
          return leadCodePoints.freeze();
      }
  
-    @Override
-    public boolean matchesEmpty() {
-        // This is a stub implementation.
-        throw new AssertionError();
-    }
-
      @Override
      public void postProcess(ParsedNumber result) {
          // Check to see if our affix is the one that was matched. If so, set the flags in the result.
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixPatternMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixPatternMatcher.java

index 94b3fd740113c994d07cf019b6a3051b17c24272..69416baf1584f67435a7453a908ca02236a56c7f 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixPatternMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixPatternMatcher.java
@@ -59,12 +59,17 @@ public class AffixPatternMatcher extends SeriesMatcher implements AffixUtils.Tok
      @Override
      public void consumeToken(int typeOrCp) {
          // This is called by AffixUtils.iterateWithConsumer() for each token.
+
+        // Add an ignorables matcher between tokens except between two literals, and don't put two
+        // ignorables matchers in a row.
+        if (ignorables != null
+                && length() > 0
+                && (lastTypeOrCp < 0 || !ignorables.getSet().contains(lastTypeOrCp))) {
+            addMatcher(ignorables);
+        }
+
          if (typeOrCp < 0) {
-            // Don't add more than two ignorables matchers in a row
-            if (ignorables != null
-                    && (lastTypeOrCp < 0 || !ignorables.getSet().contains(lastTypeOrCp))) {
-                addMatcher(ignorables);
-            }
+            // Case 1: the token is a symbol.
              switch (typeOrCp) {
              case AffixUtils.TYPE_MINUS_SIGN:
                  addMatcher(factory.minusSign());
@@ -89,16 +94,13 @@ public class AffixPatternMatcher extends SeriesMatcher implements AffixUtils.Tok
              default:
                  throw new AssertionError();
              }
+
          } else if (ignorables != null && ignorables.getSet().contains(typeOrCp)) {
-            // Don't add more than two ignorables matchers in a row
-            if (lastTypeOrCp < 0 || !ignorables.getSet().contains(lastTypeOrCp)) {
-                addMatcher(ignorables);
-            }
+            // Case 2: the token is an ignorable literal.
+            // No action necessary: the ignorables matcher has already been added.
+
          } else {
-            // Start of a literal: add ignorables matcher if the previous token was a symbol
-            if (ignorables != null && lastTypeOrCp < 0) {
-                addMatcher(ignorables);
-            }
+            // Case 3: the token is a non-ignorable literal.
              addMatcher(CodePointMatcher.getInstance(typeOrCp));
          }
          lastTypeOrCp = typeOrCp;
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AnyMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AnyMatcher.java

index 18a9bf67943b6787e5efb0029d74d3ce90898ccb..3470fa22b73510daf8a33fd270dfd88e69a06bdb 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AnyMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AnyMatcher.java
@@ -8,7 +8,8 @@ import java.util.List;
  import com.ibm.icu.text.UnicodeSet;
  
  /**
- * Composes a number of matchers, and succeeds if any of the matchers succeed.
+ * Composes a number of matchers, and succeeds if any of the matchers succeed. Always greedily chooses
+ * the first matcher in the list to succeed.
   *
   * @author sffc
   * @see SeriesMatcher
@@ -37,20 +38,17 @@ public class AnyMatcher implements NumberParseMatcher {
              return false;
          }
  
-        // TODO: Give a nice way to reset ParsedNumber to avoid the copy here.
-        ParsedNumber backup = new ParsedNumber();
-        backup.copyFrom(result);
-
          int initialOffset = segment.getOffset();
          boolean maybeMore = false;
          for (int i = 0; i < matchers.size(); i++) {
              NumberParseMatcher matcher = matchers.get(i);
              maybeMore = maybeMore || matcher.match(segment, result);
              if (segment.getOffset() != initialOffset) {
-                // Match succeeded. Return true here to be safe.
-                // TODO: Better would be to run each matcher and return true only if at least one of the
-                // matchers returned true.
-                return true;
+                // Match succeeded.
+                // NOTE: Except for a couple edge cases, if a matcher accepted string A, then it will
+                // accept any string starting with A. Therefore, there is no possibility that matchers
+                // later in the list may be evaluated on longer strings, and we can exit the loop here.
+                break;
              }
          }
  
@@ -65,6 +63,10 @@ public class AnyMatcher implements NumberParseMatcher {
              return UnicodeSet.EMPTY;
          }
  
+        if (matchers.size() == 1) {
+            return matchers.get(0).getLeadCodePoints();
+        }
+
          UnicodeSet leadCodePoints = new UnicodeSet();
          for (int i = 0; i < matchers.size(); i++) {
              NumberParseMatcher matcher = matchers.get(i);
@@ -73,22 +75,6 @@ public class AnyMatcher implements NumberParseMatcher {
          return leadCodePoints.freeze();
      }
  
-    @Override
-    public boolean matchesEmpty() {
-        assert frozen;
-        if (matchers == null) {
-            return true;
-        }
-
-        for (int i = 0; i < matchers.size(); i++) {
-            NumberParseMatcher matcher = matchers.get(i);
-            if (matcher.matchesEmpty()) {
-                return true;
-            }
-        }
-        return false;
-    }
-
      @Override
      public void postProcess(ParsedNumber result) {
          assert frozen;
@@ -104,7 +90,7 @@ public class AnyMatcher implements NumberParseMatcher {
  
      @Override
      public String toString() {
-        return "<SeriesMatcher " + matchers + ">";
+        return "<AnyMatcher " + matchers + ">";
      }
  
  }
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CodePointMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CodePointMatcher.java

index 048692978d72d512fdb19ee262ca7186ff4a3ab8..385e73a5d89cd93fa9bfffc298de78884f067a20 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CodePointMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CodePointMatcher.java
@@ -36,11 +36,6 @@ public class CodePointMatcher implements NumberParseMatcher {
          return new UnicodeSet().add(cp).freeze();
      }
  
-    @Override
-    public boolean matchesEmpty() {
-        return false;
-    }
-
      @Override
      public void postProcess(ParsedNumber result) {
          // No-op
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CurrencyMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CurrencyMatcher.java

index 33820d57a8ce51a102bc89e0a17e8f557ed21f85..e760a0142b835e2a129ab5b2b0d6079d3e4c1ad8 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CurrencyMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CurrencyMatcher.java
@@ -58,11 +58,6 @@ public class CurrencyMatcher implements NumberParseMatcher {
          return leadCodePoints.freeze();
      }
  
-    @Override
-    public boolean matchesEmpty() {
-        return false;
-    }
-
      @Override
      public void postProcess(ParsedNumber result) {
          // No-op
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CurrencyTrieMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CurrencyTrieMatcher.java

index e57adf9e4d98998480828341c494c05ddfb69ec0..239949ec13db525e61d11fa6bd7a7ff416832ff4 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CurrencyTrieMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CurrencyTrieMatcher.java
@@ -58,11 +58,6 @@ public class CurrencyTrieMatcher implements NumberParseMatcher {
          return leadCodePoints.freeze();
      }
  
-    @Override
-    public boolean matchesEmpty() {
-        return false;
-    }
-
      @Override
      public void postProcess(ParsedNumber result) {
          // No-op
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/DecimalMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/DecimalMatcher.java

index 20fd10da73e5809914099021772b517e7c51e084..e804fe55843a42234f39576cb9ec4cc10fc3390b 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/DecimalMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/DecimalMatcher.java
@@ -333,11 +333,6 @@ public class DecimalMatcher implements NumberParseMatcher {
          return leadCodePoints.freeze();
      }
  
-    @Override
-    public boolean matchesEmpty() {
-        return false;
-    }
-
      @Override
      public void postProcess(ParsedNumber result) {
          // No-op
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/IgnorablesMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/IgnorablesMatcher.java

index 854af3cde6b953439f54a74d14ee46cd74ff9fbe..125dd1078fe2652b499a8849c48319866eba2070 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/IgnorablesMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/IgnorablesMatcher.java
@@ -8,7 +8,7 @@ import com.ibm.icu.text.UnicodeSet;
   * @author sffc
   *
   */
-public class IgnorablesMatcher extends RangeMatcher {
+public class IgnorablesMatcher extends SymbolMatcher implements NumberParseMatcher.Flexible {
  
      public static final IgnorablesMatcher DEFAULT = new IgnorablesMatcher(
              UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.DEFAULT_IGNORABLES));
@@ -22,18 +22,7 @@ public class IgnorablesMatcher extends RangeMatcher {
      }
  
      private IgnorablesMatcher(UnicodeSet ignorables) {
-        super(ignorables);
-    }
-
-    @Override
-    public UnicodeSet getLeadCodePoints() {
-        if (this == DEFAULT) {
-            return UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.DEFAULT_IGNORABLES);
-        } else if (this == STRICT) {
-            return UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.STRICT_IGNORABLES);
-        } else {
-            return super.getLeadCodePoints();
-        }
+        super("", ignorables);
      }
  
      @Override
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParseMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParseMatcher.java

index 28f99975b26b691cdee9096cdeae0113bd57f020..1d576cee2cc5e1c5ff2400a1094c3c5bb714838c 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParseMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParseMatcher.java
@@ -5,10 +5,27 @@ package com.ibm.icu.impl.number.parse;
  import com.ibm.icu.text.UnicodeSet;
  
  /**
- * @author sffc
+ * Given a string, there should NOT be more than one way to consume the string with the same matcher
+ * applied multiple times. If there is, the non-greedy parsing algorithm will be unhappy and may enter an
+ * exponential-time loop. For example, consider the "A Matcher" that accepts "any number of As". Given
+ * the string "AAAA", there are 2^N = 8 ways to apply the A Matcher to this string: you could have the A
+ * Matcher apply 4 times to each character; you could have it apply just once to all the characters; you
+ * could have it apply to the first 2 characters and the second 2 characters; and so on. A better version
+ * of the "A Matcher" would be for it to accept exactly one A, and allow the algorithm to run it
+ * repeatedly to consume a string of multiple As. The A Matcher can implement the Flexible interface
+ * below to signal that it can be applied multiple times in a row.
   *
+ * @author sffc
   */
  public interface NumberParseMatcher {
+
+    /**
+     * Matchers can implement the Flexible interface to indicate that they are optional and can be run
+     * repeatedly. Used by SeriesMatcher, primarily in the context of IgnorablesMatcher.
+     */
+    public interface Flexible {
+    }
+
      /**
       * Runs this matcher starting at the beginning of the given StringSegment. If this matcher finds
       * something interesting in the StringSegment, it should update the offset of the StringSegment
@@ -32,15 +49,6 @@ public interface NumberParseMatcher {
       */
      public UnicodeSet getLeadCodePoints();
  
-    /**
-     * Whether this matcher is well-defined for the empty string. Matchers that are looking for specific
-     * symbols should return false here. Matchers that are looking for any number of copies of a certain
-     * code point or string, like RangeMatcher and IgnorablesMatcher, should return true.
-     *
-     * @return Whether this matcher can accept the empty string.
-     */
-    public boolean matchesEmpty();
-
      /**
       * Method called at the end of a parse, after all matchers have failed to consume any more chars.
       * Allows a matcher to make final modifications to the result given the knowledge that no more
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java

index 1d2a81f08a2e389f7b1d1330ea4da9a448f1ecb6..38c3afbbbda29732edffe6608350d3605aa3d8cc 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java
@@ -33,7 +33,9 @@ import com.ibm.icu.util.ULocale;
  public class NumberParserImpl {
      @Deprecated
      public static NumberParserImpl createParserFromPattern(
-            ULocale locale, String pattern, boolean strictGrouping) {
+            ULocale locale,
+            String pattern,
+            boolean strictGrouping) {
          // Temporary frontend for testing.
  
          int parseFlags = ParsingUtils.PARSE_FLAG_IGNORE_CASE
@@ -196,7 +198,7 @@ public class NumberParserImpl {
          parser.addMatcher(InfinityMatcher.getInstance(symbols));
          String padString = properties.getPadString();
          if (padString != null && !ignorables.getSet().contains(padString)) {
-            parser.addMatcher(new PaddingMatcher(padString));
+            parser.addMatcher(PaddingMatcher.getInstance(padString));
          }
          parser.addMatcher(ignorables);
          parser.addMatcher(DecimalMatcher.getInstance(symbols, grouper, parseFlags));
@@ -366,6 +368,7 @@ public class NumberParserImpl {
          int initialOffset = segment.getOffset();
          for (int i = 0; i < matchers.size(); i++) {
              NumberParseMatcher matcher = matchers.get(i);
+
              // In a non-greedy parse, we attempt all possible matches and pick the best.
              for (int charsToConsume = 0; charsToConsume < segment.length();) {
                  charsToConsume += Character.charCount(Character.codePointAt(segment, charsToConsume));
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/PaddingMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/PaddingMatcher.java

index 21ebaf4dd27e893b14827b83f9f5cff6d6bb8aa4..64c17d79b6a9adefefc202e41a1c1de43601f604 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/PaddingMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/PaddingMatcher.java
@@ -8,13 +8,14 @@ import com.ibm.icu.text.UnicodeSet;
   * @author sffc
   *
   */
-public class PaddingMatcher extends RangeMatcher {
+public class PaddingMatcher extends SymbolMatcher implements NumberParseMatcher.Flexible {
  
-    /**
-     * @param uniSet
-     */
-    protected PaddingMatcher(String padString) {
-        super(new UnicodeSet().add(padString).freeze());
+    public static PaddingMatcher getInstance(String padString) {
+        return new PaddingMatcher(padString);
+    }
+
+    private PaddingMatcher(String symbolString) {
+        super(symbolString, UnicodeSet.EMPTY);
      }
  
      @Override
@@ -29,6 +30,6 @@ public class PaddingMatcher extends RangeMatcher {
  
      @Override
      public String toString() {
-        return "<PaddingMatcher " + uniSet + ">";
+        return "<PaddingMatcher>";
      }
  }
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/RangeMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/RangeMatcher.java

deleted file mode 100644 (file)

index 129780c..0000000
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/RangeMatcher.java
+++ /dev/null
@@ -1,66 +0,0 @@
-// © 2017 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html#License
-package com.ibm.icu.impl.number.parse;
-
-import com.ibm.icu.text.UnicodeSet;
-
-/**
- * @author sffc
- *
- */
-public abstract class RangeMatcher implements NumberParseMatcher {
-    protected final UnicodeSet uniSet;
-
-    protected RangeMatcher(UnicodeSet uniSet) {
-        this.uniSet = uniSet;
-    }
-
-    public UnicodeSet getSet() {
-        return uniSet;
-    }
-
-    @Override
-    public boolean match(StringSegment segment, ParsedNumber result) {
-        // Smoke test first; this matcher might be disabled.
-        if (isDisabled(result)) {
-            return false;
-        }
-
-        while (segment.length() > 0) {
-            int cp = segment.getCodePoint();
-            if (cp != -1 && uniSet.contains(cp)) {
-                segment.adjustOffset(Character.charCount(cp));
-                accept(segment, result);
-                continue;
-            }
-
-            // If we get here, the code point didn't match the uniSet.
-            return false;
-        }
-
-        // If we get here, we consumed the entire string segment.
-        return true;
-    }
-
-    @Override
-    public UnicodeSet getLeadCodePoints() {
-        UnicodeSet leadCodePoints = new UnicodeSet();
-        ParsingUtils.putLeadCodePoints(uniSet, leadCodePoints);
-        return leadCodePoints.freeze();
-    }
-
-    @Override
-    public boolean matchesEmpty() {
-        return true;
-    }
-
-    @Override
-    public void postProcess(ParsedNumber result) {
-        // No-op
-    }
-
-    protected abstract boolean isDisabled(ParsedNumber result);
-
-    protected abstract void accept(StringSegment segment, ParsedNumber result);
-
-}
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ScientificMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ScientificMatcher.java

index 2559e59ab8e05611ddca868606aad46e2a08e654..c05e75fa80e4ef0aae995708792dd0aac8f814e1 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ScientificMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ScientificMatcher.java
@@ -93,11 +93,6 @@ public class ScientificMatcher implements NumberParseMatcher {
          }
      }
  
-    @Override
-    public boolean matchesEmpty() {
-        return false;
-    }
-
      @Override
      public void postProcess(ParsedNumber result) {
          // No-op
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/SeriesMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/SeriesMatcher.java

index fcbe754609258dd4f7cc450bd134a34ffad0f383..7f8f6704b07e9f7bb4a11629eb241be05c018e80 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/SeriesMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/SeriesMatcher.java
@@ -31,6 +31,10 @@ public class SeriesMatcher implements NumberParseMatcher {
          frozen = true;
      }
  
+    public int length() {
+        return matchers == null ? 0 : matchers.size();
+    }
+
      @Override
      public boolean match(StringSegment segment, ParsedNumber result) {
          assert frozen;
@@ -44,7 +48,7 @@ public class SeriesMatcher implements NumberParseMatcher {
  
          int initialOffset = segment.getOffset();
          boolean maybeMore = true;
-        for (int i = 0; i < matchers.size(); i++) {
+        for (int i = 0; i < matchers.size();) {
              NumberParseMatcher matcher = matchers.get(i);
              int matcherOffset = segment.getOffset();
              if (segment.length() != 0) {
@@ -53,8 +57,19 @@ public class SeriesMatcher implements NumberParseMatcher {
                  // Nothing for this matcher to match; ask for more.
                  maybeMore = true;
              }
-            if (segment.getOffset() == matcherOffset && !matcher.matchesEmpty()) {
-                // Match failed.
+
+            boolean success = (segment.getOffset() != matcherOffset);
+            boolean isFlexible = matcher instanceof NumberParseMatcher.Flexible;
+            if (success && isFlexible) {
+                // Match succeeded, and this is a flexible matcher. Re-run it.
+            } else if (success) {
+                // Match succeeded, and this is NOT a flexible matcher. Proceed to the next matcher.
+                i++;
+            } else if (isFlexible) {
+                // Match failed, and this is a flexible matcher. Try again with the next matcher.
+                i++;
+            } else {
+                // Match failed, and this is NOT a flexible matcher. Exit.
                  segment.setOffset(initialOffset);
                  result.copyFrom(backup);
                  return maybeMore;
@@ -72,35 +87,9 @@ public class SeriesMatcher implements NumberParseMatcher {
              return UnicodeSet.EMPTY;
          }
  
-        if (!matchers.get(0).matchesEmpty()) {
-            return matchers.get(0).getLeadCodePoints();
-        }
-
-        UnicodeSet leadCodePoints = new UnicodeSet();
-        for (int i = 0; i < matchers.size(); i++) {
-            NumberParseMatcher matcher = matchers.get(i);
-            leadCodePoints.addAll(matcher.getLeadCodePoints());
-            if (!matcher.matchesEmpty()) {
-                break;
-            }
-        }
-        return leadCodePoints.freeze();
-    }
-
-    @Override
-    public boolean matchesEmpty() {
-        assert frozen;
-        if (matchers == null) {
-            return true;
-        }
-
-        for (int i = 0; i < matchers.size(); i++) {
-            NumberParseMatcher matcher = matchers.get(i);
-            if (!matcher.matchesEmpty()) {
-                return false;
-            }
-        }
-        return true;
+        // SeriesMatchers are never allowed to start with a Flexible matcher.
+        assert !(matchers.get(0) instanceof NumberParseMatcher.Flexible);
+        return matchers.get(0).getLeadCodePoints();
      }
  
      @Override
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/SymbolMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/SymbolMatcher.java

index d483d3d565db202adc91e7ba86c9a3c163a7b31b..e31841e6872908b08fb905e568ca33ee562ff43c 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/SymbolMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/SymbolMatcher.java
@@ -25,6 +25,10 @@ public abstract class SymbolMatcher implements NumberParseMatcher {
          uniSet = UnicodeSetStaticCache.get(key);
      }
  
+    public UnicodeSet getSet() {
+        return uniSet;
+    }
+
      @Override
      public boolean match(StringSegment segment, ParsedNumber result) {
          // Smoke test first; this matcher might be disabled.
@@ -55,7 +59,7 @@ public abstract class SymbolMatcher implements NumberParseMatcher {
  
      @Override
      public UnicodeSet getLeadCodePoints() {
-        if (string == null || string.isEmpty()) {
+        if (string.isEmpty()) {
              // Assumption: for sets from UnicodeSetStaticCache, uniSet == leadCodePoints.
              return uniSet;
          }
@@ -66,11 +70,6 @@ public abstract class SymbolMatcher implements NumberParseMatcher {
          return leadCodePoints.freeze();
      }
  
-    @Override
-    public boolean matchesEmpty() {
-        return false;
-    }
-
      @Override
      public void postProcess(ParsedNumber result) {
          // No-op
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ValidationMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ValidationMatcher.java

index cde7292d0e6f29fc3d8c7c0725c4fb43ea424452..bfe5a6b54919b9c93add7e0fb6d7005e89a16e7a 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ValidationMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ValidationMatcher.java
@@ -14,11 +14,6 @@ public abstract class ValidationMatcher implements NumberParseMatcher {
          return false;
      }
  
-    @Override
-    public boolean matchesEmpty() {
-        return false;
-    }
-
      @Override
      public UnicodeSet getLeadCodePoints() {
          return UnicodeSet.EMPTY;
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/data/numberformattestspecification.txt b/icu4j/main/tests/core/src/com/ibm/icu/dev/data/numberformattestspecification.txt

index 80ad592cf6c039c1857b68d4a3cc0ca0292669d7..2e3c634bb5f55f7484209bcc51382f619abe3a11 100644 (file)
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/data/numberformattestspecification.txt
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/data/numberformattestspecification.txt
@@ -1492,13 +1492,12 @@ y gh56  -56     JK
  y g h56        -56     JK
  // S stops parsing after the 'i' for these and returns -56
  // C stops before the 'i' and gets 56
-// P does not allow ignorables between the 'j' and the 'k'
  56ijk  -56     CJK
  56i jk -56     CJK
-56ij k -56     CJKP
-56i‎j‎k    -56     CJKP
+56ij k -56     CJK
+56i‎j‎k    -56     CJK
  56ijk  -56     CJK
-56i j‎k      -56     CJKP
+56i j‎k      -56     CJK
  56‎i jk      -56     CJK
  // S and C get 56 (accepts ' ' gs grouping); J and K get null
  5 6    fail    CSP
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTest.java

index 50bef581f9a8c34b7142d61e683e1325bdf3d56e..0cabbe8c9f8b1f434890dab55ac0543ce9ac7fa2 100644 (file)
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTest.java
@@ -5547,8 +5547,7 @@ public class NumberFormatTest extends TestFmwk {
          ParsePosition ppos = new ParsePosition(0);
          Number result = df.parse("42\u200E%\u200E ", ppos);
          assertEquals("Should parse as percentage", new BigDecimal("0.42"), result);
-        // TODO: The following line breaks in ICU 61.
-        //assertEquals("Should consume the trailing bidi since it is in the symbol", 5, ppos.getIndex());
+        assertEquals("Should consume the trailing bidi since it is in the symbol", 5, ppos.getIndex());
          ppos.setIndex(0);
          result = df.parse("-42a\u200E ", ppos);
          assertEquals("Should not parse as percent", new Long(-42), result);
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java

index 9adc785651348d250f52dad255b5b5b3fa29bdb8..847fa7719489b604b8930bcf456500e26bc3dd13 100644 (file)
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java
@@ -3,7 +3,6 @@
  package com.ibm.icu.dev.test.number;
  
  import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
  import static org.junit.Assert.assertNotNull;
  import static org.junit.Assert.assertTrue;
  
@@ -84,6 +83,7 @@ public class NumberParserTest {
                  { 3, "📺1.23", "📺0;📻0", 6, 1.23 },
                  { 3, "📻1.23", "📺0;📻0", 6, -1.23 },
                  { 3, ".00", "0", 3, 0.0 },
+                { 3, "                              0", "a0", 31, 0.0}, // should not hang
                  { 3, "0", "0", 1, 0.0 } };
  
          for (Object[] cas : cases) {
@@ -147,7 +147,6 @@ public class NumberParserTest {
      public void testSeriesMatcher() {
          DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(ULocale.ENGLISH);
          SeriesMatcher series = new SeriesMatcher();
-        series.addMatcher(IgnorablesMatcher.DEFAULT);
          series.addMatcher(PlusSignMatcher.getInstance(symbols));
          series.addMatcher(MinusSignMatcher.getInstance(symbols));
          series.addMatcher(IgnorablesMatcher.DEFAULT);
@@ -155,23 +154,21 @@ public class NumberParserTest {
          series.addMatcher(IgnorablesMatcher.DEFAULT);
          series.freeze();
  
-        assertEquals(UnicodeSetStaticCache.get(Key.DEFAULT_IGNORABLES).cloneAsThawed()
-                .addAll(UnicodeSetStaticCache.get(Key.PLUS_SIGN)), series.getLeadCodePoints());
-        assertFalse(series.matchesEmpty());
+        assertEquals(UnicodeSetStaticCache.get(Key.PLUS_SIGN), series.getLeadCodePoints());
  
          Object[][] cases = new Object[][] {
                  { "", 0, true },
-                { " ", 0, true },
+                { " ", 0, false },
                  { "$", 0, false },
                  { "+", 0, true },
-                { " +", 0, true },
-                { " + ", 0, false },
+                { " +", 0, false },
                  { "+-", 0, true },
                  { "+ -", 0, false },
                  { "+-  ", 0, true },
                  { "+-  $", 0, false },
                  { "+-%", 3, true },
-                { "  +-  %  ", 9, true },
+                { "  +-  %  ", 0, false },
+                { "+-  %  ", 7, true },
                  { "+-%$", 3, false } };
          for (Object[] cas : cases) {
              String input = (String) cas[0];
author	Shane Carr <shane@unicode.org>
	Wed, 24 Jan 2018 02:32:03 +0000 (02:32 +0000)
committer	Shane Carr <shane@unicode.org>
	Wed, 24 Jan 2018 02:32:03 +0000 (02:32 +0000)
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixMatcher.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixPatternMatcher.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AnyMatcher.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CodePointMatcher.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CurrencyMatcher.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CurrencyTrieMatcher.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/DecimalMatcher.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/IgnorablesMatcher.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParseMatcher.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/PaddingMatcher.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/RangeMatcher.java	[deleted file]	patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ScientificMatcher.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/SeriesMatcher.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/SymbolMatcher.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ValidationMatcher.java		patch \| blob \| history
icu4j/main/tests/core/src/com/ibm/icu/dev/data/numberformattestspecification.txt		patch \| blob \| history
icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTest.java		patch \| blob \| history
icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java		patch \| blob \| history