ICU-13513 More work on affix matchers. Refactoring code for grouping and decimal...

author Shane Carr <shane@unicode.org>

Sat, 16 Dec 2017 02:54:58 +0000 (02:54 +0000)

committer Shane Carr <shane@unicode.org>

Sat, 16 Dec 2017 02:54:58 +0000 (02:54 +0000)
author Shane Carr <shane@unicode.org>
Sat, 16 Dec 2017 02:54:58 +0000 (02:54 +0000)
committer Shane Carr <shane@unicode.org>
Sat, 16 Dec 2017 02:54:58 +0000 (02:54 +0000)
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/AffixPatternProvider.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/AffixPatternProvider.java

index 6052bb18e510ebd1392a5747b82a0021a50190e3..daf22c2905198d091f2abf7b16d46546c4f5e63c 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/AffixPatternProvider.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/AffixPatternProvider.java
@@ -10,6 +10,12 @@ public interface AffixPatternProvider {
      public static final int PADDING = 0x400;
    }
  
+  // Convenience compound flags
+  public static final int FLAG_POS_PREFIX = Flags.PREFIX;
+  public static final int FLAG_POS_SUFFIX = 0;
+  public static final int FLAG_NEG_PREFIX = Flags.PREFIX | Flags.NEGATIVE_SUBPATTERN;
+  public static final int FLAG_NEG_SUFFIX = Flags.NEGATIVE_SUBPATTERN;
+
    public char charAt(int flags, int i);
  
    public int length(int flags);
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/AffixUtils.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/AffixUtils.java

index 0e5f36cf4e7782c83884c47afe75589c1166229c..43288478d2b2eff065fe76968ce983e035823e02 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/AffixUtils.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/AffixUtils.java
@@ -3,6 +3,7 @@
  package com.ibm.icu.impl.number;
  
  import com.ibm.icu.text.NumberFormat;
+import com.ibm.icu.text.UnicodeSet;
  
  /**
   * Performs manipulations on affix patterns: the prefix and suffix strings associated with a decimal
@@ -386,19 +387,30 @@ public class AffixUtils {
    }
  
    /**
-   * Appends a new affix pattern with all symbols removed. Like calling unescape with a symbol provider that always
-   * returns the empty string.
+   * Appends the code points of the given affix pattern to the StringBuilder, skipping all symbols and all code points
+   * in the given "ignorables" UnicodeSet. Similar to calling unescape with a symbol provider that returns "".
+   *
+   * <p>
+   * The StringBuilder may be null; it is allocated only when a code point is appended, and it (or null) is returned.
     */
-  public static void removeSymbols(CharSequence affixPattern, StringBuilder output) {
+  public static StringBuilder withoutSymbolsOrIgnorables(
+        CharSequence affixPattern,
+        UnicodeSet ignorables,
+        StringBuilder sb) {
      assert affixPattern != null;
      long tag = 0L;
      while (hasNext(tag, affixPattern)) {
        tag = nextToken(tag, affixPattern);
        int typeOrCp = getTypeOrCp(tag);
-      if (typeOrCp >= 0) {
-        output.appendCodePoint(typeOrCp);
+      if (typeOrCp >= 0 && !ignorables.contains(typeOrCp)) {
+        if (sb == null) {
+          // Lazy-initialize the StringBuilder
+          sb = new StringBuilder();
+        }
+        sb.appendCodePoint(typeOrCp);
        }
      }
+    return sb;
    }
  
    /**
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixMatcher.java

index 70b7b4c3bd8ac5d6b469d4160a700036e7959b2d..c1aaf9c5df3c65e267f7202fc9c15272ac2b51aa 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixMatcher.java
@@ -2,16 +2,13 @@
  // License & terms of use: http://www.unicode.org/copyright.html#License
  package com.ibm.icu.impl.number.parse;
  
-import java.util.Collection;
+import java.util.ArrayList;
+import java.util.Collections;
  import java.util.Comparator;
-import java.util.Set;
-import java.util.TreeSet;
  
-import com.ibm.icu.impl.StandardPlural;
  import com.ibm.icu.impl.number.AffixPatternProvider;
  import com.ibm.icu.impl.number.AffixUtils;
-import com.ibm.icu.impl.number.MutablePatternModifier;
-import com.ibm.icu.impl.number.NumberStringBuilder;
+import com.ibm.icu.text.UnicodeSet;
  
  /**
   * @author sffc
@@ -43,132 +40,78 @@ public class AffixMatcher implements NumberParseMatcher {
          }
      };
  
-    /**
-     * Creates multiple AffixMatchers, enough to cover the requirements for the given pattern modifier, appending them
-     * in order to the NumberParserImpl.
-     */
-    public static void generateFromPatternModifier(
-            MutablePatternModifier patternModifier,
-            int flags,
-            boolean includeUnpaired,
-            NumberParserImpl output) {
-
-        // Store the matchers in a TreeSet to ensure both uniqueness and order.
-        Set<AffixMatcher> matchers = new TreeSet<AffixMatcher>(COMPARATOR);
-
-        // Construct one matcher per isNegative/plural combination. Most of the time, plurals aren't needed, so only
-        // two matchers will be created, one for positive and one for negative.
-        NumberStringBuilder nsb = new NumberStringBuilder();
-        boolean isNegative = false;
-        while (true) {
-            if (isNegative) {
-                flags |= ParsedNumber.FLAG_NEGATIVE;
+    public static void generateFromAffixPatternProvider(
+            AffixPatternProvider patternInfo,
+            NumberParserImpl output,
+            UnicodeSet ignorables,
+            boolean includeUnpaired) {
+        // Lazy-initialize the StringBuilder.
+        StringBuilder sb = null;
+
+        // Use initial capacity of 6, the highest possible number of AffixMatchers.
+        // TODO: Lazy-initialize?
+        ArrayList<AffixMatcher> matchers = new ArrayList<AffixMatcher>(6);
+
+        sb = getCleanAffix(patternInfo, AffixPatternProvider.FLAG_POS_PREFIX, ignorables, sb);
+        String posPrefix = toStringOrEmpty(sb);
+        sb = getCleanAffix(patternInfo, AffixPatternProvider.FLAG_POS_SUFFIX, ignorables, sb);
+        String posSuffix = toStringOrEmpty(sb);
+
+        if (!posPrefix.isEmpty() || !posSuffix.isEmpty()) {
+            matchers.add(getInstance(posPrefix, posSuffix, 0));
+            if (includeUnpaired && !posPrefix.isEmpty() && !posSuffix.isEmpty()) {
+                matchers.add(getInstance(posPrefix, "", 0));
+                matchers.add(getInstance("", posSuffix, 0));
              }
+        }
  
-            if (patternModifier.needsPlurals()) {
-                for (StandardPlural plural : StandardPlural.VALUES) {
-                    patternModifier.setNumberProperties(isNegative, plural);
-                    AffixMatcher.createAndAppendTo(matchers, patternModifier, flags, nsb, includeUnpaired);
+        if (patternInfo.hasNegativeSubpattern()) {
+            sb = getCleanAffix(patternInfo, AffixPatternProvider.FLAG_NEG_PREFIX, ignorables, sb);
+            String negPrefix = toStringOrEmpty(sb);
+            sb = getCleanAffix(patternInfo, AffixPatternProvider.FLAG_NEG_SUFFIX, ignorables, sb);
+            String negSuffix = toStringOrEmpty(sb);
+
+            if (negPrefix.equals(posPrefix) && negSuffix.equals(posSuffix)) {
+                // No-op: favor the positive AffixMatcher
+            } else if (!negPrefix.isEmpty() || !negSuffix.isEmpty()) {
+                matchers.add(getInstance(negPrefix, negSuffix, ParsedNumber.FLAG_NEGATIVE));
+                if (includeUnpaired && !negPrefix.isEmpty() && !negSuffix.isEmpty()) {
+                    if (!negPrefix.equals(posPrefix)) {
+                        matchers.add(getInstance(negPrefix, "", ParsedNumber.FLAG_NEGATIVE));
+                    }
+                    if (!negSuffix.equals(posSuffix)) {
+                        matchers.add(getInstance("", negSuffix, ParsedNumber.FLAG_NEGATIVE));
+                    }
                  }
-            } else {
-                patternModifier.setNumberProperties(isNegative, null);
-                AffixMatcher.createAndAppendTo(matchers, patternModifier, flags, nsb, includeUnpaired);
-            }
-
-            if (isNegative) {
-                break;
-            } else {
-                isNegative = true;
              }
          }
  
-        for (AffixMatcher matcher : matchers) {
-            output.addMatcher(matcher);
-        }
+        // Put the AffixMatchers in order, and then add them to the output.
+        Collections.sort(matchers, COMPARATOR);
+        output.addMatchers(matchers);
      }
  
-    public static void generateFromAffixPatternProvider(AffixPatternProvider patternInfo,
-            NumberParserImpl output,
-            boolean includeUnpaired) {
-        AffixMatcher positive = null;
-        AffixMatcher negative = null;
-
-        StringBuilder sb = new StringBuilder();
-        AffixUtils.removeSymbols(patternInfo.getString(AffixPatternProvider.Flags.PREFIX), sb);
-        String prefix = sb.toString();
-        sb.setLength(0);
-        AffixUtils.removeSymbols(patternInfo.getString(/* suffix */ 0), sb);
-        String suffix = sb.toString();
-        if (!prefix.isEmpty() || !suffix.isEmpty()) {
-            positive = new AffixMatcher(prefix, suffix, 0);
-        }
-
-        if (patternInfo.hasNegativeSubpattern()) {
+    private static StringBuilder getCleanAffix(
+            AffixPatternProvider patternInfo,
+            int flag,
+            UnicodeSet ignorables,
+            StringBuilder sb) {
+        if (sb != null) {
              sb.setLength(0);
-            AffixUtils.removeSymbols(patternInfo
-                    .getString(AffixPatternProvider.Flags.PREFIX | AffixPatternProvider.Flags.NEGATIVE_SUBPATTERN), sb);
-            prefix = sb.toString();
-            sb.setLength(0);
-            AffixUtils.removeSymbols(patternInfo.getString(AffixPatternProvider.Flags.NEGATIVE_SUBPATTERN), sb);
-            suffix = sb.toString();
-            if (!prefix.isEmpty() || !suffix.isEmpty()) {
-                negative = new AffixMatcher(prefix, suffix, ParsedNumber.FLAG_NEGATIVE);
-            }
          }
-
-        if (positive != null && negative != null) {
-            int comparison = COMPARATOR.compare(positive, negative);
-            if (comparison > 0) {
-                appendTo(negative, output, includeUnpaired);
-                appendTo(positive, output, includeUnpaired);
-            } else if (comparison < 0) {
-                appendTo(positive, output, includeUnpaired);
-                appendTo(negative, output, includeUnpaired);
-            } else {
-                // The two candidates are equal; favor the positive one
-                appendTo(positive, output, includeUnpaired);
-            }
-        } else if (positive != null) {
-            appendTo(positive, output, includeUnpaired);
-        } else if (negative != null) {
-            appendTo(negative, output, includeUnpaired);
-        } else {
-            // No affixes to append this time
+        if (patternInfo.length(flag) > 0) {
+            sb = AffixUtils.withoutSymbolsOrIgnorables(patternInfo.getString(flag), ignorables, sb);
          }
+        return sb;
      }
  
-    private static void appendTo(AffixMatcher matcher, NumberParserImpl output, boolean includeUnpaired) {
-        output.addMatcher(matcher);
-        if (includeUnpaired && !matcher.prefix.isEmpty() && !matcher.suffix.isEmpty()) {
-            output.addMatcher(new AffixMatcher(matcher.prefix, "", matcher.flags));
-            output.addMatcher(new AffixMatcher("", matcher.suffix, matcher.flags));
-        }
+    private static String toStringOrEmpty(StringBuilder sb) {
+        return (sb == null || sb.length() == 0) ? "" : sb.toString();
      }
  
-    /**
-     * Constructs one or more AffixMatchers from the given MutablePatternModifier and flags, appending them to the given
-     * collection. The NumberStringBuilder is used as a temporary object only.
-     *
-     * @param includeUnpaired If true, create additional AffixMatchers with an unpaired prefix or suffix.
-     */
-    private static void createAndAppendTo(
-            Collection<AffixMatcher> appendTo,
-            MutablePatternModifier patternModifier,
-            int flags,
-            NumberStringBuilder nsb,
-            boolean includeUnpaired) {
-        // TODO: Make this more efficient (avoid the substrings and things)
-        nsb.clear();
-        patternModifier.apply(nsb, 0, 0);
-        int prefixLength = patternModifier.getPrefixLength();
-        String full = nsb.toString();
-        String prefix = full.substring(0, prefixLength);
-        String suffix = full.substring(prefixLength);
-        appendTo.add(new AffixMatcher(prefix, suffix, flags));
-        if (includeUnpaired && !prefix.isEmpty() && !suffix.isEmpty()) {
-            appendTo.add(new AffixMatcher(prefix, "", flags));
-            appendTo.add(new AffixMatcher("", suffix, flags));
-        }
+    private static final AffixMatcher getInstance(String prefix, String suffix, int flags) {
+        // TODO: Special handling for common cases like both strings empty.
+        return new AffixMatcher(prefix, suffix, flags);
      }
  
      private AffixMatcher(String prefix, String suffix, int flags) {
@@ -179,7 +122,7 @@ public class AffixMatcher implements NumberParseMatcher {
  
      @Override
      public boolean match(StringSegment segment, ParsedNumber result) {
-        if (result.quantity == null) {
+        if (!result.seenNumber()) {
              // Prefix
              if (result.prefix != null || prefix.length() == 0) {
                  return false;
@@ -255,6 +198,7 @@ public class AffixMatcher implements NumberParseMatcher {
  
      @Override
      public String toString() {
-        return "<AffixMatcher \"" + prefix + "\" \"" + suffix + "\">";
+        boolean isNegative = 0 != (flags & ParsedNumber.FLAG_NEGATIVE);
+        return "<AffixMatcher" + (isNegative ? ":negative " : " ") + prefix + "#" + suffix + ">";
      }
  }
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/DecimalMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/DecimalMatcher.java

index 09165b43feca2e395ac6d92768e289ac4efe1428..129519fa0288571c467bf11e5c659b62fc75caf6 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/DecimalMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/DecimalMatcher.java
@@ -13,73 +13,39 @@ import com.ibm.icu.text.UnicodeSet;
   */
  public class DecimalMatcher implements NumberParseMatcher {
  
-    // TODO: Re-generate these sets from the database. They probably haven't been updated in a while.
-    private static final UnicodeSet UNISET_PERIOD_LIKE = new UnicodeSet("[.\\u2024\\u3002\\uFE12\\uFE52\\uFF0E\\uFF61]")
-            .freeze();
-    private static final UnicodeSet UNISET_STRICT_PERIOD_LIKE = new UnicodeSet("[.\\u2024\\uFE52\\uFF0E\\uFF61]")
-            .freeze();
-    private static final UnicodeSet UNISET_COMMA_LIKE = new UnicodeSet(
-            "[,\\u060C\\u066B\\u3001\\uFE10\\uFE11\\uFE50\\uFE51\\uFF0C\\uFF64]").freeze();
-    private static final UnicodeSet UNISET_STRICT_COMMA_LIKE = new UnicodeSet("[,\\u066B\\uFE10\\uFE50\\uFF0C]")
-            .freeze();
-    private static final UnicodeSet UNISET_OTHER_GROUPING_SEPARATORS = new UnicodeSet(
-            "[\\ '\\u00A0\\u066C\\u2000-\\u200A\\u2018\\u2019\\u202F\\u205F\\u3000\\uFF07]").freeze();
-
-    public static DecimalMatcher getInstance(DecimalFormatSymbols symbols) {
-        String groupingSeparator = symbols.getGroupingSeparatorString();
-        UnicodeSet groupingSet = UNISET_COMMA_LIKE.contains(groupingSeparator)
-                ? UNISET_COMMA_LIKE.cloneAsThawed().addAll(UNISET_OTHER_GROUPING_SEPARATORS).freeze()
-                : UNISET_PERIOD_LIKE.contains(groupingSeparator)
-                        ? UNISET_PERIOD_LIKE.cloneAsThawed().addAll(UNISET_OTHER_GROUPING_SEPARATORS).freeze()
-                        : UNISET_OTHER_GROUPING_SEPARATORS.contains(groupingSeparator)
-                                ? UNISET_OTHER_GROUPING_SEPARATORS
-                                : new UnicodeSet().addAll(groupingSeparator).freeze();
-
-        String decimalSeparator = symbols.getDecimalSeparatorString();
-        UnicodeSet decimalSet = UNISET_COMMA_LIKE.contains(decimalSeparator) ? UNISET_COMMA_LIKE
-                : UNISET_PERIOD_LIKE.contains(decimalSeparator) ? UNISET_PERIOD_LIKE
-                        : new UnicodeSet().addAll(decimalSeparator).freeze();
-
-        return new DecimalMatcher(symbols.getDigitStrings(), groupingSet, decimalSet, false);
-    }
-
-    public static DecimalMatcher getExponentInstance(DecimalFormatSymbols symbols) {
-        return new DecimalMatcher(symbols.getDigitStrings(),
-                new UnicodeSet("[,]").freeze(),
-                new UnicodeSet("[.]").freeze(),
-                true);
-    }
  
-    private final String[] digitStrings;
-    private final UnicodeSet groupingUniSet;
-    private final UnicodeSet decimalUniSet;
-    private final UnicodeSet separatorSet;
      public boolean requireGroupingMatch = false;
+    public boolean decimalEnabled = true;
      public boolean groupingEnabled = true;
      public int grouping1 = 3;
      public int grouping2 = 3;
      public boolean integerOnly = false;
-    private final boolean isScientific;
-
-    private DecimalMatcher(
-            String[] digitStrings,
-            UnicodeSet groupingUniSet,
-            UnicodeSet decimalUniSet,
-            boolean isScientific) {
-        this.digitStrings = digitStrings;
-        this.groupingUniSet = groupingUniSet;
-        this.decimalUniSet = decimalUniSet;
-        if (groupingEnabled) {
-            separatorSet = groupingUniSet.cloneAsThawed().addAll(decimalUniSet).freeze();
-        } else {
-            separatorSet = decimalUniSet;
-        }
-        this.isScientific = isScientific;
+    public boolean isScientific = false;
+
+    private UnicodeSet groupingUniSet;
+    private UnicodeSet decimalUniSet;
+    private UnicodeSet separatorSet;
+    private String[] digitStrings;
+    private boolean frozen;
+
+    public DecimalMatcher() {
+        frozen = false;
+    }
+
+    public void freeze(DecimalFormatSymbols symbols, boolean isStrict) {
+        assert !frozen;
+        frozen = true;
+
+        groupingUniSet = SeparatorSetUtils.getGroupingUnicodeSet(symbols, isStrict);
+        decimalUniSet = SeparatorSetUtils.getDecimalUnicodeSet(symbols, isStrict);
+        separatorSet = SeparatorSetUtils.unionUnicodeSets(groupingUniSet, decimalUniSet);
+        digitStrings = symbols.getDigitStringsLocal();
      }
  
      @Override
      public boolean match(StringSegment segment, ParsedNumber result) {
-        if (result.quantity != null && !isScientific) {
+        assert frozen;
+        if (result.seenNumber() && !isScientific) {
              // A number has already been consumed.
              return false;
          }
@@ -167,8 +133,11 @@ public class DecimalMatcher implements NumberParseMatcher {
  
          if (isScientific) {
              result.quantity.adjustMagnitude(exponent);
+        } else if (result.quantity == null) {
+            // No-op: strings that start with a separator without any other digits
          } else if (seenBothSeparators || (separator != -1 && decimalUniSet.contains(separator))) {
              // The final separator was a decimal separator.
+            result.flags |= ParsedNumber.FLAG_HAS_DECIMAL_SEPARATOR;
              result.quantity.adjustMagnitude(-currGroup);
              if (integerOnly) {
                  result.quantity.truncate();
@@ -188,8 +157,8 @@ public class DecimalMatcher implements NumberParseMatcher {
              result.quantity.adjustMagnitude(-currGroup);
              result.quantity.truncate();
              segment.setOffset(lastSeparatorOffset);
-//            result.quantity = null;
-//            segment.setOffset(initialOffset);
+            // result.quantity = null;
+            // segment.setOffset(initialOffset);
          }
  
          return segment.length() == 0 || hasPartialPrefix || segment.isLeadingSurrogate();
@@ -202,6 +171,6 @@ public class DecimalMatcher implements NumberParseMatcher {
  
      @Override
      public String toString() {
-        return "<MantissaMatcher>";
+        return "<DecimalMatcher>";
      }
  }
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/IgnorablesMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/IgnorablesMatcher.java

new file mode 100644 (file)

index 0000000..76d98b2
--- /dev/null
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/IgnorablesMatcher.java
@@ -0,0 +1,83 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.impl.number.parse;
+
+import com.ibm.icu.text.UnicodeSet;
+
+/**
+ * A matcher that skips over any leading run of code points contained in a set of "ignorable" code points.
+ *
+ * @author sffc
+ */
+public class IgnorablesMatcher implements NumberParseMatcher {
+
+    // BiDi characters are skipped over and ignored at any point in the string, even in strict mode.
+    static final UnicodeSet UNISET_BIDI = new UnicodeSet("[[\\u200E\\u200F\\u061C]]").freeze();
+
+    // This set was decided after discussion with icu-design@. See ticket #13309.
+    // Zs+TAB is "horizontal whitespace" according to UTS #18 (blank property).
+    static final UnicodeSet UNISET_WHITESPACE = new UnicodeSet("[[:Zs:][\\u0009]]").freeze();
+
+    /** The default set of ignorables. */
+    static final UnicodeSet DEFAULT_UNISET = UNISET_BIDI.cloneAsThawed().addAll(UNISET_WHITESPACE).freeze();
+
+    /** The default set of ignorables for strict mode. */
+    static final UnicodeSet STRICT_UNISET = UNISET_BIDI;
+
+    private static final IgnorablesMatcher DEFAULT_INSTANCE = new IgnorablesMatcher(DEFAULT_UNISET);
+    private static final IgnorablesMatcher STRICT_INSTANCE = new IgnorablesMatcher(STRICT_UNISET);
+
+    /**
+     * Returns a matcher for the given set of ignorables, reusing a shared instance when the set is equal to
+     * {@link #DEFAULT_UNISET} or {@link #STRICT_UNISET}.
+     *
+     * @param ignorables The code points to skip; asserted to be frozen.
+     * @return A matcher that skips the given code points.
+     */
+    public static IgnorablesMatcher getInstance(UnicodeSet ignorables) {
+        assert ignorables.isFrozen();
+        if (ignorables == DEFAULT_UNISET || ignorables.equals(DEFAULT_UNISET)) {
+            return DEFAULT_INSTANCE;
+        } else if (ignorables == STRICT_UNISET || ignorables.equals(STRICT_UNISET)) {
+            return STRICT_INSTANCE;
+        } else {
+            return new IgnorablesMatcher(ignorables);
+        }
+    }
+
+    private final UnicodeSet ignorables;
+
+    private IgnorablesMatcher(UnicodeSet ignorables) {
+        this.ignorables = ignorables;
+    }
+
+    /**
+     * Advances the segment past every leading code point that is in the ignorables set. Nothing is recorded
+     * in the result.
+     *
+     * @return true if the segment is empty afterwards or {@code segment.isLeadingSurrogate()} holds; false otherwise.
+     */
+    @Override
+    public boolean match(StringSegment segment, ParsedNumber result) {
+        while (segment.length() > 0) {
+            int cp = segment.getCodePoint();
+            if (cp == -1 || !ignorables.contains(cp)) {
+                break;
+            }
+            segment.adjustOffset(Character.charCount(cp));
+            // Note: Do not touch the charsConsumed.
+        }
+        return segment.length() == 0 || segment.isLeadingSurrogate();
+    }
+
+    @Override
+    public void postProcess(ParsedNumber result) {
+        // No-op
+    }
+
+    @Override
+    public String toString() {
+        // Label with this class's own name so that matcher dumps identify it correctly.
+        return "<IgnorablesMatcher>";
+    }
+}
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NanMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NanMatcher.java

new file mode 100644 (file)

index 0000000..795b729
--- /dev/null
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NanMatcher.java
@@ -0,0 +1,55 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.impl.number.parse;
+
+import com.ibm.icu.text.DecimalFormatSymbols;
+
+/**
+ * A matcher that recognizes the NaN string of a DecimalFormatSymbols and flags the result with
+ * {@link ParsedNumber#FLAG_NAN}.
+ *
+ * @author sffc
+ */
+public class NanMatcher implements NumberParseMatcher {
+
+    private final String nanString;
+
+    public NanMatcher(DecimalFormatSymbols symbols) {
+        nanString = symbols.getNaN();
+    }
+
+    /**
+     * Consumes the NaN string if the segment starts with it, setting {@link ParsedNumber#FLAG_NAN}.
+     *
+     * @return true if the entire remaining segment matches only a leading part of the NaN string, so that
+     *         more input could still complete the match; false otherwise.
+     */
+    @Override
+    public boolean match(StringSegment segment, ParsedNumber result) {
+        if (nanString.isEmpty()) {
+            // An empty NaN string has zero overlap == its length, which would flag every input as NaN.
+            return false;
+        }
+        int overlap = segment.getCommonPrefixLength(nanString);
+        if (overlap == nanString.length()) {
+            result.flags |= ParsedNumber.FLAG_NAN;
+            segment.adjustOffset(overlap);
+            result.setCharsConsumed(segment);
+            return false;
+        } else if (overlap == segment.length()) {
+            return true;
+        } else {
+            return false;
+        }
+    }
+
+    @Override
+    public void postProcess(ParsedNumber result) {
+        // No-op
+    }
+
+    @Override
+    public String toString() {
+        return "<NanMatcher>";
+    }
+}
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java

index a6a01789044502f657c7a80b52674427bda30ee3..96d4dc6a2b025d288999998b2f2a3839c161bafa 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java
@@ -4,6 +4,7 @@ package com.ibm.icu.impl.number.parse;
  
  import java.text.ParsePosition;
  import java.util.ArrayList;
+import java.util.Collection;
  import java.util.Comparator;
  import java.util.List;
  
@@ -11,13 +12,11 @@ import com.ibm.icu.impl.number.AffixPatternProvider;
  import com.ibm.icu.impl.number.AffixUtils;
  import com.ibm.icu.impl.number.CustomSymbolCurrency;
  import com.ibm.icu.impl.number.DecimalFormatProperties;
-import com.ibm.icu.impl.number.MutablePatternModifier;
  import com.ibm.icu.impl.number.Parse.ParseMode;
  import com.ibm.icu.impl.number.PatternStringParser;
  import com.ibm.icu.impl.number.PropertiesAffixPatternProvider;
-import com.ibm.icu.number.NumberFormatter.SignDisplay;
-import com.ibm.icu.number.NumberFormatter.UnitWidth;
  import com.ibm.icu.text.DecimalFormatSymbols;
+import com.ibm.icu.text.UnicodeSet;
  import com.ibm.icu.util.Currency;
  import com.ibm.icu.util.CurrencyAmount;
  import com.ibm.icu.util.ULocale;
@@ -36,25 +35,15 @@ public class NumberParserImpl {
          ULocale locale = new ULocale("en_IN");
          DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(locale);
  
-        MutablePatternModifier mod = new MutablePatternModifier(false);
-        AffixPatternProvider provider = PatternStringParser.parseToPatternInfo(pattern);
-        mod.setPatternInfo(provider);
-        mod.setPatternAttributes(SignDisplay.AUTO, false);
-        mod.setSymbols(symbols, Currency.getInstance("USD"), UnitWidth.FULL_NAME, null);
-        int flags = 0;
-        if (provider.containsSymbolType(AffixUtils.TYPE_PERCENT)) {
-            flags |= ParsedNumber.FLAG_PERCENT;
-        }
-        if (provider.containsSymbolType(AffixUtils.TYPE_PERMILLE)) {
-            flags |= ParsedNumber.FLAG_PERMILLE;
-        }
-        AffixMatcher.generateFromPatternModifier(mod, flags, true, parser);
+        AffixPatternProvider patternInfo = PatternStringParser.parseToPatternInfo(pattern);
+        AffixMatcher.generateFromAffixPatternProvider(patternInfo, parser, new UnicodeSet(), true);
  
-        parser.addMatcher(WhitespaceMatcher.getInstance());
-        DecimalMatcher decimalMatcher = DecimalMatcher.getInstance(symbols);
+        parser.addMatcher(IgnorablesMatcher.getInstance(IgnorablesMatcher.DEFAULT_UNISET));
+        DecimalMatcher decimalMatcher = new DecimalMatcher();
          decimalMatcher.requireGroupingMatch = strictGrouping;
          decimalMatcher.grouping1 = 3;
          decimalMatcher.grouping2 = 2;
+        decimalMatcher.freeze(symbols, false);
          parser.addMatcher(decimalMatcher);
          parser.addMatcher(new MinusSignMatcher());
          parser.addMatcher(new ScientificMatcher(symbols));
@@ -65,10 +54,11 @@ public class NumberParserImpl {
          return parser;
      }
  
-    public static Number parseStatic(String input,
-      ParsePosition ppos,
-      DecimalFormatProperties properties,
-      DecimalFormatSymbols symbols) {
+    public static Number parseStatic(
+            String input,
+            ParsePosition ppos,
+            DecimalFormatProperties properties,
+            DecimalFormatSymbols symbols) {
          NumberParserImpl parser = createParserFromProperties(properties, symbols, false);
          ParsedNumber result = new ParsedNumber();
          parser.parse(input, true, result);
@@ -80,10 +70,11 @@ public class NumberParserImpl {
          }
      }
  
-    public static CurrencyAmount parseStaticCurrency(String input,
-      ParsePosition ppos,
-      DecimalFormatProperties properties,
-      DecimalFormatSymbols symbols) {
+    public static CurrencyAmount parseStaticCurrency(
+            String input,
+            ParsePosition ppos,
+            DecimalFormatProperties properties,
+            DecimalFormatSymbols symbols) {
          NumberParserImpl parser = createParserFromProperties(properties, symbols, true);
          ParsedNumber result = new ParsedNumber();
          parser.parse(input, true, result);
@@ -111,62 +102,49 @@ public class NumberParserImpl {
          ULocale locale = symbols.getULocale();
          Currency currency = CustomSymbolCurrency.resolve(properties.getCurrency(), locale, symbols);
          boolean isStrict = properties.getParseMode() == ParseMode.STRICT;
+        UnicodeSet ignorables = isStrict ? IgnorablesMatcher.STRICT_UNISET : IgnorablesMatcher.DEFAULT_UNISET;
  
-        ////////////////////////
-        /// CURRENCY MATCHER ///
-        ////////////////////////
-
-        if (parseCurrency) {
-            parser.addMatcher(new CurrencyMatcher(locale));
-        }
+        boolean decimalSeparatorRequired = properties.getDecimalPatternMatchRequired()
+                ? (properties.getDecimalSeparatorAlwaysShown() || properties.getMaximumFractionDigits() != 0)
+                : false;
  
          //////////////////////
          /// AFFIX MATCHERS ///
          //////////////////////
  
          // Set up a pattern modifier with mostly defaults to generate AffixMatchers.
-        MutablePatternModifier mod = new MutablePatternModifier(false);
          AffixPatternProvider patternInfo = new PropertiesAffixPatternProvider(properties);
-//        mod.setPatternInfo(patternInfo);
-//        mod.setPatternAttributes(SignDisplay.AUTO, false);
-//        mod.setSymbols(symbols, currency, UnitWidth.SHORT, null);
-//
-//        // Figure out which flags correspond to this pattern modifier. Note: negatives are taken care of in the
-//        // generateFromPatternModifier function.
-//        int flags = 0;
-//        if (patternInfo.containsSymbolType(AffixUtils.TYPE_PERCENT)) {
-//            flags |= ParsedNumber.FLAG_PERCENT;
-//        }
-//        if (patternInfo.containsSymbolType(AffixUtils.TYPE_PERMILLE)) {
-//            flags |= ParsedNumber.FLAG_PERMILLE;
-//        }
-//        if (patternInfo.hasCurrencySign()) {
-//            flags |= ParsedNumber.FLAG_HAS_DEFAULT_CURRENCY;
-//        }
-//
-//        parseCurrency = parseCurrency || patternInfo.hasCurrencySign();
-//
-//        AffixMatcher.generateFromPatternModifier(mod, flags, !isStrict && !parseCurrency, parser);
-
-        AffixMatcher.generateFromAffixPatternProvider(patternInfo, parser, !isStrict);
+        AffixMatcher.generateFromAffixPatternProvider(patternInfo, parser, ignorables, !isStrict);
+
+        ////////////////////////
+        /// CURRENCY MATCHER ///
+        ////////////////////////
+
+        parseCurrency = parseCurrency || patternInfo.hasCurrencySign();
+        if (parseCurrency) {
+            parser.addMatcher(new CurrencyMatcher(locale));
+        }
  
          ///////////////////////////////
          /// OTHER STANDARD MATCHERS ///
          ///////////////////////////////
  
          if (!isStrict) {
-            parser.addMatcher(WhitespaceMatcher.getInstance());
+            parser.addMatcher(IgnorablesMatcher.getInstance(ignorables));
          }
          if (!isStrict || patternInfo.containsSymbolType(AffixUtils.TYPE_PLUS_SIGN)) {
              parser.addMatcher(new PlusSignMatcher());
          }
          parser.addMatcher(new MinusSignMatcher());
-        DecimalMatcher decimalMatcher = DecimalMatcher.getInstance(symbols);
-        decimalMatcher.groupingEnabled = properties.getGroupingSize() > 0;
+        parser.addMatcher(new NanMatcher(symbols));
+        DecimalMatcher decimalMatcher = new DecimalMatcher();
          decimalMatcher.requireGroupingMatch = isStrict;
+        decimalMatcher.groupingEnabled = properties.getGroupingSize() > 0;
+        decimalMatcher.decimalEnabled = properties.getDecimalPatternMatchRequired() ? decimalSeparatorRequired : true;
          decimalMatcher.grouping1 = properties.getGroupingSize();
          decimalMatcher.grouping2 = properties.getSecondaryGroupingSize();
          decimalMatcher.integerOnly = properties.getParseIntegerOnly();
+        decimalMatcher.freeze(symbols, isStrict);
          parser.addMatcher(decimalMatcher);
          if (!properties.getParseNoExponent()) {
              parser.addMatcher(new ScientificMatcher(symbols));
@@ -186,6 +164,9 @@ public class NumberParserImpl {
          if (parseCurrency) {
              parser.addMatcher(new RequireCurrencyMatcher());
          }
+        if (decimalSeparatorRequired) {
+            parser.addMatcher(new RequireDecimalSeparatorMatcher());
+        }
  
          ////////////////////////
          /// OTHER ATTRIBUTES ///
@@ -212,14 +193,22 @@ public class NumberParserImpl {
      }
  
      public void addMatcher(NumberParseMatcher matcher) {
-        matchers.add(matcher);
+        assert !frozen;
+        this.matchers.add(matcher);
+    }
+
+    public void addMatchers(Collection<? extends NumberParseMatcher> matchers) {
+        assert !frozen;
+        this.matchers.addAll(matchers);
      }
  
      public void setComparator(Comparator<ParsedNumber> comparator) {
+        assert !frozen;
          this.comparator = comparator;
      }
  
      public void setIgnoreCase(boolean ignoreCase) {
+        assert !frozen;
          this.ignoreCase = ignoreCase;
      }
  
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ParsedNumber.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ParsedNumber.java

index d9e945c063ff4481f0c86ef46db21cfca843033f..02555f0778d14c5a42ad16925505389d5615fcbb 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ParsedNumber.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ParsedNumber.java
@@ -46,6 +46,8 @@ public class ParsedNumber {
      public static final int FLAG_PERMILLE = 0x0004;
      public static final int FLAG_HAS_EXPONENT = 0x0008;
      public static final int FLAG_HAS_DEFAULT_CURRENCY = 0x0010;
+    public static final int FLAG_HAS_DECIMAL_SEPARATOR = 0x0020;
+    public static final int FLAG_NAN = 0x0040;
  
      /** A Comparator that favors ParsedNumbers with the most chars consumed. */
      public static final Comparator<ParsedNumber> COMPARATOR = new Comparator<ParsedNumber>() {
@@ -84,7 +86,14 @@ public class ParsedNumber {
          charsConsumed = segment.getOffset();
      }
  
+    public boolean seenNumber() {
+        return quantity != null || 0 != (flags & FLAG_NAN);
+    }
+
      public double getDouble() {
+        if (0 != (flags & FLAG_NAN)) {
+            return Double.NaN;
+        }
          double d = quantity.toDouble();
          if (0 != (flags & FLAG_NEGATIVE)) {
              d = -d;
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/RequireDecimalSeparatorMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/RequireDecimalSeparatorMatcher.java

new file mode 100644 (file)

index 0000000..2348e48
--- /dev/null
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/RequireDecimalSeparatorMatcher.java
@@ -0,0 +1,27 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.impl.number.parse;
+
+/**
+ * Matcher that consumes no input; its postProcess clears results lacking FLAG_HAS_DECIMAL_SEPARATOR.
+ * @author sffc
+ */
+public class RequireDecimalSeparatorMatcher implements NumberParseMatcher {
+
+    @Override
+    public boolean match(StringSegment segment, ParsedNumber result) {
+        return false;
+    }
+
+    @Override
+    public void postProcess(ParsedNumber result) {
+        if (0 == (result.flags & ParsedNumber.FLAG_HAS_DECIMAL_SEPARATOR)) {
+            result.clear();
+        }
+    }
+
+    @Override
+    public String toString() {
+        return "<RequireDecimalSeparator>";
+    }
+}
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/RequireNumberMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/RequireNumberMatcher.java

index 2477a652d9fa98fec53a9d9cede91626a80e80db..c7a168ce4da8dca1aaf4c5216650406c1e6bce8a 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/RequireNumberMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/RequireNumberMatcher.java
@@ -16,7 +16,7 @@ public class RequireNumberMatcher implements NumberParseMatcher {
      @Override
      public void postProcess(ParsedNumber result) {
          // Require that a number is matched.
-        if (result.quantity == null) {
+        if (!result.seenNumber()) {
              result.clear();
          }
      }
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ScientificMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ScientificMatcher.java

index bf58e976403837a06feaca22cae5dcce3879db55..48032a59df18f6d766f54aedaeaa383d702c75cc 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ScientificMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ScientificMatcher.java
@@ -17,13 +17,17 @@ public class ScientificMatcher implements NumberParseMatcher {
      public ScientificMatcher(DecimalFormatSymbols symbols) {
          exponentSeparatorString = symbols.getExponentSeparator();
          minusSignString = symbols.getMinusSignString();
-        exponentMatcher = DecimalMatcher.getExponentInstance(symbols);
+        exponentMatcher = new DecimalMatcher();
+        exponentMatcher.isScientific = true;
+        exponentMatcher.groupingEnabled = false;
+        exponentMatcher.decimalEnabled = false;
+        exponentMatcher.freeze(symbols, false);
      }
  
      @Override
      public boolean match(StringSegment segment, ParsedNumber result) {
          // Only accept scientific notation after the mantissa.
-        if (result.quantity == null) {
+        if (!result.seenNumber()) {
              return false;
          }
  
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/SeparatorSetUtils.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/SeparatorSetUtils.java

new file mode 100644 (file)

index 0000000..16b2be1
--- /dev/null
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/SeparatorSetUtils.java
@@ -0,0 +1,109 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.impl.number.parse;
+
+import com.ibm.icu.text.DecimalFormatSymbols;
+import com.ibm.icu.text.UnicodeSet;
+
+/**
+ * Frozen UnicodeSets of comma-like, period-like and other separator characters, plus selection helpers.
+ * @author sffc
+ */
+public class SeparatorSetUtils {
+
+    // TODO: Re-generate these sets from the database. They probably haven't been updated in a while.
+
+    static final UnicodeSet COMMA_LIKE = new UnicodeSet(
+            "[,\\u060C\\u066B\\u3001\\uFE10\\uFE11\\uFE50\\uFE51\\uFF0C\\uFF64]").freeze();
+
+    static final UnicodeSet STRICT_COMMA_LIKE = new UnicodeSet("[,\\u066B\\uFE10\\uFE50\\uFF0C]").freeze();
+
+    static final UnicodeSet PERIOD_LIKE = new UnicodeSet("[.\\u2024\\u3002\\uFE12\\uFE52\\uFF0E\\uFF61]").freeze();
+
+    static final UnicodeSet STRICT_PERIOD_LIKE = new UnicodeSet("[.\\u2024\\uFE52\\uFF0E\\uFF61]").freeze();
+
+    static final UnicodeSet OTHER_GROUPING_SEPARATORS = new UnicodeSet(
+            "[\\ '\\u00A0\\u066C\\u2000-\\u200A\\u2018\\u2019\\u202F\\u205F\\u3000\\uFF07]").freeze();
+
+    static final UnicodeSet COMMA_OR_PERIOD_LIKE = new UnicodeSet().addAll(COMMA_LIKE).addAll(PERIOD_LIKE).freeze();
+
+    static final UnicodeSet STRICT_COMMA_OR_PERIOD_LIKE = new UnicodeSet().addAll(STRICT_COMMA_LIKE)
+            .addAll(STRICT_PERIOD_LIKE).freeze();
+
+    static final UnicodeSet COMMA_LIKE_OR_OTHER = new UnicodeSet().addAll(COMMA_LIKE).addAll(OTHER_GROUPING_SEPARATORS)
+            .freeze();
+
+    static final UnicodeSet STRICT_COMMA_LIKE_OR_OTHER = new UnicodeSet().addAll(STRICT_COMMA_LIKE)
+            .addAll(OTHER_GROUPING_SEPARATORS).freeze();
+
+    static final UnicodeSet PERIOD_LIKE_OR_OTHER = new UnicodeSet().addAll(PERIOD_LIKE)
+            .addAll(OTHER_GROUPING_SEPARATORS).freeze();
+
+    static final UnicodeSet STRICT_PERIOD_LIKE_OR_OTHER = new UnicodeSet().addAll(STRICT_PERIOD_LIKE)
+            .addAll(OTHER_GROUPING_SEPARATORS).freeze();
+
+    static final UnicodeSet COMMA_OR_PERIOD_LIKE_OR_OTHER = new UnicodeSet().addAll(COMMA_LIKE).addAll(PERIOD_LIKE)
+            .addAll(OTHER_GROUPING_SEPARATORS).freeze();
+
+    static final UnicodeSet STRICT_COMMA_OR_PERIOD_LIKE_OR_OTHER = new UnicodeSet().addAll(STRICT_COMMA_LIKE)
+            .addAll(STRICT_PERIOD_LIKE).addAll(OTHER_GROUPING_SEPARATORS).freeze();
+
+    public static UnicodeSet getGroupingUnicodeSet(DecimalFormatSymbols symbols, boolean isStrict) {
+        if (isStrict) {
+            return chooseUnicodeSet(symbols.getGroupingSeparatorString(),
+                    STRICT_COMMA_LIKE_OR_OTHER,
+                    STRICT_PERIOD_LIKE_OR_OTHER,
+                    OTHER_GROUPING_SEPARATORS);
+        } else {
+            return chooseUnicodeSet(symbols.getGroupingSeparatorString(),
+                    COMMA_LIKE_OR_OTHER,
+                    PERIOD_LIKE_OR_OTHER,
+                    OTHER_GROUPING_SEPARATORS);
+        }
+    }
+
+    public static UnicodeSet getDecimalUnicodeSet(DecimalFormatSymbols symbols, boolean isStrict) {
+        if (isStrict) {
+            return chooseUnicodeSet(symbols.getDecimalSeparatorString(), STRICT_COMMA_LIKE, STRICT_PERIOD_LIKE);
+        } else {
+            return chooseUnicodeSet(symbols.getDecimalSeparatorString(), COMMA_LIKE, PERIOD_LIKE);
+        }
+    }
+
+    private static UnicodeSet chooseUnicodeSet(String str, UnicodeSet set1) {
+        return set1.contains(str) ? set1 : new UnicodeSet().add(str).freeze();
+    }
+
+    private static UnicodeSet chooseUnicodeSet(String str, UnicodeSet set1, UnicodeSet set2) {
+        return set1.contains(str) ? set1 : chooseUnicodeSet(str, set2);
+    }
+
+    private static UnicodeSet chooseUnicodeSet(String str, UnicodeSet set1, UnicodeSet set2, UnicodeSet set3) {
+        return set1.contains(str) ? set1 : chooseUnicodeSet(str, set2, set3);
+    }
+
+    public static UnicodeSet unionUnicodeSets(UnicodeSet set1, UnicodeSet set2) {
+        // Note: == operators should be okay here since non-static UnicodeSets happen only in fallback cases.
+        if (set1 == UnicodeSet.EMPTY && set2 == UnicodeSet.EMPTY) {
+            return UnicodeSet.EMPTY;
+        } else if (set1 == COMMA_LIKE_OR_OTHER && set2 == PERIOD_LIKE_OR_OTHER) {
+            return COMMA_OR_PERIOD_LIKE_OR_OTHER;
+        } else if (set1 == PERIOD_LIKE_OR_OTHER && set2 == COMMA_LIKE_OR_OTHER) {
+            return COMMA_OR_PERIOD_LIKE_OR_OTHER;
+        } else if (set1 == STRICT_COMMA_LIKE_OR_OTHER && set2 == STRICT_PERIOD_LIKE_OR_OTHER) {
+            return STRICT_COMMA_OR_PERIOD_LIKE_OR_OTHER;
+        } else if (set1 == STRICT_PERIOD_LIKE_OR_OTHER && set2 == STRICT_COMMA_LIKE_OR_OTHER) {
+            return STRICT_COMMA_OR_PERIOD_LIKE_OR_OTHER;
+        } else if (set1 == COMMA_LIKE && set2 == PERIOD_LIKE) {
+            return COMMA_OR_PERIOD_LIKE;
+        } else if (set1 == PERIOD_LIKE && set2 == COMMA_LIKE) {
+            return COMMA_OR_PERIOD_LIKE;
+        } else if (set1 == STRICT_COMMA_LIKE && set2 == STRICT_PERIOD_LIKE) {
+            return STRICT_COMMA_OR_PERIOD_LIKE;
+        } else if (set1 == STRICT_PERIOD_LIKE && set2 == STRICT_COMMA_LIKE) {
+            return STRICT_COMMA_OR_PERIOD_LIKE;
+        } else {
+            return set1.cloneAsThawed().addAll(set2).freeze();
+        }
+    }
+}
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/WhitespaceMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/WhitespaceMatcher.java

deleted file mode 100644 (file)

index 51ed99c..0000000
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/WhitespaceMatcher.java
+++ /dev/null
@@ -1,48 +0,0 @@
-// © 2017 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html#License
-package com.ibm.icu.impl.number.parse;
-
-import com.ibm.icu.text.UnicodeSet;
-
-/**
- * @author sffc
- *
- */
-public class WhitespaceMatcher implements NumberParseMatcher {
-
-    // This set was decided after discussion with icu-design@. See ticket #13309.
-    // Zs+TAB is "horizontal whitespace" according to UTS #18 (blank property).
-    private static final UnicodeSet UNISET_WHITESPACE = new UnicodeSet("[[:Zs:][\\u0009]]").freeze();
-
-    private static final WhitespaceMatcher INSTANCE = new WhitespaceMatcher();
-
-    public static WhitespaceMatcher getInstance() {
-        return INSTANCE;
-    }
-
-    private WhitespaceMatcher() {
-    }
-
-    @Override
-    public boolean match(StringSegment segment, ParsedNumber result) {
-        while (segment.length() > 0) {
-            int cp = segment.getCodePoint();
-            if (cp == -1 || !UNISET_WHITESPACE.contains(cp)) {
-                break;
-            }
-            segment.adjustOffset(Character.charCount(cp));
-            // Note: Do not touch the charsConsumed.
-        }
-        return segment.length() == 0 || segment.isLeadingSurrogate();
-    }
-
-    @Override
-    public void postProcess(ParsedNumber result) {
-        // No-op
-    }
-
-    @Override
-    public String toString() {
-        return "<WhitespaceMatcher>";
-    }
-}
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/data/numberformattestspecification.txt b/icu4j/main/tests/core/src/com/ibm/icu/dev/data/numberformattestspecification.txt

index 682c7adc730e70a8748cb06691ca4e296c0219b0..ae7294b55f7d79129fad3868fe6ae7f73828da9e 100644 (file)
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/data/numberformattestspecification.txt
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/data/numberformattestspecification.txt
@@ -975,8 +975,7 @@ set locale en
  set pattern '-'#y
  begin
  parse  output  breaks
-// FIXME
--45y   45      P
+-45y   45
  
  test parse with locale symbols
  // The grouping separator in it_CH is an apostrophe
@@ -1039,7 +1038,7 @@ USD53.45  53.45   USD
  (USD 7.926)    -7.926  USD     CJ
  USD (7.926)    -7.926  USD     CJ
  USD (7.92)     -7.92   USD     CJ
-(7.92)USD      -7.92   USD     CJP
+(7.92)USD      -7.92   USD     CJ
  USD(7.92)      -7.92   USD     CJ
  (8) USD        -8      USD
  -8 USD -8      USD     C
@@ -1060,10 +1059,10 @@ US Dollars (53.45)      -53.45  USD     CJ
  US Dollar (53.45)      -53.45  USD     CJ
  (53.45) US Dollar      -53.45  USD
  US Dollars(53.45)      -53.45  USD     CJ
-(53.45)US Dollars      -53.45  USD     CJP
+(53.45)US Dollars      -53.45  USD     CJ
  US Dollar(53.45)       -53.45  USD     CJ
  US Dollat(53.45)       fail    USD
-(53.45)US Dollar       -53.45  USD     CJP
+(53.45)US Dollar       -53.45  USD     CJ
  
  
  test parse currency ISO negative
@@ -1074,9 +1073,8 @@ parse     output  outputCurrency  breaks
  53.45  fail    GBP
  £53.45        53.45   GBP
  $53.45 fail    USD     J
-// FIXME: Fix the failures in this section. Positive/negative mixup.
-53.45 USD      53.45   USD     P
-53.45 GBP      53.45   GBP     P
+53.45 USD      53.45   USD
+53.45 GBP      53.45   GBP
  USD 53.45      53.45   USD     J
  53.45USD       53.45   USD     CJ
  USD53.45       53.45   USD
@@ -1087,12 +1085,12 @@ USD -7.926      -7.926  USD     CJ
  -7.92USD       -7.92   USD     CJ
  USD-7.92       -7.92   USD     CJ
  -8 USD -8      USD
-67 USD 67      USD     P
+67 USD 67      USD
  53.45$ fail    USD
  US Dollars 53.45       53.45   USD     J
-53.45 US Dollars       53.45   USD     P
+53.45 US Dollars       53.45   USD
  US Dollar 53.45        53.45   USD     J
-53.45 US Dollar        53.45   USD     P
+53.45 US Dollar        53.45   USD
  US Dollars53.45        53.45   USD
  53.45US Dollars        53.45   USD     CJ
  US Dollar53.45 53.45   USD
@@ -1114,16 +1112,16 @@ $53.45  fail    USD     J
  USD 53.45      53.45   USD     J
  53.45USD       53.45   USD     CJ
  USD53.45       53.45   USD
-(7.92) USD     -7.92   USD     P
-(7.92) GBP     -7.92   GBP     P
-(7.926) USD    -7.926  USD     P
-(7.926 USD)    -7.926  USD     CJP
-(USD 7.926)    -7.926  USD     CJP
-USD (7.926)    -7.926  USD     CJP
-USD (7.92)     -7.92   USD     CJP
-(7.92)USD      -7.92   USD     CJP
-USD(7.92)      -7.92   USD     CJP
-(8) USD        -8      USD     P
+(7.92) USD     -7.92   USD
+(7.92) GBP     -7.92   GBP
+(7.926) USD    -7.926  USD
+(7.926 USD)    -7.926  USD     CJ
+(USD 7.926)    -7.926  USD     CJ
+USD (7.926)    -7.926  USD     CJ
+USD (7.92)     -7.92   USD     CJ
+(7.92)USD      -7.92   USD     CJ
+USD(7.92)      -7.92   USD     CJ
+(8) USD        -8      USD
  -8 USD -8      USD     C
  67 USD 67      USD
  // J throws a NullPointerException on the next case
@@ -1152,16 +1150,16 @@ $53.45  fail    USD     J
  USD 53.45      53.45   USD     J
  53.45USD       53.45   USD     CJ
  USD53.45       53.45   USD
-(7.92) USD     -7.92   USD     P
-(7.92) GBP     -7.92   GBP     P
-(7.926) USD    -7.926  USD     P
-(7.926 USD)    -7.926  USD     CJP
-(USD 7.926)    -7.926  USD     CJP
-USD (7.926)    -7.926  USD     CJP
-USD (7.92)     -7.92   USD     CJP
-(7.92)USD      -7.92   USD     CJP
-USD(7.92)      -7.92   USD     CJP
-(8) USD        -8      USD     P
+(7.92) USD     -7.92   USD
+(7.92) GBP     -7.92   GBP
+(7.926) USD    -7.926  USD
+(7.926 USD)    -7.926  USD     CJ
+(USD 7.926)    -7.926  USD     CJ
+USD (7.926)    -7.926  USD     CJ
+USD (7.92)     -7.92   USD     CJ
+(7.92)USD      -7.92   USD     CJ
+USD(7.92)      -7.92   USD     CJ
+(8) USD        -8      USD
  -8 USD -8      USD     C
  67 USD 67      USD
  53.45$ fail    USD
@@ -1190,16 +1188,16 @@ USD 53.45       53.45   USD     J
  53.45USD       53.45   USD     CJ
  USD53.45       53.45   USD
  // S fails these because '(' is an incomplete prefix.
-(7.92) USD     -7.92   USD     CJSP
-(7.92) GBP     -7.92   GBP     CJSP
-(7.926) USD    -7.926  USD     CJSP
-(7.926 USD)    -7.926  USD     CJSP
-(USD 7.926)    -7.926  USD     JP
-USD (7.926)    -7.926  USD     CJSP
-USD (7.92)     -7.92   USD     CJSP
-(7.92)USD      -7.92   USD     CJSP
-USD(7.92)      -7.92   USD     CJSP
-(8) USD        -8      USD     CJSP
+(7.92) USD     -7.92   USD     CJS
+(7.92) GBP     -7.92   GBP     CJS
+(7.926) USD    -7.926  USD     CJS
+(7.926 USD)    -7.926  USD     CJS
+(USD 7.926)    -7.926  USD     J
+USD (7.926)    -7.926  USD     CJS
+USD (7.92)     -7.92   USD     CJS
+(7.92)USD      -7.92   USD     CJS
+USD(7.92)      -7.92   USD     CJS
+(8) USD        -8      USD     CJS
  -8 USD -8      USD     C
  67 USD 67      USD     C
  53.45$ fail    USD
@@ -1290,16 +1288,17 @@ Euros 7.82      7.82    EUR
  test parse currency without currency mode
  // Should accept a symbol associated with the currency specified by the API,
  // but should not traverse the full currency data.
+// P always traverses full currency data.
  set locale en_US
  set pattern \u00a4#,##0.00
  begin
  parse  currency        output  breaks
  $52.41 USD     52.41
  USD52.41       USD     52.41   K
-\u20ac52.41    USD     fail
-EUR52.41       USD     fail
-$52.41 EUR     fail
-USD52.41       EUR     fail
+\u20ac52.41    USD     fail    P
+EUR52.41       USD     fail    P
+$52.41 EUR     fail    P
+USD52.41       EUR     fail    P
  \u20ac52.41    EUR     52.41   K
  EUR52.41       EUR     52.41
  
@@ -1361,12 +1360,13 @@ set decimalPatternMatchRequired 1
  begin
  pattern        parse   output  breaks
  // K doesn't support this feature.
+// P stops parsing when it sees the decimal separator, but doesn't fail.
  0      123     123
-0      123.    fail    CJK
-0      1.23    fail    CJK
+0      123.    fail    CJKP
+0      1.23    fail    CJKP
  0      -513    -513
-0      -513.   fail    CJK
-0      -5.13   fail    CJK
+0      -513.   fail    CJKP
+0      -5.13   fail    CJKP
  0.0    123     fail    K
  0.0    123.    123     C
  0.0    1.23    1.23    C
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/AffixUtilsTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/AffixUtilsTest.java

index bbe70cc08536c628a9b40fe6d6aed654c78b241c..15ae14a7d1fea385b3b5afde827cc2256b87fbe9 100644 (file)
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/AffixUtilsTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/AffixUtilsTest.java
@@ -10,6 +10,7 @@ import org.junit.Test;
  import com.ibm.icu.impl.number.AffixUtils;
  import com.ibm.icu.impl.number.AffixUtils.SymbolProvider;
  import com.ibm.icu.impl.number.NumberStringBuilder;
+import com.ibm.icu.text.UnicodeSet;
  
  public class AffixUtilsTest {
  
@@ -218,20 +219,22 @@ public class AffixUtilsTest {
    }
  
    @Test
-  public void testRemoveSymbols() {
+  public void testWithoutSymbolsOrIgnorables() {
      String[][] cases = {
          {"", ""},
          {"-", ""},
+        {" ", ""},
          {"'-'", "-"},
-        {"-a+b%c‰d¤e¤¤f¤¤¤g¤¤¤¤h¤¤¤¤¤", "abcdefgh"},
+        {"-a+b%c‰d¤e¤¤f¤¤¤g¤¤¤¤h¤¤¤¤¤i\tj", "abcdefghij"},
      };
  
+    UnicodeSet ignorables = new UnicodeSet("[:whitespace:]");
      StringBuilder sb = new StringBuilder();
      for (String[] cas : cases) {
        String input = cas[0];
        String expected = cas[1];
        sb.setLength(0);
-      AffixUtils.removeSymbols(input, sb);
+      AffixUtils.withoutSymbolsOrIgnorables(input, ignorables, sb);
        assertEquals("Removing symbols from: " + input, expected, sb.toString());
      }
    }
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java

index 60d86aea14afacd4d96b0bae43d971f8adff1017..ee7a73cbc9b6321cbc9d9d7705da8ddf74b56138 100644 (file)
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java
@@ -52,10 +52,17 @@ public class NumberParserTest {
                  { 3, "a 𝟱𝟭𝟰𝟮𝟯 b", "a0b", 14, 51423. },
                  { 3, "-a 𝟱𝟭𝟰𝟮𝟯 b", "a0b", 15, -51423. },
                  { 3, "a -𝟱𝟭𝟰𝟮𝟯 b", "a0b", 15, -51423. },
-                { 3, "𝟱𝟭𝟰𝟮𝟯", "0;(0)", 10, 51423. },
-                { 3, "(𝟱𝟭𝟰𝟮𝟯)", "0;(0)", 12, -51423. },
-                { 3, "𝟱𝟭𝟰𝟮𝟯)", "0;(0)", 11, -51423. },
-                { 3, "(𝟱𝟭𝟰𝟮𝟯", "0;(0)", 11, -51423. },
+                { 3, "𝟱𝟭𝟰𝟮𝟯", "[0];(0)", 10, 51423. },
+                { 3, "[𝟱𝟭𝟰𝟮𝟯", "[0];(0)", 11, 51423. },
+                { 3, "𝟱𝟭𝟰𝟮𝟯]", "[0];(0)", 11, 51423. },
+                { 3, "[𝟱𝟭𝟰𝟮𝟯]", "[0];(0)", 12, 51423. },
+                { 3, "(𝟱𝟭𝟰𝟮𝟯", "[0];(0)", 11, -51423. },
+                { 3, "𝟱𝟭𝟰𝟮𝟯)", "[0];(0)", 11, -51423. },
+                { 3, "(𝟱𝟭𝟰𝟮𝟯)", "[0];(0)", 12, -51423. },
+                { 3, "𝟱𝟭𝟰𝟮𝟯", "{0};{0}", 10, 51423. },
+                { 3, "{𝟱𝟭𝟰𝟮𝟯", "{0};{0}", 11, 51423. },
+                { 3, "𝟱𝟭𝟰𝟮𝟯}", "{0};{0}", 11, 51423. },
+                { 3, "{𝟱𝟭𝟰𝟮𝟯}", "{0};{0}", 12, 51423. },
                  { 1, "a40b", "a0'0b'", 3, 40. }, // greedy code path thinks "40" is the number
                  { 2, "a40b", "a0'0b'", 4, 4. }, // slow code path find the suffix "0b"
                  { 3, "𝟱.𝟭𝟰𝟮E𝟯", "0", 12, 5142. },
author	Shane Carr <shane@unicode.org>
	Sat, 16 Dec 2017 02:54:58 +0000 (02:54 +0000)
committer	Shane Carr <shane@unicode.org>
	Sat, 16 Dec 2017 02:54:58 +0000 (02:54 +0000)
icu4j/main/classes/core/src/com/ibm/icu/impl/number/AffixPatternProvider.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/impl/number/AffixUtils.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixMatcher.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/DecimalMatcher.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/IgnorablesMatcher.java	[new file with mode: 0644]	patch \| blob
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NanMatcher.java	[new file with mode: 0644]	patch \| blob
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ParsedNumber.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/RequireDecimalSeparatorMatcher.java	[new file with mode: 0644]	patch \| blob
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/RequireNumberMatcher.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ScientificMatcher.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/SeparatorSetUtils.java	[new file with mode: 0644]	patch \| blob
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/WhitespaceMatcher.java	[deleted file]	patch \| blob \| history
icu4j/main/tests/core/src/com/ibm/icu/dev/data/numberformattestspecification.txt		patch \| blob \| history
icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/AffixUtilsTest.java		patch \| blob \| history
icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java		patch \| blob \| history