granicus.if.org Git - icu/commitdiff
ICU-13513 Initial modular framework for number parsing.
author Shane Carr <shane@unicode.org>
Wed, 13 Dec 2017 05:03:28 +0000 (05:03 +0000)
committer Shane Carr <shane@unicode.org>
Wed, 13 Dec 2017 05:03:28 +0000 (05:03 +0000)
X-SVN-Rev: 40725

18 files changed:
icu4j/main/classes/core/src/com/ibm/icu/impl/TextTrieMap.java
icu4j/main/classes/core/src/com/ibm/icu/impl/number/MutablePatternModifier.java
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixMatcher.java [new file with mode: 0644]
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CurrencyMatcher.java [new file with mode: 0644]
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/DecimalMatcher.java [new file with mode: 0644]
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/MatcherUtils.java [new file with mode: 0644]
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/MinusSignMatcher.java [new file with mode: 0644]
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParseMatcher.java [new file with mode: 0644]
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java [new file with mode: 0644]
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ParsedNumber.java [new file with mode: 0644]
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ScientificMatcher.java [new file with mode: 0644]
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/StringSegment.java [new file with mode: 0644]
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/SymbolMatcher.java [new file with mode: 0644]
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/WhitespaceMatcher.java [new file with mode: 0644]
icu4j/main/classes/core/src/com/ibm/icu/text/TimeZoneFormat.java
icu4j/main/classes/core/src/com/ibm/icu/util/Currency.java
icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java [new file with mode: 0644]
icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/StringSegmentTest.java [new file with mode: 0644]

index c7b8d443cdabce1e8896a963a4d46ef8b0fde2a5..1ec86ebee155655b9d7a7d8589acd6aefec8736a 100644 (file)
@@ -25,6 +25,11 @@ public class TextTrieMap<V> {
     private Node _root = new Node();
     boolean _ignoreCase;
 
+    public static class Output {
+        public int matchLength;
+        public boolean partialMatch;
+    }
+
     /**
      * Constructs a TextTrieMap object.
      *
@@ -74,25 +79,29 @@ public class TextTrieMap<V> {
         return get(text, start, null);
     }
 
-    public Iterator<V> get(CharSequence text, int start, int[] matchLen) {
+    public Iterator<V> get(CharSequence text, int start, Output output) {
         LongestMatchHandler<V> handler = new LongestMatchHandler<V>();
-        find(text, start, handler);
-        if (matchLen != null && matchLen.length > 0) {
-            matchLen[0] = handler.getMatchLength();
+        // The trie walk sets output.partialMatch without a null check, and get(text, start) passes null
+        // here, so hand the walk a throwaway Output when the caller did not supply one.
+        find(text, start, handler, output != null ? output : new Output());
+        if (output != null) {
+            output.matchLength = handler.getMatchLength();
         }
         return handler.getMatches();
     }
 
     public void find(CharSequence text, ResultHandler<V> handler) {
-        find(text, 0, handler);
+        find(text, 0, handler, new Output());
     }
 
     public void find(CharSequence text, int offset, ResultHandler<V> handler) {
+        find(text, offset, handler, new Output());
+    }
+
+    private void find(CharSequence text, int offset, ResultHandler<V> handler, Output output) {
         CharIterator chitr = new CharIterator(text, offset, _ignoreCase);
-        find(_root, chitr, handler);
+        find(_root, chitr, handler, output);
     }
 
-    private synchronized void find(Node node, CharIterator chitr, ResultHandler<V> handler) {
+    private synchronized void find(Node node, CharIterator chitr, ResultHandler<V> handler, Output output) {
         Iterator<V> values = node.values();
         if (values != null) {
             if (!handler.handlePrefixMatch(chitr.processedLength(), values)) {
@@ -100,9 +109,9 @@ public class TextTrieMap<V> {
             }
         }
 
-        Node nextMatch = node.findMatch(chitr);
+        Node nextMatch = node.findMatch(chitr, output);
         if (nextMatch != null) {
-            find(nextMatch, chitr, handler);
+            find(nextMatch, chitr, handler, output);
         }
     }
 
@@ -344,11 +353,12 @@ public class TextTrieMap<V> {
             add(toCharArray(buf), 0, value);
         }
 
-        public Node findMatch(CharIterator chitr) {
+        public Node findMatch(CharIterator chitr, Output output) {
             if (_children == null) {
                 return null;
             }
             if (!chitr.hasNext()) {
+                output.partialMatch = true;
                 return null;
             }
             Node match = null;
@@ -358,7 +368,7 @@ public class TextTrieMap<V> {
                     break;
                 }
                 if (ch == child._text[0]) {
-                    if (child.matchFollowing(chitr)) {
+                    if (child.matchFollowing(chitr, output)) {
                         match = child;
                     }
                     break;
@@ -436,11 +446,12 @@ public class TextTrieMap<V> {
             litr.add(new Node(subArray(text, offset), addValue(null, value), null));
         }
 
-        private boolean matchFollowing(CharIterator chitr) {
+        private boolean matchFollowing(CharIterator chitr, Output output) {
             boolean matched = true;
             int idx = 1;
             while (idx < _text.length) {
                 if(!chitr.hasNext()) {
+                    output.partialMatch = true;
                     matched = false;
                     break;
                 }
index df1fa909930ed426f0ef01b1667d47da8c030bc8..929c50b06017be15f3134883a85935e50966fbdf 100644 (file)
@@ -105,7 +105,7 @@ public class MutablePatternModifier implements Modifier, SymbolProvider, CharSeq
      *            convenience method {@link #needsPlurals()}.
      */
     public void setSymbols(DecimalFormatSymbols symbols, Currency currency, UnitWidth unitWidth, PluralRules rules) {
-        assert (rules != null) == needsPlurals();
+        //assert (rules != null) == needsPlurals();
         this.symbols = symbols;
         this.currency = currency;
         this.unitWidth = unitWidth;
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixMatcher.java
new file mode 100644 (file)
index 0000000..a967158
--- /dev/null
@@ -0,0 +1,186 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.impl.number.parse;
+
+import java.util.Comparator;
+import java.util.Set;
+import java.util.TreeSet;
+
+import com.ibm.icu.impl.StandardPlural;
+import com.ibm.icu.impl.number.MutablePatternModifier;
+import com.ibm.icu.impl.number.NumberStringBuilder;
+
+/**
+ * Matches one fixed prefix/suffix pair around a number.
+ *
+ * <p>
+ * The prefix is only attempted while no quantity has been parsed yet ({@code result.quantity == null}); the suffix
+ * is only attempted afterwards, and only when this matcher's prefix equals the prefix already recorded in the
+ * result. The flags given at construction are OR-ed into the result by {@link #postProcess} when both recorded
+ * affixes equal this matcher's.
+ *
+ * @author sffc
+ */
+public class AffixMatcher implements NumberParseMatcher {
+    private final String prefix;
+    private final String suffix;
+    private final int flags;
+
+    /**
+     * Comparator for two AffixMatcher instances which prioritizes longer prefixes followed by longer suffixes, ensuring
+     * that the longest prefix/suffix pair is always chosen.
+     *
+     * <p>
+     * Ties between affixes of equal length are broken by the affix text and then by the flags, so the ordering is
+     * deterministic, antisymmetric, and returns zero exactly when {@link #equals} is true. (Breaking ties on
+     * {@code hashCode()} is not safe: two unequal matchers with the same hash would each sort after the other.)
+     */
+    public static final Comparator<AffixMatcher> COMPARATOR = new Comparator<AffixMatcher>() {
+        @Override
+        public int compare(AffixMatcher o1, AffixMatcher o2) {
+            if (o1.prefix.length() != o2.prefix.length()) {
+                return o1.prefix.length() > o2.prefix.length() ? -1 : 1;
+            }
+            if (o1.suffix.length() != o2.suffix.length()) {
+                return o1.suffix.length() > o2.suffix.length() ? -1 : 1;
+            }
+            // Same lengths: fall back to the fields that equals() compares, in a fixed order.
+            int byPrefix = o1.prefix.compareTo(o2.prefix);
+            if (byPrefix != 0) {
+                return byPrefix;
+            }
+            int bySuffix = o1.suffix.compareTo(o2.suffix);
+            if (bySuffix != 0) {
+                return bySuffix;
+            }
+            return o1.flags < o2.flags ? -1 : (o1.flags > o2.flags ? 1 : 0);
+        }
+    };
+
+    /**
+     * Creates multiple AffixMatchers, enough to cover the requirements for the given pattern modifier, appending them
+     * in order to the NumberParserImpl.
+     *
+     * @param patternModifier
+     *            The modifier to read affixes from. Its number properties are changed by this method.
+     * @param flags
+     *            Flags for the positive matchers; {@link ParsedNumber#FLAG_NEGATIVE} is added for the negative ones.
+     * @param output
+     *            The parser that receives the matchers, in {@link #COMPARATOR} order.
+     */
+    public static void generateFromPatternModifier(
+            MutablePatternModifier patternModifier,
+            int flags,
+            NumberParserImpl output) {
+
+        // Store the matchers in a TreeSet to ensure both uniqueness and order.
+        Set<AffixMatcher> matchers = new TreeSet<AffixMatcher>(COMPARATOR);
+
+        // Construct one matcher per isNegative/plural combination. Most of the time, plurals aren't needed, so only
+        // two matchers will be created, one for positive and one for negative.
+        NumberStringBuilder nsb = new NumberStringBuilder();
+        boolean isNegative = false;
+        while (true) {
+            if (isNegative) {
+                flags |= ParsedNumber.FLAG_NEGATIVE;
+            }
+
+            if (patternModifier.needsPlurals()) {
+                for (StandardPlural plural : StandardPlural.VALUES) {
+                    patternModifier.setNumberProperties(isNegative, plural);
+                    matchers.add(getInstance(patternModifier, flags, nsb));
+                }
+            } else {
+                patternModifier.setNumberProperties(isNegative, null);
+                matchers.add(getInstance(patternModifier, flags, nsb));
+            }
+
+            // Two passes only: positive first, then negative.
+            if (isNegative) {
+                break;
+            } else {
+                isNegative = true;
+            }
+        }
+
+        for (AffixMatcher matcher : matchers) {
+            output.addMatcher(matcher);
+        }
+    }
+
+    /**
+     * Constructs an AffixMatcher from the given MutablePatternModifier and flags. The NumberStringBuilder is used as a
+     * temporary object only; it is cleared before use.
+     */
+    private static AffixMatcher getInstance(
+            MutablePatternModifier patternModifier,
+            int flags,
+            NumberStringBuilder nsb) {
+        // TODO: Make this more efficient (avoid the substrings and things)
+        nsb.clear();
+        patternModifier.apply(nsb, 0, 0);
+        // The modifier was applied to an empty builder, so everything before prefixLength is the prefix and the
+        // remainder is the suffix.
+        int prefixLength = patternModifier.getPrefixLength();
+        String full = nsb.toString();
+        String prefix = full.substring(0, prefixLength);
+        String suffix = full.substring(prefixLength);
+        return new AffixMatcher(prefix, suffix, flags);
+    }
+
+    private AffixMatcher(String prefix, String suffix, int flags) {
+        this.prefix = prefix;
+        this.suffix = suffix;
+        this.flags = flags;
+    }
+
+    /**
+     * Tries to consume this matcher's prefix (before a quantity has been parsed) or suffix (after).
+     *
+     * @return true only when the whole segment is a proper beginning of the affix being tried, i.e. more input might
+     *         complete it; false in every other case, including a successful match.
+     */
+    @Override
+    public boolean match(StringSegment segment, ParsedNumber result) {
+        if (result.quantity == null) {
+            // Prefix
+            if (result.prefix != null || prefix.length() == 0) {
+                return false;
+            }
+            int overlap = segment.getCommonPrefixLength(prefix);
+            if (overlap == prefix.length()) {
+                result.prefix = prefix;
+                segment.adjustOffset(overlap);
+                result.setCharsConsumed(segment);
+                return false;
+            } else if (overlap == segment.length()) {
+                return true;
+            }
+
+        } else {
+            // Suffix: only valid when the prefix recorded in the result is this matcher's prefix.
+            if (result.suffix != null || suffix.length() == 0 || !prefix.equals(orEmpty(result.prefix))) {
+                return false;
+            }
+            int overlap = segment.getCommonPrefixLength(suffix);
+            if (overlap == suffix.length()) {
+                result.suffix = suffix;
+                segment.adjustOffset(overlap);
+                result.setCharsConsumed(segment);
+                return false;
+            } else if (overlap == segment.length()) {
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+    @Override
+    public void postProcess(ParsedNumber result) {
+        // Check to see if our affix is the one that was matched. If so, set the flags in the result.
+        if (prefix.equals(orEmpty(result.prefix)) && suffix.equals(orEmpty(result.suffix))) {
+            result.flags |= flags;
+        }
+    }
+
+    /**
+     * Returns the input string, or "" if input is null.
+     */
+    static String orEmpty(String str) {
+        return str == null ? "" : str;
+    }
+
+    /**
+     * Returns the sum of prefix and suffix length in the ParsedNumber. A null prefix or suffix counts as length zero.
+     */
+    public static int affixLength(ParsedNumber o2) {
+        return orEmpty(o2.prefix).length() + orEmpty(o2.suffix).length();
+    }
+
+    /**
+     * Two matchers are equal when their prefix, suffix and flags are all equal.
+     */
+    @Override
+    public boolean equals(Object _other) {
+        if (!(_other instanceof AffixMatcher)) {
+            return false;
+        }
+        AffixMatcher other = (AffixMatcher) _other;
+        return prefix.equals(other.prefix) && suffix.equals(other.suffix) && flags == other.flags;
+    }
+
+    @Override
+    public int hashCode() {
+        return prefix.hashCode() ^ suffix.hashCode() ^ flags;
+    }
+
+    @Override
+    public String toString() {
+        return "<AffixMatcher \"" + prefix + "\" \"" + suffix + "\">";
+    }
+}
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CurrencyMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CurrencyMatcher.java
new file mode 100644 (file)
index 0000000..88401d7
--- /dev/null
@@ -0,0 +1,54 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.impl.number.parse;
+
+import java.util.Iterator;
+
+import com.ibm.icu.impl.TextTrieMap;
+import com.ibm.icu.util.Currency;
+import com.ibm.icu.util.Currency.CurrencyStringInfo;
+import com.ibm.icu.util.ULocale;
+
+/**
+ * Matcher that looks the segment up in two currency tries obtained from {@link Currency#getParsingTrie}: first the
+ * {@link Currency#LONG_NAME} trie, then, if that yields nothing, the {@link Currency#SYMBOL_NAME} trie.
+ *
+ * @author sffc
+ */
+public class CurrencyMatcher implements NumberParseMatcher {
+
+    private final TextTrieMap<CurrencyStringInfo> longNameTrie;
+    private final TextTrieMap<CurrencyStringInfo> symbolTrie;
+
+    public CurrencyMatcher(ULocale locale) {
+        longNameTrie = Currency.getParsingTrie(locale, Currency.LONG_NAME);
+        symbolTrie = Currency.getParsingTrie(locale, Currency.SYMBOL_NAME);
+    }
+
+    /**
+     * Records the ISO code of the first trie hit in {@code result.currencyCode} and consumes the matched chars. Does
+     * nothing if a currency code has already been recorded.
+     *
+     * @return the {@code partialMatch} flag reported by the trie lookups; false if a currency was already recorded.
+     */
+    @Override
+    public boolean match(StringSegment segment, ParsedNumber result) {
+        if (result.currencyCode != null) {
+            return false;
+        }
+
+        // One Output is shared by both lookups, so the second lookup overwrites matchLength.
+        TextTrieMap.Output lookup = new TextTrieMap.Output();
+        Iterator<CurrencyStringInfo> hits = longNameTrie.get(segment, 0, lookup);
+        if (hits == null) {
+            hits = symbolTrie.get(segment, 0, lookup);
+        }
+        if (hits == null) {
+            return lookup.partialMatch;
+        }
+
+        result.currencyCode = hits.next().getISOCode();
+        segment.adjustOffset(lookup.matchLength);
+        result.setCharsConsumed(segment);
+        return lookup.partialMatch;
+    }
+
+    @Override
+    public void postProcess(ParsedNumber result) {
+        // Nothing to finalize for currencies.
+    }
+
+    @Override
+    public String toString() {
+        return "<CurrencyMatcher>";
+    }
+}
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/DecimalMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/DecimalMatcher.java
new file mode 100644 (file)
index 0000000..c6a63a9
--- /dev/null
@@ -0,0 +1,165 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.impl.number.parse;
+
+import java.math.RoundingMode;
+
+import com.ibm.icu.impl.number.DecimalQuantity_DualStorageBCD;
+import com.ibm.icu.impl.number.RoundingUtils;
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.text.DecimalFormatSymbols;
+import com.ibm.icu.text.UnicodeSet;
+
+/**
+ * Matcher for a run of digits with optional grouping and decimal separators. In the default mode the digits are
+ * appended to {@code result.quantity}; in scientific (exponent) mode they are accumulated into
+ * {@code result.scientificAdjustment} instead.
+ *
+ * @author sffc
+ */
+public class DecimalMatcher implements NumberParseMatcher {
+
+    /**
+     * Creates a matcher for the mantissa of a number.
+     *
+     * @param symbols
+     *            Supplies the digit strings. The separators are currently hard-coded to "," (grouping) and "."
+     *            (decimal) rather than read from the symbols.
+     * @return A new non-scientific DecimalMatcher.
+     */
+    public static DecimalMatcher getInstance(DecimalFormatSymbols symbols) {
+        return new DecimalMatcher(symbols.getDigitStrings(),
+                new UnicodeSet("[,]").freeze(),
+                new UnicodeSet("[.]").freeze(),
+                false);
+    }
+
+    /**
+     * Creates a matcher for the exponent of a number; same configuration as {@link #getInstance} except that parsed
+     * digits go to {@code scientificAdjustment}.
+     */
+    public static DecimalMatcher getExponentInstance(DecimalFormatSymbols symbols) {
+        return new DecimalMatcher(symbols.getDigitStrings(),
+                new UnicodeSet("[,]").freeze(),
+                new UnicodeSet("[.]").freeze(),
+                true);
+    }
+
+    private final String[] digitStrings;
+    private final UnicodeSet groupingUniSet;
+    private final UnicodeSet decimalUniSet;
+    // Union of groupingUniSet and decimalUniSet.
+    private final UnicodeSet separatorSet;
+    public boolean requireGroupingMatch = false;
+    private final int grouping1 = 3;
+    private final int grouping2 = 3;
+    private final boolean isScientific;
+
+    private DecimalMatcher(
+            String[] digitStrings,
+            UnicodeSet groupingUniSet,
+            UnicodeSet decimalUniSet,
+            boolean isScientific) {
+        this.digitStrings = digitStrings;
+        this.groupingUniSet = groupingUniSet;
+        this.decimalUniSet = decimalUniSet;
+        separatorSet = groupingUniSet.cloneAsThawed().addAll(decimalUniSet).freeze();
+        this.isScientific = isScientific;
+    }
+
+    /**
+     * Consumes digits and separators from the front of the segment.
+     *
+     * @return true if the segment was fully consumed, ended in a partial digit string, or ends on a leading
+     *         surrogate; false otherwise, and also when a quantity already exists in non-scientific mode.
+     */
+    @Override
+    public boolean match(StringSegment segment, ParsedNumber result) {
+        if (result.quantity != null && !isScientific) {
+            // A number has already been consumed.
+            return false;
+        }
+
+        int currGroup = 0;
+        int separator = -1;
+        int lastSeparatorOffset = segment.getOffset();
+        boolean hasPartialPrefix = false;
+        boolean seenBothSeparators = false;
+        while (segment.length() > 0) {
+            hasPartialPrefix = false;
+
+            // Attempt to match a digit.
+            byte digit = -1;
+
+            // Try by code point digit value.
+            int cp = segment.getCodePoint();
+            if (UCharacter.isDigit(cp)) {
+                segment.adjustOffset(Character.charCount(cp));
+                digit = (byte) UCharacter.digit(cp);
+            }
+
+            // Try by digit string.
+            if (digit == -1) {
+                for (int i = 0; i < digitStrings.length; i++) {
+                    String str = digitStrings[i];
+                    int overlap = segment.getCommonPrefixLength(str);
+                    if (overlap == str.length()) {
+                        segment.adjustOffset(str.length());
+                        digit = (byte) i;
+                        // Stop at the first hit: the segment has advanced, so testing the remaining digit
+                        // strings would compare against the NEXT digit and could swallow it.
+                        break;
+                    } else if (overlap == segment.length()) {
+                        hasPartialPrefix = true;
+                    }
+                }
+            }
+
+            // If found, save it in the DecimalQuantity or scientific adjustment.
+            if (digit >= 0) {
+                if (isScientific) {
+                    result.scientificAdjustment = digit + result.scientificAdjustment * 10;
+                } else {
+                    if (result.quantity == null) {
+                        result.quantity = new DecimalQuantity_DualStorageBCD();
+                    }
+                    result.quantity.appendDigit(digit, 0, true);
+                }
+                result.setCharsConsumed(segment);
+                currGroup++;
+                continue;
+            }
+
+            // Attempt to match a separator.
+            if (!seenBothSeparators && cp != -1 && separatorSet.contains(cp)) {
+                if (separator == -1) {
+                    // First separator; could be either grouping or decimal.
+                    separator = cp;
+                    if (requireGroupingMatch && currGroup == 0) {
+                        break;
+                    }
+                } else if (separator == cp && groupingUniSet.contains(cp)) {
+                    // Second or later grouping separator.
+                    if (requireGroupingMatch && currGroup != grouping2) {
+                        break;
+                    }
+                } else if (separator != cp && decimalUniSet.contains(cp)) {
+                    // Decimal separator.
+                    if (requireGroupingMatch && currGroup != grouping1) {
+                        break;
+                    }
+                    seenBothSeparators = true;
+                } else {
+                    // Invalid separator.
+                    break;
+                }
+                currGroup = 0;
+                lastSeparatorOffset = segment.getOffset();
+                segment.adjustOffset(Character.charCount(cp));
+                continue;
+            }
+
+            break;
+        }
+
+        // A separator can be seen before any digit (e.g. input "." or ","), in which case no quantity exists yet;
+        // skip the magnitude fix-ups then instead of dereferencing null.
+        // NOTE(review): in scientific mode these fix-ups act on the mantissa in result.quantity, driven by
+        // separators found in the exponent -- confirm whether that is intended.
+        boolean hasQuantity = result.quantity != null;
+        if (seenBothSeparators || (separator != -1 && decimalUniSet.contains(separator))) {
+            // The digits after the decimal separator are fraction digits.
+            if (hasQuantity) {
+                result.quantity.adjustMagnitude(-currGroup);
+            }
+        } else if (requireGroupingMatch && separator != -1 && groupingUniSet.contains(separator)
+                && currGroup != grouping1) {
+            // The last group has the wrong size: drop it and rewind to the separator that preceded it.
+            if (hasQuantity) {
+                result.quantity.adjustMagnitude(-currGroup);
+                result.quantity.roundToMagnitude(0, RoundingUtils.mathContextUnlimited(RoundingMode.FLOOR));
+            }
+            segment.setOffset(lastSeparatorOffset);
+        }
+
+        return segment.length() == 0 || hasPartialPrefix || segment.isLeadingSurrogate();
+    }
+
+    @Override
+    public void postProcess(ParsedNumber result) {
+        // No-op
+    }
+
+    @Override
+    public String toString() {
+        return "<DecimalMatcher>";
+    }
+}
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/MatcherUtils.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/MatcherUtils.java
new file mode 100644 (file)
index 0000000..325837f
--- /dev/null
@@ -0,0 +1,15 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.impl.number.parse;
+
+/**
+ * Static helpers shared by the number parse matchers.
+ *
+ * @author sffc
+ */
+public class MatcherUtils {
+    /**
+     * Returns whether {@code cp} is a Unicode scalar value: inside the code point range and not a surrogate.
+     *
+     * @param cp
+     *            The candidate code point.
+     * @return true for 0..0xD7FF and 0xE000..0x10FFFF; false for surrogates and out-of-range values.
+     */
+    public static boolean isValidCodePoint(int cp) {
+        if (!Character.isValidCodePoint(cp)) {
+            return false;
+        }
+        // In range; the only remaining exclusions are the surrogate code units.
+        return cp < Character.MIN_SURROGATE || cp > Character.MAX_SURROGATE;
+    }
+
+}
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/MinusSignMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/MinusSignMatcher.java
new file mode 100644 (file)
index 0000000..707fd44
--- /dev/null
@@ -0,0 +1,31 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.impl.number.parse;
+
+import com.ibm.icu.text.UnicodeSet;
+
+/**
+ * Symbol matcher for the minus sign. Accepting a match sets {@link ParsedNumber#FLAG_NEGATIVE} on the result, and
+ * the matcher reports itself disabled once that flag is set.
+ *
+ * @author sffc
+ */
+public class MinusSignMatcher extends SymbolMatcher {
+
+    public MinusSignMatcher() {
+        // NOTE(review): how SymbolMatcher uses the string and the set is not visible here -- confirm there.
+        super("-", new UnicodeSet("[-_]"));
+    }
+
+    @Override
+    protected boolean isDisabled(ParsedNumber result) {
+        final int negativeBit = result.flags & ParsedNumber.FLAG_NEGATIVE;
+        return negativeBit != 0;
+    }
+
+    @Override
+    protected void accept(ParsedNumber result) {
+        result.flags = result.flags | ParsedNumber.FLAG_NEGATIVE;
+    }
+
+    @Override
+    public String toString() {
+        return "<MinusSignMatcher>";
+    }
+}
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParseMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParseMatcher.java
new file mode 100644 (file)
index 0000000..5b87869
--- /dev/null
@@ -0,0 +1,32 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.impl.number.parse;
+
+/**
+ * A single step of the number parser. Each implementation recognizes one kind of token at the front of a
+ * {@link StringSegment} and records what it found in a {@link ParsedNumber}.
+ *
+ * @author sffc
+ *
+ */
+public interface NumberParseMatcher {
+    /**
+     * Runs this matcher starting at the beginning of the given StringSegment. If this matcher finds something
+     * interesting in the StringSegment, it should update the offset of the StringSegment corresponding to how many
+     * chars were matched.
+     *
+     * @param segment
+     *            The StringSegment to match against. Matches always start at the beginning of the segment. The segment
+     *            is guaranteed to contain at least one char.
+     * @param result
+     *            The data structure to store results if the match succeeds.
+     * @return Whether this matcher thinks there may be more interesting chars beyond the end of the string segment.
+     */
+    public boolean match(StringSegment segment, ParsedNumber result);
+
+    /**
+     * Method called at the end of a parse, after all matchers have failed to consume any more chars. Allows a matcher
+     * to make final modifications to the result given the knowledge that no more matches are possible.
+     *
+     * @param result
+     *            The data structure to store results.
+     */
+    public void postProcess(ParsedNumber result);
+}
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java
new file mode 100644 (file)
index 0000000..b01f800
--- /dev/null
@@ -0,0 +1,168 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.impl.number.parse;
+
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+
+import com.ibm.icu.impl.number.AffixPatternProvider;
+import com.ibm.icu.impl.number.AffixUtils;
+import com.ibm.icu.impl.number.MutablePatternModifier;
+import com.ibm.icu.impl.number.PatternStringParser;
+import com.ibm.icu.number.NumberFormatter.SignDisplay;
+import com.ibm.icu.number.NumberFormatter.UnitWidth;
+import com.ibm.icu.text.DecimalFormatSymbols;
+import com.ibm.icu.util.Currency;
+import com.ibm.icu.util.ULocale;
+
+/**
+ * Primary number parsing implementation class. Holds an ordered list of {@link NumberParseMatcher}s and drives
+ * them over the input either greedily (first matcher that consumes wins) or exhaustively (best result according
+ * to the configured comparator wins).
+ *
+ * @author sffc
+ *
+ */
+public class NumberParserImpl {
+    /**
+     * Builds a frozen parser for the given pattern string, using English symbols and USD as the currency.
+     *
+     * @param pattern
+     *            The pattern string handed to {@link PatternStringParser#parseToPatternInfo}.
+     * @return The configured parser.
+     */
+    public static NumberParserImpl createParserFromPattern(String pattern) {
+        NumberParserImpl result = new NumberParserImpl();
+        ULocale locale = ULocale.ENGLISH;
+        DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(locale);
+
+        MutablePatternModifier modifier = new MutablePatternModifier(false);
+        AffixPatternProvider patternInfo = PatternStringParser.parseToPatternInfo(pattern);
+        modifier.setPatternInfo(patternInfo);
+        modifier.setPatternAttributes(SignDisplay.AUTO, false);
+        modifier.setSymbols(symbols, Currency.getInstance("USD"), UnitWidth.FULL_NAME, null);
+
+        // Affix matchers go first, followed by the generic token matchers.
+        AffixMatcher.generateFromPatternModifier(modifier, affixFlags(patternInfo), result);
+        result.addMatcher(DecimalMatcher.getInstance(symbols));
+        result.addMatcher(WhitespaceMatcher.getInstance());
+        result.addMatcher(new MinusSignMatcher());
+        result.addMatcher(new ScientificMatcher(symbols));
+        result.addMatcher(new CurrencyMatcher(locale));
+
+        // A result that consumed more chars ranks higher.
+        result.setComparator(new Comparator<ParsedNumber>() {
+            @Override
+            public int compare(ParsedNumber o1, ParsedNumber o2) {
+                return o1.charsConsumed - o2.charsConsumed;
+            }
+        });
+        result.freeze();
+        return result;
+    }
+
+    /**
+     * Computes the percent/permille flags implied by the symbols present in the pattern.
+     */
+    private static int affixFlags(AffixPatternProvider patternInfo) {
+        int flags = 0;
+        if (patternInfo.containsSymbolType(AffixUtils.TYPE_PERCENT)) {
+            flags |= ParsedNumber.FLAG_PERCENT;
+        }
+        if (patternInfo.containsSymbolType(AffixUtils.TYPE_PERMILLE)) {
+            flags |= ParsedNumber.FLAG_PERMILLE;
+        }
+        return flags;
+    }
+
+    private final List<NumberParseMatcher> matchers;
+    private Comparator<ParsedNumber> comparator;
+    private boolean frozen;
+
+    public NumberParserImpl() {
+        matchers = new ArrayList<NumberParseMatcher>();
+        frozen = false;
+    }
+
+    /** Appends a matcher; matchers are tried in insertion order. */
+    public void addMatcher(NumberParseMatcher matcher) {
+        matchers.add(matcher);
+    }
+
+    /** Sets the comparator used by the non-greedy parse to pick the better of two results. */
+    public void setComparator(Comparator<ParsedNumber> comparator) {
+        this.comparator = comparator;
+    }
+
+    /** Marks configuration as finished; {@link #parse} asserts that this has been called. */
+    public void freeze() {
+        frozen = true;
+    }
+
+    /**
+     * Parses the input into {@code result}, then gives every matcher a chance to post-process the result.
+     *
+     * @param input
+     *            The string to parse.
+     * @param greedy
+     *            true to follow only the first matcher that consumes chars at each step; false to explore all
+     *            matchers and keep the best result.
+     * @param result
+     *            Receives the parse result.
+     */
+    public void parse(String input, boolean greedy, ParsedNumber result) {
+        assert frozen;
+        StringSegment segment = new StringSegment(input);
+        if (!greedy) {
+            parseLongestRecursive(segment, result);
+        } else {
+            parseGreedyRecursive(segment, result);
+        }
+        for (NumberParseMatcher matcher : matchers) {
+            matcher.postProcess(result);
+        }
+    }
+
+    private void parseGreedyRecursive(StringSegment segment, ParsedNumber result) {
+        // Base Case
+        if (segment.length() == 0) {
+            return;
+        }
+
+        final int startOffset = segment.getOffset();
+        for (NumberParseMatcher matcher : matchers) {
+            matcher.match(segment, result);
+            if (segment.getOffset() == startOffset) {
+                // This matcher consumed nothing; try the next one.
+                continue;
+            }
+            // In a greedy parse, recurse on only the first match.
+            parseGreedyRecursive(segment, result);
+            // Restore the offset so the StringSegment looks the same across the function call boundary. Since
+            // we recurse only once, this is not strictly necessary.
+            segment.setOffset(startOffset);
+            return;
+        }
+
+        // NOTE: If we get here, the greedy parse completed without consuming the entire string.
+    }
+
+    private void parseLongestRecursive(StringSegment segment, ParsedNumber result) {
+        // Base Case
+        if (segment.length() == 0) {
+            return;
+        }
+
+        // TODO: Give a nice way for the matcher to reset the ParsedNumber?
+        ParsedNumber baseline = new ParsedNumber();
+        baseline.copyFrom(result);
+        ParsedNumber candidate = new ParsedNumber();
+
+        final int startOffset = segment.getOffset();
+        for (NumberParseMatcher matcher : matchers) {
+            // In a non-greedy parse, we attempt all possible matches and pick the best.
+            for (int width = 1; width <= segment.length(); width++) {
+                candidate.copyFrom(baseline);
+
+                // Run the matcher on a segment of the current length.
+                segment.setLength(width);
+                boolean wantsMore = matcher.match(segment, candidate);
+                segment.resetLength();
+
+                // If the entire segment was consumed, recurse.
+                boolean consumedAll = (segment.getOffset() - startOffset == width);
+                if (consumedAll) {
+                    parseLongestRecursive(segment, candidate);
+                    if (comparator.compare(candidate, result) > 0) {
+                        result.copyFrom(candidate);
+                    }
+                }
+
+                // Since the segment can be re-used, reset the offset.
+                // This does not have an effect if the matcher did not consume any chars.
+                segment.setOffset(startOffset);
+
+                // Unless the matcher wants to see the next char, continue to the next matcher.
+                if (!wantsMore) {
+                    break;
+                }
+            }
+        }
+    }
+
+    @Override
+    public String toString() {
+        return "<NumberParserImpl matchers=" + matchers.toString() + ">";
+    }
+}
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ParsedNumber.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ParsedNumber.java
new file mode 100644 (file)
index 0000000..3ab53f1
--- /dev/null
@@ -0,0 +1,52 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.impl.number.parse;
+
+import com.ibm.icu.impl.number.DecimalQuantity;
+import com.ibm.icu.impl.number.DecimalQuantity_DualStorageBCD;
+
+/**
+ * @author sffc
+ *
+ */
+public class ParsedNumber {
+
+    public DecimalQuantity_DualStorageBCD quantity = null;
+    public int charsConsumed = 0;
+    public int flags = 0;
+    public String prefix = null;
+    public String suffix = null;
+    public int scientificAdjustment = 0;
+    public String currencyCode = null;
+
+    public static final int FLAG_NEGATIVE = 0x0001;
+    public static final int FLAG_PERCENT = 0x0002;
+    public static final int FLAG_PERMILLE = 0x0004;
+
+    /**
+     * @param other
+     */
+    public void copyFrom(ParsedNumber other) {
+        quantity = other.quantity;
+        charsConsumed = other.charsConsumed;
+        flags = other.flags;
+        prefix = other.prefix;
+        suffix = other.suffix;
+        scientificAdjustment = other.scientificAdjustment;
+        currencyCode = other.currencyCode;
+    }
+
+    public void setCharsConsumed(StringSegment segment) {
+        charsConsumed = segment.getOffset();
+    }
+
+    public double getDouble() {
+        DecimalQuantity copy = quantity.createCopy();
+        copy.adjustMagnitude(scientificAdjustment);
+        double d = copy.toDouble();
+        if (0 != (flags & FLAG_NEGATIVE)) {
+            d = -d;
+        }
+        return d;
+    }
+}
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ScientificMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ScientificMatcher.java
new file mode 100644 (file)
index 0000000..1fae159
--- /dev/null
@@ -0,0 +1,59 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.impl.number.parse;
+
+import com.ibm.icu.text.DecimalFormatSymbols;
+
+/**
+ * @author sffc
+ *
+ */
+public class ScientificMatcher implements NumberParseMatcher {
+
+    private final String exponentSeparatorString;
+    private final DecimalMatcher exponentMatcher;
+
+    public ScientificMatcher(DecimalFormatSymbols symbols) {
+        exponentSeparatorString = symbols.getExponentSeparator();
+        exponentMatcher = DecimalMatcher.getExponentInstance(symbols);
+    }
+
+    @Override
+    public boolean match(StringSegment segment, ParsedNumber result) {
+        // Only accept scientific notation after the mantissa.
+        if (result.quantity == null) {
+            return false;
+        }
+
+        // First match the scientific separator, and then match another number after it.
+        int overlap = segment.getCommonPrefixLength(exponentSeparatorString);
+        if (overlap == exponentSeparatorString.length()) {
+            // Full exponent separator match; try to match digits.
+            segment.adjustOffset(overlap);
+            int digitsOffset = segment.getOffset();
+            boolean digitsReturnValue = exponentMatcher.match(segment, result);
+            if (segment.getOffset() == digitsOffset) {
+                // No digits were matched; un-match the exponent separator.
+                segment.adjustOffset(-overlap);
+            }
+            return digitsReturnValue;
+
+        } else if (overlap == segment.length()) {
+            // Partial exponent separator match
+            return true;
+        }
+
+        // No match
+        return false;
+    }
+
+    @Override
+    public void postProcess(ParsedNumber result) {
+        // No-op
+    }
+
+    @Override
+    public String toString() {
+        return "<ScientificMatcher " + exponentSeparatorString + ">";
+    }
+}
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/StringSegment.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/StringSegment.java
new file mode 100644 (file)
index 0000000..16a58e5
--- /dev/null
@@ -0,0 +1,105 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.impl.number.parse;
+
+/**
+ * A mutable class allowing for a String with a variable offset and length. The charAt, length, and subSequence methods
+ * all operate relative to the fixed offset into the String.
+ *
+ * @author sffc
+ */
+public class StringSegment implements CharSequence {
+    private final String str;
+    private int start;
+    private int end;
+
+    public StringSegment(String str) {
+        this.str = str;
+        this.start = 0;
+        this.end = str.length();
+    }
+
+    public int getOffset() {
+        return start;
+    }
+
+    public void setOffset(int start) {
+        assert start <= end;
+        this.start = start;
+    }
+
+    public void adjustOffset(int delta) {
+        assert start + delta >= 0;
+        assert start + delta <= end;
+        start += delta;
+    }
+
+    public void setLength(int length) {
+        assert length >= 0;
+        assert start + length <= str.length();
+        end = start + length;
+    }
+
+    public void resetLength() {
+        end = str.length();
+    }
+
+    @Override
+    public int length() {
+        return end - start;
+    }
+
+    @Override
+    public char charAt(int index) {
+        return str.charAt(index + start);
+    }
+
+    @Override
+    public CharSequence subSequence(int start, int end) {
+        throw new AssertionError(); // Never used
+        // Possible implementation:
+        // return str.subSequence(start + this.start, end + this.start);
+    }
+
+    /**
+     * Returns the first code point in the string segment, or -1 if the string starts with an invalid code point.
+     */
+    public int getCodePoint() {
+        assert start < end;
+        char lead = str.charAt(start);
+        if (Character.isHighSurrogate(lead) && start + 1 < end) {
+            return Character.toCodePoint(lead, str.charAt(start + 1));
+        } else if (Character.isSurrogate(lead)) {
+            return -1;
+        } else {
+            return lead;
+        }
+    }
+
+    /**
+     * Returns whether the segment is one char in length, and that the char is a leading surrogate.
+     */
+    public boolean isLeadingSurrogate() {
+        return (end - start == 1) && Character.isHighSurrogate(str.charAt(start));
+    }
+
+    /**
+     * Returns the length of the prefix shared by this StringSegment and the given CharSequence. For example, if this
+     * string segment is "aab", and the char sequence is "aac", this method returns 2, since the first 2 characters are
+     * the same.
+     */
+    public int getCommonPrefixLength(CharSequence other) {
+        int offset = 0;
+        for (; offset < Math.min(length(), other.length()); offset++) {
+            if (charAt(offset) != other.charAt(offset)) {
+                break;
+            }
+        }
+        return offset;
+    }
+
+    @Override
+    public String toString() {
+        return str.substring(0, start) + "[" + str.substring(start, end) + "]" + str.substring(end);
+    }
+}
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/SymbolMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/SymbolMatcher.java
new file mode 100644 (file)
index 0000000..efa17be
--- /dev/null
@@ -0,0 +1,50 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.impl.number.parse;
+
+import com.ibm.icu.text.UnicodeSet;
+
+/**
+ * @author sffc
+ *
+ */
+public abstract class SymbolMatcher implements NumberParseMatcher {
+    private final String string;
+    private final UnicodeSet uniSet;
+
+    protected SymbolMatcher(String symbolString, UnicodeSet symbolUniSet) {
+        string = symbolString;
+        uniSet = symbolUniSet;
+    }
+
+    @Override
+    public boolean match(StringSegment segment, ParsedNumber result) {
+        // Smoke test first; this matcher might be disabled.
+        if (isDisabled(result)) {
+            return false;
+        }
+
+        int cp = segment.getCodePoint();
+        if (cp != -1 && uniSet.contains(cp)) {
+            accept(result);
+            segment.adjustOffset(Character.charCount(cp));
+            return false;
+        }
+        int overlap = segment.getCommonPrefixLength(string);
+        if (overlap == string.length()) {
+            accept(result);
+            segment.adjustOffset(string.length());
+            return false;
+        }
+        return overlap == segment.length();
+    }
+
+    @Override
+    public void postProcess(ParsedNumber result) {
+        // No-op
+    }
+
+    protected abstract boolean isDisabled(ParsedNumber result);
+
+    protected abstract void accept(ParsedNumber result);
+}
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/WhitespaceMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/WhitespaceMatcher.java
new file mode 100644 (file)
index 0000000..51ed99c
--- /dev/null
@@ -0,0 +1,48 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.impl.number.parse;
+
+import com.ibm.icu.text.UnicodeSet;
+
+/**
+ * @author sffc
+ *
+ */
+public class WhitespaceMatcher implements NumberParseMatcher {
+
+    // This set was decided after discussion with icu-design@. See ticket #13309.
+    // Zs+TAB is "horizontal whitespace" according to UTS #18 (blank property).
+    private static final UnicodeSet UNISET_WHITESPACE = new UnicodeSet("[[:Zs:][\\u0009]]").freeze();
+
+    private static final WhitespaceMatcher INSTANCE = new WhitespaceMatcher();
+
+    public static WhitespaceMatcher getInstance() {
+        return INSTANCE;
+    }
+
+    private WhitespaceMatcher() {
+    }
+
+    @Override
+    public boolean match(StringSegment segment, ParsedNumber result) {
+        while (segment.length() > 0) {
+            int cp = segment.getCodePoint();
+            if (cp == -1 || !UNISET_WHITESPACE.contains(cp)) {
+                break;
+            }
+            segment.adjustOffset(Character.charCount(cp));
+            // Note: Do not touch the charsConsumed.
+        }
+        return segment.length() == 0 || segment.isLeadingSurrogate();
+    }
+
+    @Override
+    public void postProcess(ParsedNumber result) {
+        // No-op
+    }
+
+    @Override
+    public String toString() {
+        return "<WhitespaceMatcher>";
+    }
+}
index 3ac0de71572758d9eebfd19c8e96a04265fb0d47..98df5ec88fe3d6d01955fe8268efc3939c8586a6 100644 (file)
@@ -3031,11 +3031,11 @@ public class TimeZoneFormat extends UFormat implements Freezable<TimeZoneFormat>
             }
         }
 
-        int[] matchLen = new int[] {0};
-        Iterator<String> itr = ZONE_ID_TRIE.get(text, pos.getIndex(), matchLen);
+        TextTrieMap.Output trieOutput = new TextTrieMap.Output();
+        Iterator<String> itr = ZONE_ID_TRIE.get(text, pos.getIndex(), trieOutput);
         if (itr != null) {
             resolvedID = itr.next();
-            pos.setIndex(pos.getIndex() + matchLen[0]);
+            pos.setIndex(pos.getIndex() + trieOutput.matchLength);
         } else {
             // TODO
             // We many need to handle rule based custom zone ID (See ZoneMeta.parseCustomID),
@@ -3074,11 +3074,11 @@ public class TimeZoneFormat extends UFormat implements Freezable<TimeZoneFormat>
             }
         }
 
-        int[] matchLen = new int[] {0};
-        Iterator<String> itr = SHORT_ZONE_ID_TRIE.get(text, pos.getIndex(), matchLen);
+        TextTrieMap.Output trieOutput = new TextTrieMap.Output();
+        Iterator<String> itr = SHORT_ZONE_ID_TRIE.get(text, pos.getIndex(), trieOutput);
         if (itr != null) {
             resolvedID = itr.next();
-            pos.setIndex(pos.getIndex() + matchLen[0]);
+            pos.setIndex(pos.getIndex() + trieOutput.matchLength);
         } else {
             pos.setErrorIndex(pos.getIndex());
         }
index ef3d5be7bac1f4eb8ed026eca8668f5d8fa12a8e..44612d1379f6fa27b456ba020340727fd1bff702 100644 (file)
@@ -747,6 +747,15 @@ public class Currency extends MeasureUnit {
         return isoResult;
     }
 
+    public static TextTrieMap<CurrencyStringInfo> getParsingTrie(ULocale locale, int type) {
+        List<TextTrieMap<CurrencyStringInfo>> currencyTrieVec = getCurrencyTrieVec(locale);
+        if (type == Currency.LONG_NAME) {
+            return currencyTrieVec.get(0);
+        } else {
+            return currencyTrieVec.get(1);
+        }
+    }
+
     /**
      * @internal
      * @deprecated This API is ICU internal only.
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java
new file mode 100644 (file)
index 0000000..0c9fdf3
--- /dev/null
@@ -0,0 +1,85 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.dev.test.number;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+
+import org.junit.Test;
+
+import com.ibm.icu.impl.number.parse.NumberParserImpl;
+import com.ibm.icu.impl.number.parse.ParsedNumber;
+
+/**
+ * @author sffc
+ *
+ */
+public class NumberParserTest {
+    @Test
+    public void testBasic() {
+        Object[][] cases = new Object[][] {
+                // Fields:
+                // a) Flags:
+                // --- Bit 0x01 => Test greedy implementation
+                // --- Bit 0x02 => Test slow implementation
+                // b) Input string
+                // c) Pattern
+                // d) Expected chars consumed
+                // e) Expected double result
+                { 3, "51423", "0", 5, 51423. },
+                { 3, "51423x", "0", 5, 51423. },
+                { 3, " 51423", "0", 6, 51423. },
+                { 3, "51423 ", "0", 5, 51423. },
+                { 3, "𝟱𝟭𝟰𝟮𝟯", "0", 10, 51423. },
+                { 3, "𝟱𝟭𝟰𝟮𝟯x", "0", 10, 51423. },
+                { 3, " 𝟱𝟭𝟰𝟮𝟯", "0", 11, 51423. },
+                { 3, "𝟱𝟭𝟰𝟮𝟯 ", "0", 10, 51423. },
+                { 3, "𝟱𝟭,𝟰𝟮𝟯", "0", 11, 51423. },
+                { 3, "𝟳𝟴,𝟵𝟱𝟭,𝟰𝟮𝟯", "0", 18, 78951423. },
+                { 3, "𝟳𝟴,𝟵𝟱𝟭.𝟰𝟮𝟯", "0", 18, 78951.423 },
+                { 3, "𝟳𝟴,𝟬𝟬𝟬", "0", 11, 78000. },
+                { 3, "𝟳𝟴,𝟬𝟬𝟬.𝟬𝟬𝟬", "0", 18, 78000. },
+                { 3, "𝟳𝟴,𝟬𝟬𝟬.𝟬𝟮𝟯", "0", 18, 78000.023 },
+                { 3, "𝟳𝟴.𝟬𝟬𝟬.𝟬𝟮𝟯", "0", 11, 78. },
+                { 3, "-𝟱𝟭𝟰𝟮𝟯", "0", 11, -51423. },
+                { 3, "-𝟱𝟭𝟰𝟮𝟯-", "0", 11, -51423. },
+                { 3, "a51423US dollars", "a0¤¤¤", 16, 51423. },
+                { 3, "a 51423 US dollars", "a0¤¤¤", 18, 51423. },
+                { 3, "a 𝟱𝟭𝟰𝟮𝟯 b", "a0b", 14, 51423. },
+                { 3, "-a 𝟱𝟭𝟰𝟮𝟯 b", "a0b", 15, -51423. },
+                { 3, "a -𝟱𝟭𝟰𝟮𝟯 b", "a0b", 15, -51423. },
+                { 1, "a40b", "a0'0b'", 3, 40. }, // greedy code path thinks "40" is the number
+                { 2, "a40b", "a0'0b'", 4, 4. }, // slow code path find the suffix "0b"
+                { 3, "𝟱.𝟭𝟰𝟮E𝟯", "0", 12, 5142. },
+                { 3, "5,142.50 Canadian dollars", "0", 25, 5142.5 },
+                { 3, "0", "0", 1, 0.0 } };
+
+        for (Object[] cas : cases) {
+            int flags = (Integer) cas[0];
+            String input = (String) cas[1];
+            String pattern = (String) cas[2];
+            int expectedCharsConsumed = (Integer) cas[3];
+            double resultDouble = (Double) cas[4];
+            NumberParserImpl parser = NumberParserImpl.createParserFromPattern(pattern);
+            String message = "Input <" + input + "> Parser " + parser;
+
+            if (0 != (flags & 0x01)) {
+                // Test greedy code path
+                ParsedNumber resultObject = new ParsedNumber();
+                parser.parse(input, true, resultObject);
+                assertNotNull(message, resultObject.quantity);
+                assertEquals(message, resultDouble, resultObject.getDouble(), 0.0);
+                assertEquals(message, expectedCharsConsumed, resultObject.charsConsumed);
+            }
+
+            if (0 != (flags & 0x02)) {
+                // Test slow code path
+                ParsedNumber resultObject = new ParsedNumber();
+                parser.parse(input, false, resultObject);
+                assertNotNull(message, resultObject.quantity);
+                assertEquals(message, resultDouble, resultObject.getDouble(), 0.0);
+                assertEquals(message, expectedCharsConsumed, resultObject.charsConsumed);
+            }
+        }
+    }
+}
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/StringSegmentTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/StringSegmentTest.java
new file mode 100644 (file)
index 0000000..6b52e54
--- /dev/null
@@ -0,0 +1,109 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.dev.test.number;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import org.junit.Test;
+
+import com.ibm.icu.impl.number.parse.StringSegment;
+
+/**
+ * @author sffc
+ *
+ */
+public class StringSegmentTest {
+    static final String SAMPLE_STRING = "📻 radio 📻";
+
+    @Test
+    public void testOffset() {
+        StringSegment segment = new StringSegment(SAMPLE_STRING);
+        assertEquals(0, segment.getOffset());
+        segment.adjustOffset(3);
+        assertEquals(3, segment.getOffset());
+        segment.adjustOffset(2);
+        assertEquals(5, segment.getOffset());
+        segment.setOffset(4);
+        assertEquals(4, segment.getOffset());
+    }
+
+    @Test
+    public void testLength() {
+        StringSegment segment = new StringSegment(SAMPLE_STRING);
+        assertEquals(11, segment.length());
+        segment.adjustOffset(3);
+        assertEquals(8, segment.length());
+        segment.setLength(4);
+        assertEquals(4, segment.length());
+        segment.setOffset(5);
+        assertEquals(2, segment.length());
+        segment.resetLength();
+        assertEquals(6, segment.length());
+    }
+
+    @Test
+    public void testCharAt() {
+        StringSegment segment = new StringSegment(SAMPLE_STRING);
+        assertCharSequenceEquals(SAMPLE_STRING, segment);
+        segment.adjustOffset(3);
+        assertCharSequenceEquals("radio 📻", segment);
+        segment.setLength(5);
+        assertCharSequenceEquals("radio", segment);
+    }
+
+    @Test
+    public void testGetCodePoint() {
+        StringSegment segment = new StringSegment(SAMPLE_STRING);
+        assertEquals(0x1F4FB, segment.getCodePoint());
+        segment.setLength(1);
+        assertEquals(-1, segment.getCodePoint());
+        segment.resetLength();
+        segment.adjustOffset(1);
+        assertEquals(-1, segment.getCodePoint());
+        segment.adjustOffset(1);
+        assertEquals(0x20, segment.getCodePoint());
+    }
+
+    @Test
+    public void testIsLeadingSurrogate() {
+        StringSegment segment = new StringSegment(SAMPLE_STRING);
+        assertFalse(segment.isLeadingSurrogate());
+        segment.setLength(1);
+        assertTrue(segment.isLeadingSurrogate());
+        segment.adjustOffset(1);
+        segment.setLength(1);
+        assertFalse(segment.isLeadingSurrogate()); // trail, not lead
+    }
+
+    @Test
+    public void testCommonPrefixLength() {
+        StringSegment segment = new StringSegment(SAMPLE_STRING);
+        assertEquals(11, segment.getCommonPrefixLength(SAMPLE_STRING));
+        assertEquals(4, segment.getCommonPrefixLength("📻 r"));
+        assertEquals(3, segment.getCommonPrefixLength("📻 x"));
+        assertEquals(0, segment.getCommonPrefixLength("x"));
+        assertEquals(0, segment.getCommonPrefixLength(""));
+        segment.adjustOffset(3);
+        assertEquals(5, segment.getCommonPrefixLength("radio"));
+        assertEquals(2, segment.getCommonPrefixLength("rafio"));
+        assertEquals(0, segment.getCommonPrefixLength("fadio"));
+        assertEquals(0, segment.getCommonPrefixLength(""));
+        segment.setLength(3);
+        assertEquals(3, segment.getCommonPrefixLength("radio"));
+        assertEquals(2, segment.getCommonPrefixLength("rafio"));
+        assertEquals(0, segment.getCommonPrefixLength("fadio"));
+        assertEquals(0, segment.getCommonPrefixLength(""));
+        segment.resetLength();
+        segment.setOffset(11); // end of string
+        assertEquals(0, segment.getCommonPrefixLength("foo"));
+    }
+
+    private static void assertCharSequenceEquals(CharSequence a, CharSequence b) {
+        assertEquals(a.length(), b.length());
+        for (int i = 0; i < a.length(); i++) {
+            assertEquals(a.charAt(i), b.charAt(i));
+        }
+    }
+}