]> granicus.if.org Git - icu/commitdiff
ICU-13571 Switching number parsing code back to incremental code point case folding.
author Shane Carr <shane@unicode.org>
Thu, 8 Feb 2018 08:43:12 +0000 (08:43 +0000)
committer Shane Carr <shane@unicode.org>
Thu, 8 Feb 2018 08:43:12 +0000 (08:43 +0000)
X-SVN-Rev: 40868

15 files changed:
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixPatternMatcher.java
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CodePointMatcher.java
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CurrencyMatcher.java
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/MatcherFactory.java
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NanMatcher.java
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ParsingUtils.java
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ScientificMatcher.java
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/StringSegment.java
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/SymbolMatcher.java
icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/UnicodeSetStaticCache.java
icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/MeasureUnitTest.java
icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTest.java
icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java
icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/StringSegmentTest.java

index ee041f64ab26389a5ca75dc10f0b6964d269ebea..43d3888579afd6d6a3481d65b9562a53ea1f9b67 100644 (file)
@@ -35,7 +35,6 @@ public class AffixPatternMatcher extends SeriesMatcher implements AffixUtils.Tok
             return null;
         }
 
-        affixPattern = ParsingUtils.maybeFold(affixPattern, parseFlags);
         AffixPatternMatcher series = new AffixPatternMatcher(affixPattern);
         series.factory = factory;
         series.ignorables = (0 != (parseFlags & ParsingUtils.PARSE_FLAG_EXACT_AFFIX)) ? null
index 385e73a5d89cd93fa9bfffc298de78884f067a20..8a0b7b9beb4d87cb5fed55904e65c491feecc268 100644 (file)
@@ -24,8 +24,8 @@ public class CodePointMatcher implements NumberParseMatcher {
 
     @Override
     public boolean match(StringSegment segment, ParsedNumber result) {
-        if (segment.getCodePoint() == cp) {
-            segment.adjustOffset(Character.charCount(cp));
+        if (segment.matches(cp)) {
+            segment.adjustOffsetByCodePoint();
             result.setCharsConsumed(segment);
         }
         return false;
index e760a0142b835e2a129ab5b2b0d6079d3e4c1ad8..d81c2e9f81a5e4ee1fcb44e68472526cb8963c01 100644 (file)
@@ -15,10 +15,10 @@ public class CurrencyMatcher implements NumberParseMatcher {
     private final String currency1;
     private final String currency2;
 
-    public static CurrencyMatcher getInstance(Currency currency, ULocale loc, int setupFlags) {
+    public static CurrencyMatcher getInstance(Currency currency, ULocale loc) {
         return new CurrencyMatcher(currency.getSubtype(),
-                ParsingUtils.maybeFold(currency.getSymbol(loc), setupFlags),
-                ParsingUtils.maybeFold(currency.getCurrencyCode(), setupFlags));
+                currency.getSymbol(loc),
+                currency.getCurrencyCode());
     }
 
     private CurrencyMatcher(String isoCode, String currency1, String currency2) {
index a1e3675869391e887c5a6b506e21a550a18635b1..d5640d4aadb08d60c23defe429cc862a3a88c3d1 100644 (file)
@@ -15,7 +15,6 @@ public class MatcherFactory {
     DecimalFormatSymbols symbols;
     IgnorablesMatcher ignorables;
     ULocale locale;
-    int parseFlags;
 
     public MinusSignMatcher minusSign(boolean allowTrailing) {
         return MinusSignMatcher.getInstance(symbols, allowTrailing);
@@ -35,7 +34,7 @@ public class MatcherFactory {
 
     public AnyMatcher currency() {
         AnyMatcher any = new AnyMatcher();
-        any.addMatcher(CurrencyMatcher.getInstance(currency, locale, parseFlags));
+        any.addMatcher(CurrencyMatcher.getInstance(currency, locale));
         any.addMatcher(CurrencyTrieMatcher.getInstance(locale));
         any.freeze();
         return any;
index 7664e1e72b259d34a5caca78cc42bcf8e635629a..c5b01255e98de09b32896e215b09c9c31df3fcc7 100644 (file)
@@ -2,7 +2,6 @@
 // License & terms of use: http://www.unicode.org/copyright.html#License
 package com.ibm.icu.impl.number.parse;
 
-import com.ibm.icu.lang.UCharacter;
 import com.ibm.icu.text.DecimalFormatSymbols;
 import com.ibm.icu.text.UnicodeSet;
 
@@ -13,14 +12,11 @@ import com.ibm.icu.text.UnicodeSet;
 public class NanMatcher extends SymbolMatcher {
 
     private static final NanMatcher DEFAULT = new NanMatcher("NaN");
-    private static final NanMatcher DEFAULT_FOLDED = new NanMatcher(UCharacter.foldCase("NaN", true));
 
     public static NanMatcher getInstance(DecimalFormatSymbols symbols, int parseFlags) {
-        String symbolString = ParsingUtils.maybeFold(symbols.getNaN(), parseFlags);
+        String symbolString = symbols.getNaN();
         if (DEFAULT.string.equals(symbolString)) {
             return DEFAULT;
-        } else if (DEFAULT_FOLDED.string.equals(symbolString)) {
-            return DEFAULT_FOLDED;
         } else {
             return new NanMatcher(symbolString);
         }
index ff59ca052cc57b4a913707d640311b586d5c4bc8..577cc1f850bc85a4372043000e9471a0b421c6d3 100644 (file)
@@ -31,6 +31,30 @@ import com.ibm.icu.util.ULocale;
  */
 public class NumberParserImpl {
 
+    @Deprecated
+    public static NumberParserImpl removeMeWhenMerged(ULocale locale, String pattern, int parseFlags) {
+        NumberParserImpl parser = new NumberParserImpl(parseFlags);
+        DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(locale);
+        IgnorablesMatcher ignorables = IgnorablesMatcher.DEFAULT;
+
+        MatcherFactory factory = new MatcherFactory();
+        factory.currency = Currency.getInstance("USD");
+        factory.symbols = symbols;
+        factory.ignorables = ignorables;
+        factory.locale = locale;
+
+        ParsedPatternInfo patternInfo = PatternStringParser.parseToPatternInfo(pattern);
+        AffixMatcher.newGenerate(patternInfo, parser, factory, ignorables, parseFlags);
+
+        Grouper grouper = Grouper.forStrategy(GroupingStrategy.AUTO).withLocaleData(locale, patternInfo);
+        parser.addMatcher(DecimalMatcher.getInstance(symbols, grouper, parseFlags));
+        parser.addMatcher(CurrencyTrieMatcher.getInstance(locale));
+        parser.addMatcher(NanMatcher.getInstance(symbols, parseFlags));
+
+        parser.freeze();
+        return parser;
+    }
+
     // TODO: Find a better place for this enum.
     /** Controls the set of rules for parsing a string. */
     public static enum ParseMode {
@@ -74,12 +98,13 @@ public class NumberParserImpl {
         // Temporary frontend for testing.
 
         int parseFlags = ParsingUtils.PARSE_FLAG_IGNORE_CASE
-                | ParsingUtils.PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES;
+                | ParsingUtils.PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES
+                | ParsingUtils.PARSE_FLAG_OPTIMIZE;
         if (strictGrouping) {
             parseFlags |= ParsingUtils.PARSE_FLAG_STRICT_GROUPING_SIZE;
         }
 
-        NumberParserImpl parser = new NumberParserImpl(parseFlags, true);
+        NumberParserImpl parser = new NumberParserImpl(parseFlags);
         DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(locale);
         IgnorablesMatcher ignorables = IgnorablesMatcher.DEFAULT;
 
@@ -88,7 +113,6 @@ public class NumberParserImpl {
         factory.symbols = symbols;
         factory.ignorables = ignorables;
         factory.locale = locale;
-        factory.parseFlags = parseFlags;
 
         ParsedPatternInfo patternInfo = PatternStringParser.parseToPatternInfo(pattern);
         AffixMatcher.newGenerate(patternInfo, parser, factory, ignorables, parseFlags);
@@ -99,7 +123,7 @@ public class NumberParserImpl {
         parser.addMatcher(DecimalMatcher.getInstance(symbols, grouper, parseFlags));
         parser.addMatcher(MinusSignMatcher.getInstance(symbols, false));
         parser.addMatcher(NanMatcher.getInstance(symbols, parseFlags));
-        parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper, parseFlags));
+        parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper));
         parser.addMatcher(CurrencyTrieMatcher.getInstance(locale));
         parser.addMatcher(new RequireNumberMatcher());
 
@@ -193,16 +217,18 @@ public class NumberParserImpl {
         if (parseCurrency || patternInfo.hasCurrencySign()) {
             parseFlags |= ParsingUtils.PARSE_FLAG_MONETARY_SEPARATORS;
         }
+        if (optimize) {
+            parseFlags |= ParsingUtils.PARSE_FLAG_OPTIMIZE;
+        }
         IgnorablesMatcher ignorables = isStrict ? IgnorablesMatcher.STRICT : IgnorablesMatcher.DEFAULT;
 
-        NumberParserImpl parser = new NumberParserImpl(parseFlags, optimize);
+        NumberParserImpl parser = new NumberParserImpl(parseFlags);
 
         MatcherFactory factory = new MatcherFactory();
         factory.currency = currency;
         factory.symbols = symbols;
         factory.ignorables = ignorables;
         factory.locale = locale;
-        factory.parseFlags = parseFlags;
 
         //////////////////////
         /// AFFIX MATCHERS ///
@@ -216,7 +242,7 @@ public class NumberParserImpl {
         ////////////////////////
 
         if (parseCurrency || patternInfo.hasCurrencySign()) {
-            parser.addMatcher(CurrencyMatcher.getInstance(currency, locale, parseFlags));
+            parser.addMatcher(CurrencyMatcher.getInstance(currency, locale));
             parser.addMatcher(CurrencyTrieMatcher.getInstance(locale));
         }
 
@@ -239,7 +265,7 @@ public class NumberParserImpl {
         parser.addMatcher(ignorables);
         parser.addMatcher(DecimalMatcher.getInstance(symbols, grouper, parseFlags));
         if (!properties.getParseNoExponent()) {
-            parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper, parseFlags));
+            parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper));
         }
 
         //////////////////
@@ -290,9 +316,9 @@ public class NumberParserImpl {
      *            or twice, set this to false; if it is going to be used hundreds of times, set it to
      *            true.
      */
-    public NumberParserImpl(int parseFlags, boolean optimize) {
+    public NumberParserImpl(int parseFlags) {
         matchers = new ArrayList<NumberParseMatcher>();
-        if (optimize) {
+        if (0 != (parseFlags & ParsingUtils.PARSE_FLAG_OPTIMIZE)) {
             leadCodePointses = new ArrayList<UnicodeSet>();
         } else {
             leadCodePointses = null;
@@ -306,9 +332,7 @@ public class NumberParserImpl {
         assert !frozen;
         this.matchers.add(matcher);
         if (leadCodePointses != null) {
-            UnicodeSet leadCodePoints = matcher.getLeadCodePoints();
-            assert leadCodePoints.isFrozen();
-            this.leadCodePointses.add(leadCodePoints);
+            addLeadCodePointsForMatcher(matcher);
         }
     }
 
@@ -317,13 +341,22 @@ public class NumberParserImpl {
         this.matchers.addAll(matchers);
         if (leadCodePointses != null) {
             for (NumberParseMatcher matcher : matchers) {
-                UnicodeSet leadCodePoints = matcher.getLeadCodePoints();
-                assert leadCodePoints.isFrozen();
-                this.leadCodePointses.add(leadCodePoints);
+                addLeadCodePointsForMatcher(matcher);
             }
         }
     }
 
+    private void addLeadCodePointsForMatcher(NumberParseMatcher matcher) {
+        UnicodeSet leadCodePoints = matcher.getLeadCodePoints();
+        assert leadCodePoints.isFrozen();
+        // The set is asserted frozen, so case closure works on a thawed clone. TODO: Avoid the clone.
+        if (0 != (parseFlags & ParsingUtils.PARSE_FLAG_IGNORE_CASE)) {
+            leadCodePoints = leadCodePoints.cloneAsThawed().closeOver(UnicodeSet.ADD_CASE_MAPPINGS)
+                    .freeze();
+        }
+        this.leadCodePointses.add(leadCodePoints);
+    }
+
     public void setComparator(Comparator<ParsedNumber> comparator) {
         assert !frozen;
         this.comparator = comparator;
@@ -353,7 +386,7 @@ public class NumberParserImpl {
     public void parse(String input, int start, boolean greedy, ParsedNumber result) {
         assert frozen;
         assert start >= 0 && start < input.length();
-        StringSegment segment = new StringSegment(ParsingUtils.maybeFold(input, parseFlags));
+        StringSegment segment = new StringSegment(input, parseFlags);
         segment.adjustOffset(start);
         if (greedy) {
             parseGreedyRecursive(segment, result);
index c4a0005c0e70a3c4b4931e5bc940fba5531cfaea..4d17cd618af952f1ba5bdef9f1b427bc5b92fba1 100644 (file)
@@ -2,7 +2,6 @@
 // License & terms of use: http://www.unicode.org/copyright.html#License
 package com.ibm.icu.impl.number.parse;
 
-import com.ibm.icu.lang.UCharacter;
 import com.ibm.icu.text.UnicodeSet;
 import com.ibm.icu.text.UnicodeSet.EntryRange;
 
@@ -23,6 +22,7 @@ public class ParsingUtils {
     public static final int PARSE_FLAG_EXACT_AFFIX = 0x0200;
     public static final int PARSE_FLAG_PLUS_SIGN_ALLOWED = 0x0400;
     public static final int PARSE_FLAG_FRACTION_GROUPING_DISABLED = 0x0800;
+    public static final int PARSE_FLAG_OPTIMIZE = 0x1000;
 
     public static void putLeadCodePoints(UnicodeSet input, UnicodeSet output) {
         for (EntryRange range : input.ranges()) {
@@ -39,16 +39,4 @@ public class ParsingUtils {
         }
     }
 
-    /**
-     * Case-folds the string if IGNORE_CASE flag is set; otherwise, returns the same string.
-     */
-    public static String maybeFold(String input, int parseFlags) {
-        UnicodeSet cwcf = UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.CWCF);
-        if (0 != (parseFlags & PARSE_FLAG_IGNORE_CASE) && cwcf.containsSome(input)) {
-            return UCharacter.foldCase(input, true);
-        } else {
-            return input;
-        }
-    }
-
 }
index a6c053af7eaa0e56e6f34a7dab8941b8936fbec9..329ee12ba6f5271015718ae1c925aecee6eb1884 100644 (file)
@@ -15,16 +15,13 @@ public class ScientificMatcher implements NumberParseMatcher {
     private final String exponentSeparatorString;
     private final DecimalMatcher exponentMatcher;
 
-    public static ScientificMatcher getInstance(
-            DecimalFormatSymbols symbols,
-            Grouper grouper,
-            int parseFlags) {
+    public static ScientificMatcher getInstance(DecimalFormatSymbols symbols, Grouper grouper) {
         // TODO: Static-initialize most common instances?
-        return new ScientificMatcher(symbols, grouper, parseFlags);
+        return new ScientificMatcher(symbols, grouper);
     }
 
-    private ScientificMatcher(DecimalFormatSymbols symbols, Grouper grouper, int parseFlags) {
-        exponentSeparatorString = ParsingUtils.maybeFold(symbols.getExponentSeparator(), parseFlags);
+    private ScientificMatcher(DecimalFormatSymbols symbols, Grouper grouper) {
+        exponentSeparatorString = symbols.getExponentSeparator();
         exponentMatcher = DecimalMatcher.getInstance(symbols,
                 grouper,
                 ParsingUtils.PARSE_FLAG_DECIMAL_SCIENTIFIC | ParsingUtils.PARSE_FLAG_INTEGER_ONLY);
@@ -47,19 +44,14 @@ public class ScientificMatcher implements NumberParseMatcher {
             if (segment.length() == 0) {
                 return true;
             }
-            int leadCp = segment.getCodePoint();
-            if (leadCp == -1) {
-                // Partial code point match
-                return true;
-            }
 
             // Allow a sign, and then try to match digits.
             boolean minusSign = false;
-            if (UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.MINUS_SIGN).contains(leadCp)) {
+            if (segment.matches(UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.MINUS_SIGN))) {
                 minusSign = true;
-                segment.adjustOffset(Character.charCount(leadCp));
-            } else if (UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.PLUS_SIGN).contains(leadCp)) {
-                segment.adjustOffset(Character.charCount(leadCp));
+                segment.adjustOffsetByCodePoint();
+            } else if (segment.matches(UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.PLUS_SIGN))) {
+                segment.adjustOffsetByCodePoint();
             }
 
             int digitsOffset = segment.getOffset();
index 6b92df6e368d1f9b8362be65f9badb8b74021ef4..bc0cab0c5d02bd733dd8e09e9ed418146c922384 100644 (file)
@@ -2,6 +2,9 @@
 // License & terms of use: http://www.unicode.org/copyright.html#License
 package com.ibm.icu.impl.number.parse;
 
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.text.UnicodeSet;
+
 /**
  * A mutable class allowing for a String with a variable offset and length. The charAt, length, and
  * subSequence methods all operate relative to the fixed offset into the String.
@@ -12,11 +15,13 @@ public class StringSegment implements CharSequence {
     private final String str;
     private int start;
     private int end;
+    private final boolean foldCase; // fixed at construction from PARSE_FLAG_IGNORE_CASE
 
-    public StringSegment(String str) {
+    public StringSegment(String str, int parseFlags) {
         this.str = str;
         this.start = 0;
         this.end = str.length();
+        this.foldCase = 0 != (parseFlags & ParsingUtils.PARSE_FLAG_IGNORE_CASE);
     }
 
     public int getOffset() {
@@ -42,6 +47,13 @@ public class StringSegment implements CharSequence {
         start += delta;
     }
 
+    /**
+     * Advances the offset past the current code point: 2 chars for a surrogate pair, otherwise 1.
+     */
+    public void adjustOffsetByCodePoint() {
+        start += Character.charCount(getCodePoint());
+    }
+
     public void setLength(int length) {
         assert length >= 0;
         assert start + length <= str.length();
@@ -72,6 +84,10 @@ public class StringSegment implements CharSequence {
     /**
      * Returns the first code point in the string segment, or -1 if the string starts with an invalid
      * code point.
+     *
+     * <p>
+     * <strong>Important:</strong> Most of the time, you should use {@link #matches}, which handles case
+     * folding logic, instead of this method.
      */
     public int getCodePoint() {
         assert start < end;
@@ -85,15 +101,56 @@ public class StringSegment implements CharSequence {
         }
     }
 
+    /**
+     * Returns true if the first code point of this StringSegment equals the given code point.
+     *
+     * <p>
+     * This method will perform case folding if case folding is enabled for the parser.
+     */
+    public boolean matches(int otherCp) {
+        return codePointsEqual(getCodePoint(), otherCp, foldCase);
+    }
+
+    /**
+     * Tests whether the leading code point of this StringSegment is a member of the given
+     * UnicodeSet. The lookup itself applies no case folding.
+     */
+    public boolean matches(UnicodeSet uniset) {
+        // TODO: Move UnicodeSet case-folding logic here.
+        // TODO: Handle string matches here instead of separately.
+        int leadCp = getCodePoint();
+        // getCodePoint() reports an invalid code point as -1, which can never be a set member.
+        boolean isValid = leadCp != -1;
+        return isValid && uniset.contains(leadCp);
+    }
+
     /**
      * Returns the length of the prefix shared by this StringSegment and the given CharSequence. For
      * example, if this string segment is "aab", and the char sequence is "aac", this method returns 2,
      * since the first 2 characters are the same.
+     *
+     * <p>
+     * This method will perform case folding if case folding is enabled for the parser.
      */
     public int getCommonPrefixLength(CharSequence other) {
+        return getPrefixLengthInternal(other, foldCase);
+    }
+
+    /**
+     * Like {@link #getCommonPrefixLength}, but never performs case folding, even if case folding is
+     * enabled for the parser.
+     */
+    public int getCaseSensitivePrefixLength(CharSequence other) {
+        return getPrefixLengthInternal(other, false);
+    }
+
+    private int getPrefixLengthInternal(CharSequence other, boolean foldCase) {
         int offset = 0;
         for (; offset < Math.min(length(), other.length());) {
-            if (charAt(offset) != other.charAt(offset)) {
+            // TODO: case-fold code points, not chars
+            char c1 = charAt(offset);
+            char c2 = other.charAt(offset);
+            if (!codePointsEqual(c1, c2, foldCase)) {
                 break;
             }
             offset++;
@@ -101,6 +158,30 @@ public class StringSegment implements CharSequence {
         return offset;
     }
 
+    // /**
+    // * Case-folds the string if IGNORE_CASE flag is set; otherwise, returns the same string.
+    // */
+    // public static String maybeFold(String input, int parseFlags) {
+    // UnicodeSet cwcf = UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.CWCF);
+    // if (0 != (parseFlags & ParsingUtils.PARSE_FLAG_IGNORE_CASE) && cwcf.containsSome(input)) {
+    // return UCharacter.foldCase(input, true);
+    // } else {
+    // return input;
+    // }
+    // }
+
+    /** Compares two code points, applying default case folding when foldCase is set. */
+    private static final boolean codePointsEqual(int cp1, int cp2, boolean foldCase) {
+        if (cp1 == cp2) {
+            return true;
+        }
+        // Differing code points can only still match through case folding.
+        if (foldCase) {
+            return UCharacter.foldCase(cp1, true) == UCharacter.foldCase(cp2, true);
+        }
+        return false;
+    }
+
     @Override
     public String toString() {
         return str.substring(0, start) + "[" + str.substring(start, end) + "]" + str.substring(end);
index e31841e6872908b08fb905e568ca33ee562ff43c..bf15d726b7a3d172f2687834f6398badf4888539 100644 (file)
@@ -47,9 +47,8 @@ public abstract class SymbolMatcher implements NumberParseMatcher {
             }
         }
 
-        int cp = segment.getCodePoint();
-        if (cp != -1 && uniSet.contains(cp)) {
-            segment.adjustOffset(Character.charCount(cp));
+        if (segment.matches(uniSet)) {
+            segment.adjustOffsetByCodePoint();
             accept(segment, result);
             return false;
         }
index bf0593e1230ef6261d055f995ee7b42f71e0aa2b..d458f07de35cf7ef652e3df73b5ebdd1c81633ed 100644 (file)
@@ -51,7 +51,7 @@ public class UnicodeSetStaticCache {
         DIGITS,
         NAN_LEAD,
         SCIENTIFIC_LEAD,
-        CWCF,
+        CWCF, // TODO: Check if this is being used and remove it if not.
 
         // Combined Separators with Digits (for lead code points)
         DIGITS_OR_ALL_SEPARATORS,
index 7320845580c9ddacd7ef23db26bdd59d6e81a286..e5e81bb5d26bd66bed46f4c6acd031cbe409cebd 100644 (file)
@@ -16,6 +16,7 @@ import java.io.ObjectOutputStream;
 import java.io.Serializable;
 import java.lang.reflect.Field;
 import java.text.FieldPosition;
+import java.text.ParseException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -42,6 +43,7 @@ import com.ibm.icu.text.MeasureFormat;
 import com.ibm.icu.text.MeasureFormat.FormatWidth;
 import com.ibm.icu.text.NumberFormat;
 import com.ibm.icu.util.Currency;
+import com.ibm.icu.util.CurrencyAmount;
 import com.ibm.icu.util.Measure;
 import com.ibm.icu.util.MeasureUnit;
 import com.ibm.icu.util.NoUnit;
@@ -1925,6 +1927,15 @@ public class MeasureUnitTest extends TestFmwk {
         assertEquals("getCurrencyFormat ULocale/Locale", mfu, mfj);
     }
 
+    @Test
+    public void testCurrencyFormatParseIsoCode() throws ParseException {
+        MeasureFormat mf = MeasureFormat.getCurrencyFormat(ULocale.ENGLISH);
+        CurrencyAmount result = (CurrencyAmount) mf.parseObject("GTQ 34.56");
+        assertEquals("Parse should succeed", 34.56, result.getNumber().doubleValue(), 0.0);
+        assertEquals("Should parse ISO code GTQ even though the currency is USD",
+                "GTQ", result.getCurrency().getCurrencyCode());
+    }
+
     @Test
     public void testDoubleZero() {
         ULocale en = new ULocale("en");
index f449d885ae644e6cb1f26277a09586295139874c..4697abd5da48ef8cd0fe1ba0555458d659f6472c 100644 (file)
@@ -868,7 +868,7 @@ public class NumberFormatTest extends TestFmwk {
                 new ParseCurrencyItem( "en_GB", "euros4",   "4,00\u00A0\u20AC", 6,400,  "EUR" ),
                 new ParseCurrencyItem( "en_GB", "euros6",   "6\u00A0\u20AC",    3,  6,  "EUR" ),
                 new ParseCurrencyItem( "en_GB", "euros8",   "\u20AC8",          2,  8,  "EUR" ),
-                new ParseCurrencyItem( "en_GB", "dollars4", "US$4",             0,  0,  "USD" ),
+                new ParseCurrencyItem( "en_GB", "dollars4", "US$4",             4,  4,  "USD" ),
 
                 new ParseCurrencyItem( "fr_FR", "euros4",   "4,00\u00A0\u20AC", 6,  4,  "EUR" ),
                 new ParseCurrencyItem( "fr_FR", "euros6",   "6\u00A0\u20AC",    3,  6,  "EUR" ),
@@ -2018,7 +2018,6 @@ public class NumberFormatTest extends TestFmwk {
     };
 
     @SuppressWarnings("resource")  // InputStream is will be closed by the ResourceReader.
-    @Ignore("TODO: http://bugs.icu-project.org/trac/ticket/13571")
     @Test
     public void TestCases() {
         String caseFileName = "NumberFormatTestCases.txt";
index 4e69a76258146385241789633754c3aa76a50d7f..70513ce5f1adba1efd1867c62d2e17251753708d 100644 (file)
@@ -13,6 +13,7 @@ import com.ibm.icu.impl.number.parse.IgnorablesMatcher;
 import com.ibm.icu.impl.number.parse.MinusSignMatcher;
 import com.ibm.icu.impl.number.parse.NumberParserImpl;
 import com.ibm.icu.impl.number.parse.ParsedNumber;
+import com.ibm.icu.impl.number.parse.ParsingUtils;
 import com.ibm.icu.impl.number.parse.PercentMatcher;
 import com.ibm.icu.impl.number.parse.PlusSignMatcher;
 import com.ibm.icu.impl.number.parse.SeriesMatcher;
@@ -191,7 +192,7 @@ public class NumberParserTest {
             int expectedOffset = (Integer) cas[1];
             boolean expectedMaybeMore = (Boolean) cas[2];
 
-            StringSegment segment = new StringSegment(input);
+            StringSegment segment = new StringSegment(input, 0);
             ParsedNumber result = new ParsedNumber();
             boolean actualMaybeMore = series.match(segment, result);
             int actualOffset = segment.getOffset();
@@ -215,4 +216,39 @@ public class NumberParserTest {
                 result.getNumber().doubleValue(),
                 0.0);
     }
+
+    @Test
+    public void testCaseFolding() {
+        Object[][] cases = new Object[][] {
+                // pattern, input string, case sensitive chars, case insensitive chars
+                { "0", "JP¥3456", 7, 7 },
+                { "0", "jp¥3456", 0, 0 }, // not to be accepted, even in case insensitive mode
+                { "A0", "A5", 2, 2 },
+                { "A0", "a5", 0, 2 },
+                { "0", "NaN", 3, 3 },
+                { "0", "nan", 0, 3 } };
+        for (Object[] cas : cases) {
+            String patternString = (String) cas[0];
+            String inputString = (String) cas[1];
+            int expectedCaseSensitiveChars = (Integer) cas[2];
+            int expectedCaseFoldingChars = (Integer) cas[3];
+
+            NumberParserImpl caseSensitiveParser = NumberParserImpl
+                    .removeMeWhenMerged(ULocale.ENGLISH, patternString, ParsingUtils.PARSE_FLAG_OPTIMIZE);
+            ParsedNumber result = new ParsedNumber();
+            caseSensitiveParser.parse(inputString, true, result);
+            assertEquals("Case-Sensitive: " + inputString + " on " + patternString,
+                    expectedCaseSensitiveChars,
+                    result.charEnd);
+
+            NumberParserImpl caseFoldingParser = NumberParserImpl.removeMeWhenMerged(ULocale.ENGLISH,
+                    patternString,
+                    ParsingUtils.PARSE_FLAG_IGNORE_CASE | ParsingUtils.PARSE_FLAG_OPTIMIZE);
+            result = new ParsedNumber();
+            caseFoldingParser.parse(inputString, true, result);
+            assertEquals("Folded: " + inputString + " on " + patternString,
+                    expectedCaseFoldingChars,
+                    result.charEnd);
+        }
+    }
 }
index 016fa581c98653baa06d8d3798f72a4ce213e36d..cb4106ad93414aa7346f989966dcdbe7c177a28c 100644 (file)
@@ -17,7 +17,7 @@ public class StringSegmentTest {
 
     @Test
     public void testOffset() {
-        StringSegment segment = new StringSegment(SAMPLE_STRING);
+        StringSegment segment = new StringSegment(SAMPLE_STRING, 0);
         assertEquals(0, segment.getOffset());
         segment.adjustOffset(3);
         assertEquals(3, segment.getOffset());
@@ -29,7 +29,7 @@ public class StringSegmentTest {
 
     @Test
     public void testLength() {
-        StringSegment segment = new StringSegment(SAMPLE_STRING);
+        StringSegment segment = new StringSegment(SAMPLE_STRING, 0);
         assertEquals(11, segment.length());
         segment.adjustOffset(3);
         assertEquals(8, segment.length());
@@ -43,7 +43,7 @@ public class StringSegmentTest {
 
     @Test
     public void testCharAt() {
-        StringSegment segment = new StringSegment(SAMPLE_STRING);
+        StringSegment segment = new StringSegment(SAMPLE_STRING, 0);
         assertCharSequenceEquals(SAMPLE_STRING, segment);
         segment.adjustOffset(3);
         assertCharSequenceEquals("radio 📻", segment);
@@ -53,7 +53,7 @@ public class StringSegmentTest {
 
     @Test
     public void testGetCodePoint() {
-        StringSegment segment = new StringSegment(SAMPLE_STRING);
+        StringSegment segment = new StringSegment(SAMPLE_STRING, 0);
         assertEquals(0x1F4FB, segment.getCodePoint());
         segment.setLength(1);
         assertEquals(-1, segment.getCodePoint());
@@ -66,7 +66,7 @@ public class StringSegmentTest {
 
     @Test
     public void testCommonPrefixLength() {
-        StringSegment segment = new StringSegment(SAMPLE_STRING);
+        StringSegment segment = new StringSegment(SAMPLE_STRING, 0);
         assertEquals(11, segment.getCommonPrefixLength(SAMPLE_STRING));
         assertEquals(4, segment.getCommonPrefixLength("📻 r"));
         assertEquals(3, segment.getCommonPrefixLength("📻 x"));