From: Shane Carr Date: Thu, 8 Feb 2018 08:43:12 +0000 (+0000) Subject: ICU-13571 Switching number parsing code back to incremental code point case folding. X-Git-Tag: release-61-rc~115 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=398b17f2b5c5cc734674237da8f43d320343f393;p=icu ICU-13571 Switching number parsing code back to incremental code point case folding. X-SVN-Rev: 40868 --- diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixPatternMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixPatternMatcher.java index ee041f64ab2..43d3888579a 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixPatternMatcher.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixPatternMatcher.java @@ -35,7 +35,6 @@ public class AffixPatternMatcher extends SeriesMatcher implements AffixUtils.Tok return null; } - affixPattern = ParsingUtils.maybeFold(affixPattern, parseFlags); AffixPatternMatcher series = new AffixPatternMatcher(affixPattern); series.factory = factory; series.ignorables = (0 != (parseFlags & ParsingUtils.PARSE_FLAG_EXACT_AFFIX)) ? 
null diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CodePointMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CodePointMatcher.java index 385e73a5d89..8a0b7b9beb4 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CodePointMatcher.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CodePointMatcher.java @@ -24,8 +24,8 @@ public class CodePointMatcher implements NumberParseMatcher { @Override public boolean match(StringSegment segment, ParsedNumber result) { - if (segment.getCodePoint() == cp) { - segment.adjustOffset(Character.charCount(cp)); + if (segment.matches(cp)) { + segment.adjustOffsetByCodePoint(); result.setCharsConsumed(segment); } return false; diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CurrencyMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CurrencyMatcher.java index e760a0142b8..d81c2e9f81a 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CurrencyMatcher.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CurrencyMatcher.java @@ -15,10 +15,10 @@ public class CurrencyMatcher implements NumberParseMatcher { private final String currency1; private final String currency2; - public static CurrencyMatcher getInstance(Currency currency, ULocale loc, int setupFlags) { + public static CurrencyMatcher getInstance(Currency currency, ULocale loc) { return new CurrencyMatcher(currency.getSubtype(), - ParsingUtils.maybeFold(currency.getSymbol(loc), setupFlags), - ParsingUtils.maybeFold(currency.getCurrencyCode(), setupFlags)); + currency.getSymbol(loc), + currency.getCurrencyCode()); } private CurrencyMatcher(String isoCode, String currency1, String currency2) { diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/MatcherFactory.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/MatcherFactory.java index a1e36758693..d5640d4aadb 100644 --- 
a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/MatcherFactory.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/MatcherFactory.java @@ -15,7 +15,6 @@ public class MatcherFactory { DecimalFormatSymbols symbols; IgnorablesMatcher ignorables; ULocale locale; - int parseFlags; public MinusSignMatcher minusSign(boolean allowTrailing) { return MinusSignMatcher.getInstance(symbols, allowTrailing); @@ -35,7 +34,7 @@ public class MatcherFactory { public AnyMatcher currency() { AnyMatcher any = new AnyMatcher(); - any.addMatcher(CurrencyMatcher.getInstance(currency, locale, parseFlags)); + any.addMatcher(CurrencyMatcher.getInstance(currency, locale)); any.addMatcher(CurrencyTrieMatcher.getInstance(locale)); any.freeze(); return any; diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NanMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NanMatcher.java index 7664e1e72b2..c5b01255e98 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NanMatcher.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NanMatcher.java @@ -2,7 +2,6 @@ // License & terms of use: http://www.unicode.org/copyright.html#License package com.ibm.icu.impl.number.parse; -import com.ibm.icu.lang.UCharacter; import com.ibm.icu.text.DecimalFormatSymbols; import com.ibm.icu.text.UnicodeSet; @@ -13,14 +12,11 @@ import com.ibm.icu.text.UnicodeSet; public class NanMatcher extends SymbolMatcher { private static final NanMatcher DEFAULT = new NanMatcher("NaN"); - private static final NanMatcher DEFAULT_FOLDED = new NanMatcher(UCharacter.foldCase("NaN", true)); public static NanMatcher getInstance(DecimalFormatSymbols symbols, int parseFlags) { - String symbolString = ParsingUtils.maybeFold(symbols.getNaN(), parseFlags); + String symbolString = symbols.getNaN(); if (DEFAULT.string.equals(symbolString)) { return DEFAULT; - } else if (DEFAULT_FOLDED.string.equals(symbolString)) { - return DEFAULT_FOLDED; } else { 
return new NanMatcher(symbolString); } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java index ff59ca052cc..577cc1f850b 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java @@ -31,6 +31,30 @@ import com.ibm.icu.util.ULocale; */ public class NumberParserImpl { + @Deprecated + public static NumberParserImpl removeMeWhenMerged(ULocale locale, String pattern, int parseFlags) { + NumberParserImpl parser = new NumberParserImpl(parseFlags); + DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(locale); + IgnorablesMatcher ignorables = IgnorablesMatcher.DEFAULT; + + MatcherFactory factory = new MatcherFactory(); + factory.currency = Currency.getInstance("USD"); + factory.symbols = symbols; + factory.ignorables = ignorables; + factory.locale = locale; + + ParsedPatternInfo patternInfo = PatternStringParser.parseToPatternInfo(pattern); + AffixMatcher.newGenerate(patternInfo, parser, factory, ignorables, parseFlags); + + Grouper grouper = Grouper.forStrategy(GroupingStrategy.AUTO).withLocaleData(locale, patternInfo); + parser.addMatcher(DecimalMatcher.getInstance(symbols, grouper, parseFlags)); + parser.addMatcher(CurrencyTrieMatcher.getInstance(locale)); + parser.addMatcher(NanMatcher.getInstance(symbols, parseFlags)); + + parser.freeze(); + return parser; + } + // TODO: Find a better place for this enum. /** Controls the set of rules for parsing a string. */ public static enum ParseMode { @@ -74,12 +98,13 @@ public class NumberParserImpl { // Temporary frontend for testing. 
int parseFlags = ParsingUtils.PARSE_FLAG_IGNORE_CASE - | ParsingUtils.PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES; + | ParsingUtils.PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES + | ParsingUtils.PARSE_FLAG_OPTIMIZE; if (strictGrouping) { parseFlags |= ParsingUtils.PARSE_FLAG_STRICT_GROUPING_SIZE; } - NumberParserImpl parser = new NumberParserImpl(parseFlags, true); + NumberParserImpl parser = new NumberParserImpl(parseFlags); DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(locale); IgnorablesMatcher ignorables = IgnorablesMatcher.DEFAULT; @@ -88,7 +113,6 @@ public class NumberParserImpl { factory.symbols = symbols; factory.ignorables = ignorables; factory.locale = locale; - factory.parseFlags = parseFlags; ParsedPatternInfo patternInfo = PatternStringParser.parseToPatternInfo(pattern); AffixMatcher.newGenerate(patternInfo, parser, factory, ignorables, parseFlags); @@ -99,7 +123,7 @@ public class NumberParserImpl { parser.addMatcher(DecimalMatcher.getInstance(symbols, grouper, parseFlags)); parser.addMatcher(MinusSignMatcher.getInstance(symbols, false)); parser.addMatcher(NanMatcher.getInstance(symbols, parseFlags)); - parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper, parseFlags)); + parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper)); parser.addMatcher(CurrencyTrieMatcher.getInstance(locale)); parser.addMatcher(new RequireNumberMatcher()); @@ -193,16 +217,18 @@ public class NumberParserImpl { if (parseCurrency || patternInfo.hasCurrencySign()) { parseFlags |= ParsingUtils.PARSE_FLAG_MONETARY_SEPARATORS; } + if (optimize) { + parseFlags |= ParsingUtils.PARSE_FLAG_OPTIMIZE; + } IgnorablesMatcher ignorables = isStrict ? 
IgnorablesMatcher.STRICT : IgnorablesMatcher.DEFAULT; - NumberParserImpl parser = new NumberParserImpl(parseFlags, optimize); + NumberParserImpl parser = new NumberParserImpl(parseFlags); MatcherFactory factory = new MatcherFactory(); factory.currency = currency; factory.symbols = symbols; factory.ignorables = ignorables; factory.locale = locale; - factory.parseFlags = parseFlags; ////////////////////// /// AFFIX MATCHERS /// @@ -216,7 +242,7 @@ public class NumberParserImpl { //////////////////////// if (parseCurrency || patternInfo.hasCurrencySign()) { - parser.addMatcher(CurrencyMatcher.getInstance(currency, locale, parseFlags)); + parser.addMatcher(CurrencyMatcher.getInstance(currency, locale)); parser.addMatcher(CurrencyTrieMatcher.getInstance(locale)); } @@ -239,7 +265,7 @@ public class NumberParserImpl { parser.addMatcher(ignorables); parser.addMatcher(DecimalMatcher.getInstance(symbols, grouper, parseFlags)); if (!properties.getParseNoExponent()) { - parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper, parseFlags)); + parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper)); } ////////////////// @@ -290,9 +316,9 @@ public class NumberParserImpl { * or twice, set this to false; if it is going to be used hundreds of times, set it to * true. 
*/ - public NumberParserImpl(int parseFlags, boolean optimize) { + public NumberParserImpl(int parseFlags) { matchers = new ArrayList(); - if (optimize) { + if (0 != (parseFlags & ParsingUtils.PARSE_FLAG_OPTIMIZE)) { leadCodePointses = new ArrayList(); } else { leadCodePointses = null; @@ -306,9 +332,7 @@ public class NumberParserImpl { assert !frozen; this.matchers.add(matcher); if (leadCodePointses != null) { - UnicodeSet leadCodePoints = matcher.getLeadCodePoints(); - assert leadCodePoints.isFrozen(); - this.leadCodePointses.add(leadCodePoints); + addLeadCodePointsForMatcher(matcher); } } @@ -317,13 +341,22 @@ public class NumberParserImpl { this.matchers.addAll(matchers); if (leadCodePointses != null) { for (NumberParseMatcher matcher : matchers) { - UnicodeSet leadCodePoints = matcher.getLeadCodePoints(); - assert leadCodePoints.isFrozen(); - this.leadCodePointses.add(leadCodePoints); + addLeadCodePointsForMatcher(matcher); } } } + private void addLeadCodePointsForMatcher(NumberParseMatcher matcher) { + UnicodeSet leadCodePoints = matcher.getLeadCodePoints(); + assert leadCodePoints.isFrozen(); + // TODO: Avoid the clone operation here. 
+ if (0 != (parseFlags & ParsingUtils.PARSE_FLAG_IGNORE_CASE)) { + leadCodePoints = leadCodePoints.cloneAsThawed().closeOver(UnicodeSet.ADD_CASE_MAPPINGS) + .freeze(); + } + this.leadCodePointses.add(leadCodePoints); + } + public void setComparator(Comparator comparator) { assert !frozen; this.comparator = comparator; @@ -353,7 +386,7 @@ public class NumberParserImpl { public void parse(String input, int start, boolean greedy, ParsedNumber result) { assert frozen; assert start >= 0 && start < input.length(); - StringSegment segment = new StringSegment(ParsingUtils.maybeFold(input, parseFlags)); + StringSegment segment = new StringSegment(input, parseFlags); segment.adjustOffset(start); if (greedy) { parseGreedyRecursive(segment, result); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ParsingUtils.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ParsingUtils.java index c4a0005c0e7..4d17cd618af 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ParsingUtils.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ParsingUtils.java @@ -2,7 +2,6 @@ // License & terms of use: http://www.unicode.org/copyright.html#License package com.ibm.icu.impl.number.parse; -import com.ibm.icu.lang.UCharacter; import com.ibm.icu.text.UnicodeSet; import com.ibm.icu.text.UnicodeSet.EntryRange; @@ -23,6 +22,7 @@ public class ParsingUtils { public static final int PARSE_FLAG_EXACT_AFFIX = 0x0200; public static final int PARSE_FLAG_PLUS_SIGN_ALLOWED = 0x0400; public static final int PARSE_FLAG_FRACTION_GROUPING_DISABLED = 0x0800; + public static final int PARSE_FLAG_OPTIMIZE = 0x1000; public static void putLeadCodePoints(UnicodeSet input, UnicodeSet output) { for (EntryRange range : input.ranges()) { @@ -39,16 +39,4 @@ public class ParsingUtils { } } - /** - * Case-folds the string if IGNORE_CASE flag is set; otherwise, returns the same string. 
- */ - public static String maybeFold(String input, int parseFlags) { - UnicodeSet cwcf = UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.CWCF); - if (0 != (parseFlags & PARSE_FLAG_IGNORE_CASE) && cwcf.containsSome(input)) { - return UCharacter.foldCase(input, true); - } else { - return input; - } - } - } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ScientificMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ScientificMatcher.java index a6c053af7ea..329ee12ba6f 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ScientificMatcher.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ScientificMatcher.java @@ -15,16 +15,13 @@ public class ScientificMatcher implements NumberParseMatcher { private final String exponentSeparatorString; private final DecimalMatcher exponentMatcher; - public static ScientificMatcher getInstance( - DecimalFormatSymbols symbols, - Grouper grouper, - int parseFlags) { + public static ScientificMatcher getInstance(DecimalFormatSymbols symbols, Grouper grouper) { // TODO: Static-initialize most common instances? 
- return new ScientificMatcher(symbols, grouper, parseFlags); + return new ScientificMatcher(symbols, grouper); } - private ScientificMatcher(DecimalFormatSymbols symbols, Grouper grouper, int parseFlags) { - exponentSeparatorString = ParsingUtils.maybeFold(symbols.getExponentSeparator(), parseFlags); + private ScientificMatcher(DecimalFormatSymbols symbols, Grouper grouper) { + exponentSeparatorString = symbols.getExponentSeparator(); exponentMatcher = DecimalMatcher.getInstance(symbols, grouper, ParsingUtils.PARSE_FLAG_DECIMAL_SCIENTIFIC | ParsingUtils.PARSE_FLAG_INTEGER_ONLY); @@ -47,19 +44,14 @@ public class ScientificMatcher implements NumberParseMatcher { if (segment.length() == 0) { return true; } - int leadCp = segment.getCodePoint(); - if (leadCp == -1) { - // Partial code point match - return true; - } // Allow a sign, and then try to match digits. boolean minusSign = false; - if (UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.MINUS_SIGN).contains(leadCp)) { + if (segment.matches(UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.MINUS_SIGN))) { minusSign = true; - segment.adjustOffset(Character.charCount(leadCp)); - } else if (UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.PLUS_SIGN).contains(leadCp)) { - segment.adjustOffset(Character.charCount(leadCp)); + segment.adjustOffsetByCodePoint(); + } else if (segment.matches(UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.PLUS_SIGN))) { + segment.adjustOffsetByCodePoint(); } int digitsOffset = segment.getOffset(); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/StringSegment.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/StringSegment.java index 6b92df6e368..bc0cab0c5d0 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/StringSegment.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/StringSegment.java @@ -2,6 +2,9 @@ // License & terms of use: http://www.unicode.org/copyright.html#License package 
com.ibm.icu.impl.number.parse; +import com.ibm.icu.lang.UCharacter; +import com.ibm.icu.text.UnicodeSet; + /** * A mutable class allowing for a String with a variable offset and length. The charAt, length, and * subSequence methods all operate relative to the fixed offset into the String. @@ -12,11 +15,13 @@ public class StringSegment implements CharSequence { private final String str; private int start; private int end; + private boolean foldCase; - public StringSegment(String str) { + public StringSegment(String str, int parseFlags) { this.str = str; this.start = 0; this.end = str.length(); + this.foldCase = 0 != (parseFlags & ParsingUtils.PARSE_FLAG_IGNORE_CASE); } public int getOffset() { @@ -42,6 +47,13 @@ public class StringSegment implements CharSequence { start += delta; } + /** + * Adjusts the offset by the width of the current code point, either 1 or 2 chars. + */ + public void adjustOffsetByCodePoint() { + start += Character.charCount(getCodePoint()); + } + public void setLength(int length) { assert length >= 0; assert start + length <= str.length(); @@ -72,6 +84,10 @@ public class StringSegment implements CharSequence { /** * Returns the first code point in the string segment, or -1 if the string starts with an invalid * code point. + * + *
<p>
+ * Important: Most of the time, you should use {@link #matches}, which handles case + * folding logic, instead of this method. */ public int getCodePoint() { assert start < end; @@ -85,15 +101,56 @@ public class StringSegment implements CharSequence { } } + /** + * Returns true if the first code point of this StringSegment equals the given code point. + * + *
<p>
+ * This method will perform case folding if case folding is enabled for the parser. + */ + public boolean matches(int otherCp) { + return codePointsEqual(getCodePoint(), otherCp, foldCase); + } + + /** + * Returns true if the first code point of this StringSegment is in the given UnicodeSet. + */ + public boolean matches(UnicodeSet uniset) { + // TODO: Move UnicodeSet case-folding logic here. + // TODO: Handle string matches here instead of separately. + int cp = getCodePoint(); + if (cp == -1) { + return false; + } + return uniset.contains(cp); + } + /** * Returns the length of the prefix shared by this StringSegment and the given CharSequence. For * example, if this string segment is "aab", and the char sequence is "aac", this method returns 2, * since the first 2 characters are the same. + * + *
<p>
+ * This method will perform case folding if case folding is enabled for the parser. */ public int getCommonPrefixLength(CharSequence other) { + return getPrefixLengthInternal(other, foldCase); + } + + /** + * Like {@link #getCommonPrefixLength}, but never performs case folding, even if case folding is + * enabled for the parser. + */ + public int getCaseSensitivePrefixLength(CharSequence other) { + return getPrefixLengthInternal(other, false); + } + + private int getPrefixLengthInternal(CharSequence other, boolean foldCase) { int offset = 0; for (; offset < Math.min(length(), other.length());) { - if (charAt(offset) != other.charAt(offset)) { + // TODO: case-fold code points, not chars + char c1 = charAt(offset); + char c2 = other.charAt(offset); + if (!codePointsEqual(c1, c2, foldCase)) { break; } offset++; @@ -101,6 +158,30 @@ public class StringSegment implements CharSequence { return offset; } + // /** + // * Case-folds the string if IGNORE_CASE flag is set; otherwise, returns the same string. 
+ // */ + // public static String maybeFold(String input, int parseFlags) { + // UnicodeSet cwcf = UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.CWCF); + // if (0 != (parseFlags & ParsingUtils.PARSE_FLAG_IGNORE_CASE) && cwcf.containsSome(input)) { + // return UCharacter.foldCase(input, true); + // } else { + // return input; + // } + // } + + private static final boolean codePointsEqual(int cp1, int cp2, boolean foldCase) { + if (cp1 == cp2) { + return true; + } + if (!foldCase) { + return false; + } + cp1 = UCharacter.foldCase(cp1, true); + cp2 = UCharacter.foldCase(cp2, true); + return cp1 == cp2; + } + @Override public String toString() { return str.substring(0, start) + "[" + str.substring(start, end) + "]" + str.substring(end); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/SymbolMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/SymbolMatcher.java index e31841e6872..bf15d726b7a 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/SymbolMatcher.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/SymbolMatcher.java @@ -47,9 +47,8 @@ public abstract class SymbolMatcher implements NumberParseMatcher { } } - int cp = segment.getCodePoint(); - if (cp != -1 && uniSet.contains(cp)) { - segment.adjustOffset(Character.charCount(cp)); + if (segment.matches(uniSet)) { + segment.adjustOffsetByCodePoint(); accept(segment, result); return false; } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/UnicodeSetStaticCache.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/UnicodeSetStaticCache.java index bf0593e1230..d458f07de35 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/UnicodeSetStaticCache.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/UnicodeSetStaticCache.java @@ -51,7 +51,7 @@ public class UnicodeSetStaticCache { DIGITS, NAN_LEAD, SCIENTIFIC_LEAD, - CWCF, + CWCF, // TODO: Check if this is being used and 
remove it if not. // Combined Separators with Digits (for lead code points) DIGITS_OR_ALL_SEPARATORS, diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/MeasureUnitTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/MeasureUnitTest.java index 7320845580c..e5e81bb5d26 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/MeasureUnitTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/MeasureUnitTest.java @@ -16,6 +16,7 @@ import java.io.ObjectOutputStream; import java.io.Serializable; import java.lang.reflect.Field; import java.text.FieldPosition; +import java.text.ParseException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -42,6 +43,7 @@ import com.ibm.icu.text.MeasureFormat; import com.ibm.icu.text.MeasureFormat.FormatWidth; import com.ibm.icu.text.NumberFormat; import com.ibm.icu.util.Currency; +import com.ibm.icu.util.CurrencyAmount; import com.ibm.icu.util.Measure; import com.ibm.icu.util.MeasureUnit; import com.ibm.icu.util.NoUnit; @@ -1925,6 +1927,15 @@ public class MeasureUnitTest extends TestFmwk { assertEquals("getCurrencyFormat ULocale/Locale", mfu, mfj); } + @Test + public void testCurrencyFormatParseIsoCode() throws ParseException { + MeasureFormat mf = MeasureFormat.getCurrencyFormat(ULocale.ENGLISH); + CurrencyAmount result = (CurrencyAmount) mf.parseObject("GTQ 34.56"); + assertEquals("Parse should succeed", result.getNumber().doubleValue(), 34.56, 0.0); + assertEquals("Should parse ISO code GTQ even though the currency is USD", + "GTQ", result.getCurrency().getCurrencyCode()); + } + @Test public void testDoubleZero() { ULocale en = new ULocale("en"); diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTest.java index f449d885ae6..4697abd5da4 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTest.java +++ 
b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTest.java @@ -868,7 +868,7 @@ public class NumberFormatTest extends TestFmwk { new ParseCurrencyItem( "en_GB", "euros4", "4,00\u00A0\u20AC", 6,400, "EUR" ), new ParseCurrencyItem( "en_GB", "euros6", "6\u00A0\u20AC", 3, 6, "EUR" ), new ParseCurrencyItem( "en_GB", "euros8", "\u20AC8", 2, 8, "EUR" ), - new ParseCurrencyItem( "en_GB", "dollars4", "US$4", 0, 0, "USD" ), + new ParseCurrencyItem( "en_GB", "dollars4", "US$4", 4, 4, "USD" ), new ParseCurrencyItem( "fr_FR", "euros4", "4,00\u00A0\u20AC", 6, 4, "EUR" ), new ParseCurrencyItem( "fr_FR", "euros6", "6\u00A0\u20AC", 3, 6, "EUR" ), @@ -2018,7 +2018,6 @@ public class NumberFormatTest extends TestFmwk { }; @SuppressWarnings("resource") // InputStream is will be closed by the ResourceReader. - @Ignore("TODO: http://bugs.icu-project.org/trac/ticket/13571") @Test public void TestCases() { String caseFileName = "NumberFormatTestCases.txt"; diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java index 4e69a762581..70513ce5f1a 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java @@ -13,6 +13,7 @@ import com.ibm.icu.impl.number.parse.IgnorablesMatcher; import com.ibm.icu.impl.number.parse.MinusSignMatcher; import com.ibm.icu.impl.number.parse.NumberParserImpl; import com.ibm.icu.impl.number.parse.ParsedNumber; +import com.ibm.icu.impl.number.parse.ParsingUtils; import com.ibm.icu.impl.number.parse.PercentMatcher; import com.ibm.icu.impl.number.parse.PlusSignMatcher; import com.ibm.icu.impl.number.parse.SeriesMatcher; @@ -191,7 +192,7 @@ public class NumberParserTest { int expectedOffset = (Integer) cas[1]; boolean expectedMaybeMore = (Boolean) cas[2]; - StringSegment segment = new StringSegment(input); + StringSegment segment = new 
StringSegment(input, 0); ParsedNumber result = new ParsedNumber(); boolean actualMaybeMore = series.match(segment, result); int actualOffset = segment.getOffset(); @@ -215,4 +216,39 @@ public class NumberParserTest { result.getNumber().doubleValue(), 0.0); } + + @Test + public void testCaseFolding() { + Object[][] cases = new Object[][] { + // pattern, input string, case sensitive chars, case insensitive chars + { "0", "JP¥3456", 7, 7 }, + { "0", "jp¥3456", 0, 0 }, // not to be accepted, even in case insensitive mode + { "A0", "A5", 2, 2 }, + { "A0", "a5", 0, 2 }, + { "0", "NaN", 3, 3 }, + { "0", "nan", 0, 3 } }; + for (Object[] cas : cases) { + String patternString = (String) cas[0]; + String inputString = (String) cas[1]; + int expectedCaseSensitiveChars = (Integer) cas[2]; + int expectedCaseFoldingChars = (Integer) cas[3]; + + NumberParserImpl caseSensitiveParser = NumberParserImpl + .removeMeWhenMerged(ULocale.ENGLISH, patternString, ParsingUtils.PARSE_FLAG_OPTIMIZE); + ParsedNumber result = new ParsedNumber(); + caseSensitiveParser.parse(inputString, true, result); + assertEquals("Case-Sensitive: " + inputString + " on " + patternString, + expectedCaseSensitiveChars, + result.charEnd); + + NumberParserImpl caseFoldingParser = NumberParserImpl.removeMeWhenMerged(ULocale.ENGLISH, + patternString, + ParsingUtils.PARSE_FLAG_IGNORE_CASE | ParsingUtils.PARSE_FLAG_OPTIMIZE); + result = new ParsedNumber(); + caseFoldingParser.parse(inputString, true, result); + assertEquals("Folded: " + inputString + " on " + patternString, + expectedCaseFoldingChars, + result.charEnd); + } + } } diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/StringSegmentTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/StringSegmentTest.java index 016fa581c98..cb4106ad934 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/StringSegmentTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/StringSegmentTest.java @@ -17,7 +17,7 @@ 
public class StringSegmentTest { @Test public void testOffset() { - StringSegment segment = new StringSegment(SAMPLE_STRING); + StringSegment segment = new StringSegment(SAMPLE_STRING, 0); assertEquals(0, segment.getOffset()); segment.adjustOffset(3); assertEquals(3, segment.getOffset()); @@ -29,7 +29,7 @@ public class StringSegmentTest { @Test public void testLength() { - StringSegment segment = new StringSegment(SAMPLE_STRING); + StringSegment segment = new StringSegment(SAMPLE_STRING, 0); assertEquals(11, segment.length()); segment.adjustOffset(3); assertEquals(8, segment.length()); @@ -43,7 +43,7 @@ public class StringSegmentTest { @Test public void testCharAt() { - StringSegment segment = new StringSegment(SAMPLE_STRING); + StringSegment segment = new StringSegment(SAMPLE_STRING, 0); assertCharSequenceEquals(SAMPLE_STRING, segment); segment.adjustOffset(3); assertCharSequenceEquals("radio 📻", segment); @@ -53,7 +53,7 @@ public class StringSegmentTest { @Test public void testGetCodePoint() { - StringSegment segment = new StringSegment(SAMPLE_STRING); + StringSegment segment = new StringSegment(SAMPLE_STRING, 0); assertEquals(0x1F4FB, segment.getCodePoint()); segment.setLength(1); assertEquals(-1, segment.getCodePoint()); @@ -66,7 +66,7 @@ public class StringSegmentTest { @Test public void testCommonPrefixLength() { - StringSegment segment = new StringSegment(SAMPLE_STRING); + StringSegment segment = new StringSegment(SAMPLE_STRING, 0); assertEquals(11, segment.getCommonPrefixLength(SAMPLE_STRING)); assertEquals(4, segment.getCommonPrefixLength("📻 r")); assertEquals(3, segment.getCommonPrefixLength("📻 x"));