public boolean integerOnly = false;
public boolean isScientific = false;
- private UnicodeSet groupingUniSet;
- private UnicodeSet decimalUniSet;
- private UnicodeSet separatorSet;
- private String[] digitStrings;
+ private UnicodeSet groupingUniSet = null;
+ private UnicodeSet decimalUniSet = null;
+ private UnicodeSet separatorSet = null;
+ private UnicodeSet separatorLeadChars = null;
+ private String[] digitStrings = null;
private boolean frozen;
public DecimalMatcher() {
assert !frozen;
frozen = true;
- groupingUniSet = SeparatorSetUtils.getGroupingUnicodeSet(symbols, isStrict);
- decimalUniSet = SeparatorSetUtils.getDecimalUnicodeSet(symbols, isStrict);
- separatorSet = SeparatorSetUtils.unionUnicodeSets(groupingUniSet, decimalUniSet);
- digitStrings = symbols.getDigitStringsLocal();
+ String groupingSeparator = symbols.getGroupingSeparatorString();
+ String decimalSeparator = symbols.getDecimalSeparatorString();
+ UnicodeSetStaticCache.Key groupingKey, decimalKey;
+
+ // Attempt to find values in the static cache
+ if (isStrict) {
+ groupingKey = UnicodeSetStaticCache.chooseFrom(groupingSeparator,
+ UnicodeSetStaticCache.Key.OTHER_GROUPING_SEPARATORS,
+ UnicodeSetStaticCache.Key.STRICT_COMMA_OR_OTHER,
+ UnicodeSetStaticCache.Key.STRICT_PERIOD_OR_OTHER);
+ decimalKey = UnicodeSetStaticCache.chooseFrom(decimalSeparator,
+ UnicodeSetStaticCache.Key.STRICT_COMMA,
+ UnicodeSetStaticCache.Key.STRICT_PERIOD);
+ } else {
+ groupingKey = UnicodeSetStaticCache.chooseFrom(groupingSeparator,
+ UnicodeSetStaticCache.Key.OTHER_GROUPING_SEPARATORS,
+ UnicodeSetStaticCache.Key.COMMA_OR_OTHER,
+ UnicodeSetStaticCache.Key.PERIOD_OR_OTHER);
+ decimalKey = UnicodeSetStaticCache.chooseFrom(decimalSeparator,
+ UnicodeSetStaticCache.Key.COMMA,
+ UnicodeSetStaticCache.Key.PERIOD);
+ }
+
+ // Get the sets from the static cache if they were found
+ if (groupingKey != null && decimalKey != null) {
+ groupingUniSet = UnicodeSetStaticCache.get(groupingKey);
+ decimalUniSet = UnicodeSetStaticCache.get(decimalKey);
+ UnicodeSetStaticCache.Key separatorKey = UnicodeSetStaticCache.unionOf(groupingKey, decimalKey);
+ if (separatorKey != null) {
+ separatorSet = UnicodeSetStaticCache.get(separatorKey);
+ separatorLeadChars = UnicodeSetStaticCache.getLeadChars(separatorKey);
+ }
+ } else if (groupingKey != null) {
+ groupingUniSet = UnicodeSetStaticCache.get(groupingKey);
+ } else if (decimalKey != null) {
+ decimalUniSet = UnicodeSetStaticCache.get(decimalKey);
+ }
+
+ // Resolve fallbacks if we don't have sets from the static cache
+ if (groupingUniSet == null) {
+ groupingUniSet = new UnicodeSet().add(groupingSeparator).freeze();
+ }
+ if (decimalUniSet == null) {
+ decimalUniSet = new UnicodeSet().add(decimalSeparator).freeze();
+ }
+ if (separatorSet == null) {
+ separatorSet = new UnicodeSet().addAll(groupingUniSet).addAll(decimalUniSet).freeze();
+ }
+
+ int cpZero = symbols.getCodePointZero();
+ if (cpZero == -1 || !UCharacter.isDigit(cpZero) || UCharacter.digit(cpZero) != 0) {
+ digitStrings = symbols.getDigitStrings();
+ }
}
@Override
}
// Try by digit string.
- if (digit == -1) {
+ if (digit == -1 && digitStrings != null) {
for (int i = 0; i < digitStrings.length; i++) {
String str = digitStrings[i];
int overlap = segment.getCommonPrefixLength(str);
return segment.length() == 0 || hasPartialPrefix || segment.isLeadingSurrogate();
}
- private static final UnicodeSet UNISET_DIGITS = new UnicodeSet("[:digit:]");
-
@Override
public UnicodeSet getLeadChars(boolean ignoreCase) {
UnicodeSet leadChars = new UnicodeSet();
- ParsingUtils.putLeadSurrogates(UNISET_DIGITS, leadChars);
- for (int i = 0; i < digitStrings.length; i++) {
- ParsingUtils.putLeadingChar(digitStrings[i], leadChars, ignoreCase);
+ leadChars.addAll(UnicodeSetStaticCache.getLeadChars(UnicodeSetStaticCache.Key.DIGITS));
+ if (digitStrings != null) {
+ for (int i = 0; i < digitStrings.length; i++) {
+ ParsingUtils.putLeadingChar(digitStrings[i], leadChars, ignoreCase);
+ }
+ }
+ if (separatorLeadChars != null) {
+ leadChars.addAll(separatorLeadChars);
+ } else {
+ ParsingUtils.putLeadSurrogates(separatorSet, leadChars);
}
- ParsingUtils.putLeadSurrogates(separatorSet, leadChars);
return leadChars.freeze();
}
*/
public class IgnorablesMatcher extends RangeMatcher {
- // BiDi characters are skipped over and ignored at any point in the string, even in strict mode.
- static final UnicodeSet UNISET_BIDI = new UnicodeSet("[[\\u200E\\u200F\\u061C]]").freeze();
+ public static final IgnorablesMatcher DEFAULT = new IgnorablesMatcher(
+ UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.DEFAULT_IGNORABLES));
- // This set was decided after discussion with icu-design@. See ticket #13309.
- // Zs+TAB is "horizontal whitespace" according to UTS #18 (blank property).
- static final UnicodeSet UNISET_WHITESPACE = new UnicodeSet("[[:Zs:][\\u0009]]").freeze();
-
- /** The default set of ignorables. */
- static final UnicodeSet DEFAULT_UNISET = UNISET_BIDI.cloneAsThawed().addAll(UNISET_WHITESPACE).freeze();
-
- /** The default set of ignorables for strict mode. */
- static final UnicodeSet STRICT_UNISET = UNISET_BIDI;
-
- private static final IgnorablesMatcher DEFAULT_INSTANCE = new IgnorablesMatcher(DEFAULT_UNISET);
- private static final IgnorablesMatcher STRICT_INSTANCE = new IgnorablesMatcher(STRICT_UNISET);
+ public static final IgnorablesMatcher STRICT = new IgnorablesMatcher(
+ UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.STRICT_IGNORABLES));
public static IgnorablesMatcher getInstance(UnicodeSet ignorables) {
assert ignorables.isFrozen();
- if (ignorables == DEFAULT_UNISET || ignorables.equals(DEFAULT_UNISET)) {
- return DEFAULT_INSTANCE;
- } else if (ignorables == STRICT_UNISET || ignorables.equals(STRICT_UNISET)) {
- return STRICT_INSTANCE;
- } else {
- return new IgnorablesMatcher(ignorables);
- }
+ return new IgnorablesMatcher(ignorables);
}
private IgnorablesMatcher(UnicodeSet ignorables) {
super(ignorables);
}
+    /**
+     * Returns the lead-char set for this matcher. The two shared instances, DEFAULT and STRICT,
+     * are answered from UnicodeSetStaticCache; any other instance defers to the superclass.
+     */
+    @Override
+    public UnicodeSet getLeadChars(boolean ignoreCase) {
+        // Identity comparison is intentional: only the two singletons are backed by the cache.
+        if (this == DEFAULT) {
+            return UnicodeSetStaticCache.getLeadChars(UnicodeSetStaticCache.Key.DEFAULT_IGNORABLES);
+        }
+        if (this == STRICT) {
+            return UnicodeSetStaticCache.getLeadChars(UnicodeSetStaticCache.Key.STRICT_IGNORABLES);
+        }
+        return super.getLeadChars(ignoreCase);
+    }
+
@Override
protected boolean isDisabled(ParsedNumber result) {
return false;
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
+import com.ibm.icu.text.DecimalFormatSymbols;
import com.ibm.icu.text.UnicodeSet;
/**
*/
public class MinusSignMatcher extends SymbolMatcher {
- public MinusSignMatcher() {
- // FIXME
- super("-", new UnicodeSet("[-_]"));
+ private static final MinusSignMatcher DEFAULT = new MinusSignMatcher();
+
+    /**
+     * Returns the shared DEFAULT matcher when its UnicodeSet contains the minus sign string of
+     * the given symbols; otherwise returns a new matcher built for that string.
+     */
+    public static MinusSignMatcher getInstance(DecimalFormatSymbols symbols) {
+        String minusSign = symbols.getMinusSignString();
+        return DEFAULT.uniSet.contains(minusSign) ? DEFAULT : new MinusSignMatcher(minusSign);
+    }
+
+ private MinusSignMatcher(String symbolString) {
+ super(symbolString, UnicodeSet.EMPTY);
+ }
+
+ private MinusSignMatcher() {
+ super(UnicodeSetStaticCache.Key.MINUS_SIGN);
}
@Override
*/
public class NanMatcher extends SymbolMatcher {
- public NanMatcher(DecimalFormatSymbols symbols) {
- super(symbols.getNaN(), UnicodeSet.EMPTY);
+ private static final NanMatcher DEFAULT = new NanMatcher();
+
+    /**
+     * Returns the shared DEFAULT matcher when the NaN string of the given symbols equals the
+     * default matcher's string; otherwise returns a new matcher built for that string.
+     */
+    public static NanMatcher getInstance(DecimalFormatSymbols symbols) {
+        String nanString = symbols.getNaN();
+        return DEFAULT.string.equals(nanString) ? DEFAULT : new NanMatcher(nanString);
+    }
+
+ private NanMatcher(String symbolString) {
+ super(symbolString, UnicodeSet.EMPTY);
+ }
+
+ private NanMatcher() {
+ super("NaN", UnicodeSet.EMPTY);
}
@Override
AffixPatternProvider patternInfo = PatternStringParser.parseToPatternInfo(pattern);
AffixMatcher.generateFromAffixPatternProvider(patternInfo, parser, new UnicodeSet(), true);
- parser.addMatcher(IgnorablesMatcher.getInstance(IgnorablesMatcher.DEFAULT_UNISET));
+ parser.addMatcher(IgnorablesMatcher.DEFAULT);
DecimalMatcher decimalMatcher = new DecimalMatcher();
decimalMatcher.requireGroupingMatch = strictGrouping;
decimalMatcher.grouping1 = 3;
decimalMatcher.grouping2 = 2;
decimalMatcher.freeze(symbols, false);
parser.addMatcher(decimalMatcher);
- parser.addMatcher(new MinusSignMatcher());
+ parser.addMatcher(MinusSignMatcher.getInstance(symbols));
parser.addMatcher(new ScientificMatcher(symbols));
parser.addMatcher(new CurrencyMatcher(locale));
parser.addMatcher(new RequireNumberMatcher());
ULocale locale = symbols.getULocale();
Currency currency = CustomSymbolCurrency.resolve(properties.getCurrency(), locale, symbols);
boolean isStrict = properties.getParseMode() == ParseMode.STRICT;
- UnicodeSet ignorables = isStrict ? IgnorablesMatcher.STRICT_UNISET : IgnorablesMatcher.DEFAULT_UNISET;
+ IgnorablesMatcher ignorables = isStrict ? IgnorablesMatcher.STRICT : IgnorablesMatcher.DEFAULT;
boolean decimalSeparatorRequired = properties.getDecimalPatternMatchRequired()
? (properties.getDecimalSeparatorAlwaysShown() || properties.getMaximumFractionDigits() != 0)
// Set up a pattern modifier with mostly defaults to generate AffixMatchers.
AffixPatternProvider patternInfo = new PropertiesAffixPatternProvider(properties);
- AffixMatcher.generateFromAffixPatternProvider(patternInfo, parser, ignorables, !isStrict);
+ AffixMatcher.generateFromAffixPatternProvider(patternInfo, parser, ignorables.getSet(), !isStrict);
////////////////////////
/// CURRENCY MATCHER ///
/// OTHER STANDARD MATCHERS ///
///////////////////////////////
- if (!isStrict) {
- parser.addMatcher(IgnorablesMatcher.getInstance(ignorables));
+ parser.addMatcher(ignorables);
+ if (!isStrict || patternInfo.containsSymbolType(AffixUtils.TYPE_PLUS_SIGN) || properties.getSignAlwaysShown()) {
+ parser.addMatcher(PlusSignMatcher.getInstance(symbols));
}
- if (!isStrict || patternInfo.containsSymbolType(AffixUtils.TYPE_PLUS_SIGN)) {
- parser.addMatcher(new PlusSignMatcher());
- }
- parser.addMatcher(new MinusSignMatcher());
- parser.addMatcher(new NanMatcher(symbols));
- parser.addMatcher(new PercentMatcher());
- parser.addMatcher(new PermilleMatcher());
+ parser.addMatcher(MinusSignMatcher.getInstance(symbols));
+ parser.addMatcher(NanMatcher.getInstance(symbols));
+ parser.addMatcher(PercentMatcher.getInstance(symbols));
+ parser.addMatcher(PermilleMatcher.getInstance(symbols));
DecimalMatcher decimalMatcher = new DecimalMatcher();
decimalMatcher.requireGroupingMatch = isStrict;
decimalMatcher.groupingEnabled = properties.getGroupingSize() > 0;
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
+import com.ibm.icu.text.DecimalFormatSymbols;
import com.ibm.icu.text.UnicodeSet;
/**
*/
public class PercentMatcher extends SymbolMatcher {
- public PercentMatcher() {
- // FIXME
- super("%", new UnicodeSet("[%]"));
+ private static final PercentMatcher DEFAULT = new PercentMatcher();
+
+    /**
+     * Returns the shared DEFAULT matcher when its UnicodeSet contains the percent string of the
+     * given symbols; otherwise returns a new matcher built for that string.
+     */
+    public static PercentMatcher getInstance(DecimalFormatSymbols symbols) {
+        String percentSign = symbols.getPercentString();
+        return DEFAULT.uniSet.contains(percentSign) ? DEFAULT : new PercentMatcher(percentSign);
+    }
+
+ private PercentMatcher(String symbolString) {
+ super(symbolString, UnicodeSet.EMPTY);
+ }
+
+ private PercentMatcher() {
+ super(UnicodeSetStaticCache.Key.PERCENT_SIGN);
}
@Override
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
+import com.ibm.icu.text.DecimalFormatSymbols;
import com.ibm.icu.text.UnicodeSet;
/**
*/
public class PermilleMatcher extends SymbolMatcher {
- public PermilleMatcher() {
- // FIXME
- super("‰", new UnicodeSet("[‰]"));
+ private static final PermilleMatcher DEFAULT = new PermilleMatcher();
+
+    /**
+     * Returns the shared DEFAULT matcher when its UnicodeSet contains the per-mille string of
+     * the given symbols; otherwise returns a new matcher built for that string.
+     */
+    public static PermilleMatcher getInstance(DecimalFormatSymbols symbols) {
+        String permilleSign = symbols.getPerMillString();
+        return DEFAULT.uniSet.contains(permilleSign) ? DEFAULT : new PermilleMatcher(permilleSign);
+    }
+
+ private PermilleMatcher(String symbolString) {
+ super(symbolString, UnicodeSet.EMPTY);
+ }
+
+ private PermilleMatcher() {
+ super(UnicodeSetStaticCache.Key.PERMILLE_SIGN);
}
@Override
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
+import com.ibm.icu.text.DecimalFormatSymbols;
import com.ibm.icu.text.UnicodeSet;
/**
*/
public class PlusSignMatcher extends SymbolMatcher {
- public PlusSignMatcher() {
- // FIXME
- super("+", new UnicodeSet("[+]"));
+ private static final PlusSignMatcher DEFAULT = new PlusSignMatcher();
+
+    /**
+     * Returns the shared DEFAULT matcher when its UnicodeSet contains the plus sign string of
+     * the given symbols; otherwise returns a new matcher built for that string.
+     */
+    public static PlusSignMatcher getInstance(DecimalFormatSymbols symbols) {
+        String plusSign = symbols.getPlusSignString();
+        return DEFAULT.uniSet.contains(plusSign) ? DEFAULT : new PlusSignMatcher(plusSign);
+    }
+
+ private PlusSignMatcher(String symbolString) {
+ super(symbolString, UnicodeSet.EMPTY);
+ }
+
+ private PlusSignMatcher() {
+ super(UnicodeSetStaticCache.Key.PLUS_SIGN);
}
@Override
this.uniSet = uniSet;
}
+ /** Returns the UnicodeSet stored in this matcher's {@code uniSet} field (the same instance, not a copy). */
+ public UnicodeSet getSet() {
+ return uniSet;
+ }
+
@Override
public boolean match(StringSegment segment, ParsedNumber result) {
// Smoke test first; this matcher might be disabled.
+++ /dev/null
-// © 2017 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html#License
-package com.ibm.icu.impl.number.parse;
-
-import com.ibm.icu.text.DecimalFormatSymbols;
-import com.ibm.icu.text.UnicodeSet;
-
-/**
- * @author sffc
- *
- */
-public class SeparatorSetUtils {
-
- // TODO: Re-generate these sets from the database. They probably haven't been updated in a while.
-
- static final UnicodeSet COMMA_LIKE = new UnicodeSet(
- "[,\\u060C\\u066B\\u3001\\uFE10\\uFE11\\uFE50\\uFE51\\uFF0C\\uFF64]").freeze();
-
- static final UnicodeSet STRICT_COMMA_LIKE = new UnicodeSet("[,\\u066B\\uFE10\\uFE50\\uFF0C]").freeze();
-
- static final UnicodeSet PERIOD_LIKE = new UnicodeSet("[.\\u2024\\u3002\\uFE12\\uFE52\\uFF0E\\uFF61]").freeze();
-
- static final UnicodeSet STRICT_PERIOD_LIKE = new UnicodeSet("[.\\u2024\\uFE52\\uFF0E\\uFF61]").freeze();
-
- static final UnicodeSet OTHER_GROUPING_SEPARATORS = new UnicodeSet(
- "[\\ '\\u00A0\\u066C\\u2000-\\u200A\\u2018\\u2019\\u202F\\u205F\\u3000\\uFF07]").freeze();
-
- static final UnicodeSet COMMA_OR_PERIOD_LIKE = new UnicodeSet().addAll(COMMA_LIKE).addAll(PERIOD_LIKE).freeze();
-
- static final UnicodeSet STRICT_COMMA_OR_PERIOD_LIKE = new UnicodeSet().addAll(STRICT_COMMA_LIKE)
- .addAll(STRICT_PERIOD_LIKE).freeze();
-
- static final UnicodeSet COMMA_LIKE_OR_OTHER = new UnicodeSet().addAll(COMMA_LIKE).addAll(OTHER_GROUPING_SEPARATORS)
- .freeze();
-
- static final UnicodeSet STRICT_COMMA_LIKE_OR_OTHER = new UnicodeSet().addAll(STRICT_COMMA_LIKE)
- .addAll(OTHER_GROUPING_SEPARATORS).freeze();
-
- static final UnicodeSet PERIOD_LIKE_OR_OTHER = new UnicodeSet().addAll(PERIOD_LIKE)
- .addAll(OTHER_GROUPING_SEPARATORS).freeze();
-
- static final UnicodeSet STRICT_PERIOD_LIKE_OR_OTHER = new UnicodeSet().addAll(STRICT_PERIOD_LIKE)
- .addAll(OTHER_GROUPING_SEPARATORS).freeze();
-
- static final UnicodeSet COMMA_OR_PERIOD_LIKE_OR_OTHER = new UnicodeSet().addAll(COMMA_LIKE).addAll(PERIOD_LIKE)
- .addAll(OTHER_GROUPING_SEPARATORS).freeze();
-
- static final UnicodeSet STRICT_COMMA_OR_PERIOD_LIKE_OR_OTHER = new UnicodeSet().addAll(STRICT_COMMA_LIKE)
- .addAll(STRICT_PERIOD_LIKE).addAll(OTHER_GROUPING_SEPARATORS).freeze();
-
- public static UnicodeSet getGroupingUnicodeSet(DecimalFormatSymbols symbols, boolean isStrict) {
- if (isStrict) {
- return chooseUnicodeSet(symbols.getGroupingSeparatorString(),
- STRICT_COMMA_LIKE_OR_OTHER,
- STRICT_PERIOD_LIKE_OR_OTHER,
- OTHER_GROUPING_SEPARATORS);
- } else {
- return chooseUnicodeSet(symbols.getGroupingSeparatorString(),
- COMMA_LIKE_OR_OTHER,
- PERIOD_LIKE_OR_OTHER,
- OTHER_GROUPING_SEPARATORS);
- }
- }
-
- public static UnicodeSet getDecimalUnicodeSet(DecimalFormatSymbols symbols, boolean isStrict) {
- if (isStrict) {
- return chooseUnicodeSet(symbols.getDecimalSeparatorString(), STRICT_COMMA_LIKE, STRICT_PERIOD_LIKE);
- } else {
- return chooseUnicodeSet(symbols.getDecimalSeparatorString(), COMMA_LIKE, PERIOD_LIKE);
- }
- }
-
- private static UnicodeSet chooseUnicodeSet(String str, UnicodeSet set1) {
- return set1.contains(str) ? set1 : new UnicodeSet().add(str).freeze();
- }
-
- private static UnicodeSet chooseUnicodeSet(String str, UnicodeSet set1, UnicodeSet set2) {
- return set1.contains(str) ? set1 : chooseUnicodeSet(str, set2);
- }
-
- private static UnicodeSet chooseUnicodeSet(String str, UnicodeSet set1, UnicodeSet set2, UnicodeSet set3) {
- return set1.contains(str) ? set1 : chooseUnicodeSet(str, set2, set3);
- }
-
- public static UnicodeSet unionUnicodeSets(UnicodeSet set1, UnicodeSet set2) {
- // Note: == operators should be okay here since non-static UnicodeSets happen only in fallback cases.
- if (set1 == UnicodeSet.EMPTY && set2 == UnicodeSet.EMPTY) {
- return UnicodeSet.EMPTY;
- } else if (set1 == COMMA_LIKE_OR_OTHER && set2 == PERIOD_LIKE_OR_OTHER) {
- return COMMA_OR_PERIOD_LIKE_OR_OTHER;
- } else if (set1 == PERIOD_LIKE_OR_OTHER && set2 == COMMA_LIKE_OR_OTHER) {
- return COMMA_OR_PERIOD_LIKE_OR_OTHER;
- } else if (set1 == STRICT_COMMA_LIKE_OR_OTHER && set2 == STRICT_PERIOD_LIKE_OR_OTHER) {
- return STRICT_COMMA_OR_PERIOD_LIKE_OR_OTHER;
- } else if (set1 == STRICT_PERIOD_LIKE_OR_OTHER && set2 == STRICT_COMMA_LIKE_OR_OTHER) {
- return STRICT_COMMA_OR_PERIOD_LIKE_OR_OTHER;
- } else if (set1 == COMMA_LIKE && set2 == PERIOD_LIKE) {
- return COMMA_OR_PERIOD_LIKE;
- } else if (set1 == PERIOD_LIKE && set2 == COMMA_LIKE) {
- return COMMA_OR_PERIOD_LIKE;
- } else if (set1 == STRICT_COMMA_LIKE && set2 == STRICT_PERIOD_LIKE) {
- return STRICT_COMMA_OR_PERIOD_LIKE;
- } else if (set1 == STRICT_PERIOD_LIKE && set2 == STRICT_COMMA_LIKE) {
- return STRICT_COMMA_OR_PERIOD_LIKE;
- } else {
- return set1.cloneAsThawed().addAll(set2).freeze();
- }
- }
-}
public abstract class SymbolMatcher implements NumberParseMatcher {
protected final String string;
protected final UnicodeSet uniSet;
+ protected final UnicodeSet leadChars;
+
+ // TODO: Implement this class using only UnicodeSet and not String?
+ // How to deal with case folding?
protected SymbolMatcher(String symbolString, UnicodeSet symbolUniSet) {
string = symbolString;
uniSet = symbolUniSet;
+ leadChars = null;
+ }
+
+ /**
+ * Creates a matcher backed entirely by UnicodeSetStaticCache: both the symbol set and the
+ * lead-char set are looked up under the given key.
+ *
+ * The literal string is left empty, so this matcher has no string of its own to compare.
+ */
+ protected SymbolMatcher(UnicodeSetStaticCache.Key key) {
+ string = "";
+ uniSet = UnicodeSetStaticCache.get(key);
+ leadChars = UnicodeSetStaticCache.getLeadChars(key);
+ }
@Override
accept(segment, result);
return false;
}
+
+ if (string.isEmpty()) {
+ return segment.isLeadingSurrogate();
+ }
int overlap = segment.getCommonPrefixLength(string);
if (overlap == string.length()) {
segment.adjustOffset(string.length());
@Override
public UnicodeSet getLeadChars(boolean ignoreCase) {
+ if (leadChars != null) {
+ return leadChars;
+ }
+
UnicodeSet leadChars = new UnicodeSet();
ParsingUtils.putLeadSurrogates(uniSet, leadChars);
ParsingUtils.putLeadingChar(string, leadChars, ignoreCase);
--- /dev/null
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.impl.number.parse;
+
+import java.util.EnumMap;
+import java.util.Map;
+
+import com.ibm.icu.text.UnicodeSet;
+
+/**
+ * Static cache of the frozen {@link UnicodeSet} instances used by the number parsing matchers,
+ * together with a lead-char set derived from each of them.
+ *
+ * <p>Both maps are filled once by the static initializer at the bottom of the class and are only
+ * read afterwards. Every {@link Key} has an entry in both maps.
+ *
+ * @author sffc
+ */
+public class UnicodeSetStaticCache {
+    /** Identifies one cached set. */
+    public enum Key {
+        // Ignorables
+        BIDI,
+        WHITESPACE,
+        DEFAULT_IGNORABLES,
+        STRICT_IGNORABLES,
+
+        // Separators
+        COMMA,
+        PERIOD,
+        OTHER_GROUPING_SEPARATORS,
+        COMMA_OR_OTHER,
+        PERIOD_OR_OTHER,
+        COMMA_OR_PERIOD_OR_OTHER,
+        STRICT_COMMA,
+        STRICT_PERIOD,
+        STRICT_COMMA_OR_OTHER,
+        STRICT_PERIOD_OR_OTHER,
+        STRICT_COMMA_OR_PERIOD_OR_OTHER,
+
+        // Symbols
+        // TODO: NaN?
+        MINUS_SIGN,
+        PLUS_SIGN,
+        PERCENT_SIGN,
+        PERMILLE_SIGN,
+        INFINITY,
+
+        // Other
+        DIGITS,
+    }
+
+    /** The cached sets, keyed by {@link Key}. */
+    private static final Map<Key, UnicodeSet> unicodeSets = new EnumMap<Key, UnicodeSet>(Key.class);
+
+    /** For each key, the set produced by ParsingUtils.putLeadSurrogates from the cached set. */
+    private static final Map<Key, UnicodeSet> leadCharsSets = new EnumMap<Key, UnicodeSet>(Key.class);
+
+    /** Returns the cached set stored under the given key. */
+    public static UnicodeSet get(Key key) {
+        return unicodeSets.get(key);
+    }
+
+    /** Returns the cached lead-char set stored under the given key. */
+    public static UnicodeSet getLeadChars(Key key) {
+        return leadCharsSets.get(key);
+    }
+
+    /**
+     * Returns {@code key1} if its cached set contains {@code str}, otherwise {@code null}.
+     */
+    public static Key chooseFrom(String str, Key key1) {
+        return get(key1).contains(str) ? key1 : null;
+    }
+
+    /**
+     * Returns the first of the given keys, in argument order, whose cached set contains
+     * {@code str}, or {@code null} if none does.
+     */
+    public static Key chooseFrom(String str, Key key1, Key key2) {
+        return get(key1).contains(str) ? key1 : chooseFrom(str, key2);
+    }
+
+    /**
+     * Returns the first of the given keys, in argument order, whose cached set contains
+     * {@code str}, or {@code null} if none does.
+     */
+    public static Key chooseFrom(String str, Key key1, Key key2, Key key3) {
+        return get(key1).contains(str) ? key1 : chooseFrom(str, key2, key3);
+    }
+
+    /**
+     * Returns the key of the cached set that is the union of the sets for the two given keys.
+     *
+     * <p>The lookup is symmetric: argument order does not matter, and it does not depend on the
+     * declaration order of {@link Key}. Only the combinations listed in the body are known; for
+     * any other pair the result is {@code null}, and the caller has to build the union itself.
+     *
+     * @return the key of the precomputed union, or {@code null} if there is none
+     */
+    public static Key unionOf(Key key1, Key key2) {
+        if (isPair(key1, key2, Key.COMMA, Key.PERIOD_OR_OTHER)) {
+            // 1.234,567
+            return Key.COMMA_OR_PERIOD_OR_OTHER;
+        }
+        if (isPair(key1, key2, Key.COMMA, Key.OTHER_GROUPING_SEPARATORS)) {
+            // 1'234,567
+            return Key.COMMA_OR_OTHER;
+        }
+        if (isPair(key1, key2, Key.PERIOD, Key.COMMA_OR_OTHER)) {
+            // 1,234.567
+            return Key.COMMA_OR_PERIOD_OR_OTHER;
+        }
+        if (isPair(key1, key2, Key.PERIOD, Key.OTHER_GROUPING_SEPARATORS)) {
+            // 1'234.567
+            return Key.PERIOD_OR_OTHER;
+        }
+        if (isPair(key1, key2, Key.STRICT_COMMA, Key.STRICT_PERIOD_OR_OTHER)) {
+            // Strict 1.234,567
+            return Key.STRICT_COMMA_OR_PERIOD_OR_OTHER;
+        }
+        if (isPair(key1, key2, Key.STRICT_COMMA, Key.OTHER_GROUPING_SEPARATORS)) {
+            // Strict 1'234,567
+            return Key.STRICT_COMMA_OR_OTHER;
+        }
+        if (isPair(key1, key2, Key.STRICT_PERIOD, Key.STRICT_COMMA_OR_OTHER)) {
+            // Strict 1,234.567
+            return Key.STRICT_COMMA_OR_PERIOD_OR_OTHER;
+        }
+        if (isPair(key1, key2, Key.STRICT_PERIOD, Key.OTHER_GROUPING_SEPARATORS)) {
+            // Strict 1'234.567
+            return Key.STRICT_PERIOD_OR_OTHER;
+        }
+        return null;
+    }
+
+    /** Returns true if {key1, key2} is the unordered pair {a, b}. */
+    private static boolean isPair(Key key1, Key key2, Key a, Key b) {
+        return (key1 == a && key2 == b) || (key1 == b && key2 == a);
+    }
+
+    /** Builds a new frozen set containing everything in the cached sets for the two keys. */
+    private static UnicodeSet computeUnion(Key k1, Key k2) {
+        return new UnicodeSet().addAll(get(k1)).addAll(get(k2)).freeze();
+    }
+
+    /** Builds a new frozen set containing everything in the cached sets for the three keys. */
+    private static UnicodeSet computeUnion(Key k1, Key k2, Key k3) {
+        return new UnicodeSet().addAll(get(k1)).addAll(get(k2)).addAll(get(k3)).freeze();
+    }
+
+    // Order matters below: computeUnion() reads entries that must already have been put.
+    static {
+        // BiDi characters are skipped over and ignored at any point in the string, even in strict mode.
+        unicodeSets.put(Key.BIDI, new UnicodeSet("[[\\u200E\\u200F\\u061C]]").freeze());
+
+        // This set was decided after discussion with icu-design@. See ticket #13309.
+        // Zs+TAB is "horizontal whitespace" according to UTS #18 (blank property).
+        unicodeSets.put(Key.WHITESPACE, new UnicodeSet("[[:Zs:][\\u0009]]").freeze());
+
+        unicodeSets.put(Key.DEFAULT_IGNORABLES, computeUnion(Key.BIDI, Key.WHITESPACE));
+        unicodeSets.put(Key.STRICT_IGNORABLES, get(Key.BIDI));
+
+        // TODO: Re-generate these sets from the UCD. They probably haven't been updated in a while.
+        unicodeSets.put(Key.COMMA,
+                new UnicodeSet("[,\\u060C\\u066B\\u3001\\uFE10\\uFE11\\uFE50\\uFE51\\uFF0C\\uFF64]").freeze());
+        unicodeSets.put(Key.STRICT_COMMA, new UnicodeSet("[,\\u066B\\uFE10\\uFE50\\uFF0C]").freeze());
+        unicodeSets.put(Key.PERIOD, new UnicodeSet("[.\\u2024\\u3002\\uFE12\\uFE52\\uFF0E\\uFF61]").freeze());
+        unicodeSets.put(Key.STRICT_PERIOD, new UnicodeSet("[.\\u2024\\uFE52\\uFF0E\\uFF61]").freeze());
+        unicodeSets.put(Key.OTHER_GROUPING_SEPARATORS,
+                new UnicodeSet("[\\ '\\u00A0\\u066C\\u2000-\\u200A\\u2018\\u2019\\u202F\\u205F\\u3000\\uFF07]")
+                        .freeze());
+
+        unicodeSets.put(Key.COMMA_OR_OTHER, computeUnion(Key.COMMA, Key.OTHER_GROUPING_SEPARATORS));
+        unicodeSets.put(Key.PERIOD_OR_OTHER, computeUnion(Key.PERIOD, Key.OTHER_GROUPING_SEPARATORS));
+        unicodeSets.put(Key.COMMA_OR_PERIOD_OR_OTHER,
+                computeUnion(Key.COMMA, Key.PERIOD, Key.OTHER_GROUPING_SEPARATORS));
+        unicodeSets.put(Key.STRICT_COMMA_OR_OTHER, computeUnion(Key.STRICT_COMMA, Key.OTHER_GROUPING_SEPARATORS));
+        unicodeSets.put(Key.STRICT_PERIOD_OR_OTHER, computeUnion(Key.STRICT_PERIOD, Key.OTHER_GROUPING_SEPARATORS));
+        unicodeSets.put(Key.STRICT_COMMA_OR_PERIOD_OR_OTHER,
+                computeUnion(Key.STRICT_COMMA, Key.STRICT_PERIOD, Key.OTHER_GROUPING_SEPARATORS));
+
+        // Each pair of arguments below is a single-code-point range (start, end).
+        unicodeSets.put(Key.MINUS_SIGN,
+                new UnicodeSet(0x002D, 0x002D,
+                        0x207B, 0x207B,
+                        0x208B, 0x208B,
+                        0x2212, 0x2212,
+                        0x2796, 0x2796,
+                        0xFE63, 0xFE63,
+                        0xFF0D, 0xFF0D).freeze());
+        unicodeSets.put(Key.PLUS_SIGN,
+                new UnicodeSet(0x002B, 0x002B,
+                        0x207A, 0x207A,
+                        0x208A, 0x208A,
+                        0x2795, 0x2795,
+                        0xFB29, 0xFB29,
+                        0xFE62, 0xFE62,
+                        0xFF0B, 0xFF0B).freeze());
+
+        // TODO: Fill in the next three sets.
+        unicodeSets.put(Key.PERCENT_SIGN, new UnicodeSet("[%٪]").freeze());
+        unicodeSets.put(Key.PERMILLE_SIGN, new UnicodeSet("[‰؉]").freeze());
+        unicodeSets.put(Key.INFINITY, new UnicodeSet("[∞]").freeze());
+
+        unicodeSets.put(Key.DIGITS, new UnicodeSet("[:digit:]").freeze());
+
+        // Derive the lead-char set for every key from the set cached above.
+        for (Key key : Key.values()) {
+            UnicodeSet leadChars = new UnicodeSet();
+            ParsingUtils.putLeadSurrogates(get(key), leadChars);
+            leadCharsSets.put(key, leadChars.freeze());
+        }
+    }
+}