public CharSequence getSymbol(int type);
}
+ public static interface TokenConsumer {
+ public void consumeToken(int typeOrCp);
+ }
+
/**
* Estimates the number of code points present in an unescaped version of the affix pattern string
* (one that would be returned by {@link #unescape}), assuming that all interpolated symbols consume
return sb;
}
+ /**
+ * Returns whether the given affix pattern contains only symbols and ignorables as defined by the
+ * given ignorables set.
+ */
+ public static boolean containsOnlySymbolsAndIgnorables(
+ CharSequence affixPattern,
+ UnicodeSet ignorables) {
+ if (affixPattern == null) {
+ return true;
+ }
+ long tag = 0L;
+ while (hasNext(tag, affixPattern)) {
+ tag = nextToken(tag, affixPattern);
+ int typeOrCp = getTypeOrCp(tag);
+ if (typeOrCp >= 0 && !ignorables.contains(typeOrCp)) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ public static void iterateWithConsumer(CharSequence affixPattern, TokenConsumer consumer) {
+ assert affixPattern != null;
+ long tag = 0L;
+ while (hasNext(tag, affixPattern)) {
+ tag = nextToken(tag, affixPattern);
+ int typeOrCp = getTypeOrCp(tag);
+ consumer.consumeToken(typeOrCp);
+ }
+ }
+
/**
* Returns the next token from the affix pattern.
*
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
+import java.util.Objects;
import com.ibm.icu.impl.number.AffixPatternProvider;
import com.ibm.icu.impl.number.AffixUtils;
*
*/
public class AffixMatcher implements NumberParseMatcher {
- private final String prefix;
- private final String suffix;
+ private final AffixPatternMatcher prefix;
+ private final AffixPatternMatcher suffix;
private final int flags;
/**
- * Comparator for two AffixMatcher instances which prioritizes longer prefixes followed by longer suffixes, ensuring
- * that the longest prefix/suffix pair is always chosen.
+ * Comparator for two AffixMatcher instances which prioritizes longer prefixes followed by longer
+ * suffixes, ensuring that the longest prefix/suffix pair is always chosen.
*/
public static final Comparator<AffixMatcher> COMPARATOR = new Comparator<AffixMatcher>() {
@Override
public int compare(AffixMatcher o1, AffixMatcher o2) {
- if (o1.prefix.length() != o2.prefix.length()) {
- return o1.prefix.length() > o2.prefix.length() ? -1 : 1;
- } else if (o1.suffix.length() != o2.suffix.length()) {
- return o1.suffix.length() > o2.suffix.length() ? -1 : 1;
+ if (length(o1.prefix) != length(o2.prefix)) {
+ return length(o1.prefix) > length(o2.prefix) ? -1 : 1;
+ } else if (length(o1.suffix) != length(o2.suffix)) {
+ return length(o1.suffix) > length(o2.suffix) ? -1 : 1;
} else if (!o1.equals(o2)) {
// If the prefix and suffix are the same length, arbitrarily break ties.
// We can't return zero unless the elements are equal.
}
};
- public static void generateFromAffixPatternProvider(
+ public static void newGenerate(
AffixPatternProvider patternInfo,
NumberParserImpl output,
+ MatcherFactory factory,
IgnorablesMatcher ignorables,
int parseFlags) {
- // Lazy-initialize the StringBuilder.
- StringBuilder sb = null;
- // Use initial capacity of 6, the highest possible number of AffixMatchers.
- // TODO: Lazy-initialize?
- ArrayList<AffixMatcher> matchers = new ArrayList<AffixMatcher>(6);
+ String posPrefixString = patternInfo.getString(AffixPatternProvider.FLAG_POS_PREFIX);
+ String posSuffixString = patternInfo.getString(AffixPatternProvider.FLAG_POS_SUFFIX);
+ String negPrefixString = null;
+ String negSuffixString = null;
+ if (patternInfo.hasNegativeSubpattern()) {
+ negPrefixString = patternInfo.getString(AffixPatternProvider.FLAG_NEG_PREFIX);
+ negSuffixString = patternInfo.getString(AffixPatternProvider.FLAG_NEG_SUFFIX);
+ }
- sb = getCleanAffix(patternInfo, AffixPatternProvider.FLAG_POS_PREFIX, ignorables.getSet(), sb);
- String posPrefix = ParsingUtils.maybeFold(toStringOrEmpty(sb), parseFlags);
- sb = getCleanAffix(patternInfo, AffixPatternProvider.FLAG_POS_SUFFIX, ignorables.getSet(), sb);
- String posSuffix = ParsingUtils.maybeFold(toStringOrEmpty(sb), parseFlags);
+ if (0 == (parseFlags & ParsingUtils.PARSE_FLAG_USE_FULL_AFFIXES)
+ && AffixUtils.containsOnlySymbolsAndIgnorables(posPrefixString, ignorables.getSet())
+ && AffixUtils.containsOnlySymbolsAndIgnorables(posSuffixString, ignorables.getSet())
+ && AffixUtils.containsOnlySymbolsAndIgnorables(negPrefixString, ignorables.getSet())
+ && AffixUtils.containsOnlySymbolsAndIgnorables(negSuffixString, ignorables.getSet())) {
+ // The affixes contain only symbols and ignorables.
+ // No need to generate affix matchers.
+ return;
+ }
+ // The affixes have interesting characters, or we are in strict mode.
+ // Use initial capacity of 6, the highest possible number of AffixMatchers.
+ ArrayList<AffixMatcher> matchers = new ArrayList<AffixMatcher>(6);
boolean includeUnpaired = 0 != (parseFlags & ParsingUtils.PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES);
- if (!posPrefix.isEmpty() || !posSuffix.isEmpty()) {
- matchers.add(getInstance(posPrefix, posSuffix, 0));
- if (includeUnpaired && !posPrefix.isEmpty() && !posSuffix.isEmpty()) {
- matchers.add(getInstance(posPrefix, "", 0));
- matchers.add(getInstance("", posSuffix, 0));
- }
+ AffixPatternMatcher posPrefix = AffixPatternMatcher
+ .fromAffixPattern(posPrefixString, factory, parseFlags);
+ AffixPatternMatcher posSuffix = AffixPatternMatcher
+ .fromAffixPattern(posSuffixString, factory, parseFlags);
+
+ // Note: it is indeed possible for posPrefix and posSuffix to both be null.
+ // We still need to add that matcher for strict mode to work.
+ matchers.add(getInstance(posPrefix, posSuffix, 0));
+ if (includeUnpaired && posPrefix != null && posSuffix != null) {
+ matchers.add(getInstance(posPrefix, null, 0));
+ matchers.add(getInstance(null, posSuffix, 0));
}
if (patternInfo.hasNegativeSubpattern()) {
- sb = getCleanAffix(patternInfo, AffixPatternProvider.FLAG_NEG_PREFIX, ignorables.getSet(), sb);
- String negPrefix = ParsingUtils.maybeFold(toStringOrEmpty(sb), parseFlags);
- sb = getCleanAffix(patternInfo, AffixPatternProvider.FLAG_NEG_SUFFIX, ignorables.getSet(), sb);
- String negSuffix = ParsingUtils.maybeFold(toStringOrEmpty(sb), parseFlags);
+ AffixPatternMatcher negPrefix = AffixPatternMatcher
+ .fromAffixPattern(negPrefixString, factory, parseFlags);
+ AffixPatternMatcher negSuffix = AffixPatternMatcher
+ .fromAffixPattern(negSuffixString, factory, parseFlags);
- if (negPrefix.equals(posPrefix) && negSuffix.equals(posSuffix)) {
+ if (Objects.equals(negPrefix, posPrefix) && Objects.equals(negSuffix, posSuffix)) {
// No-op: favor the positive AffixMatcher
- } else if (!negPrefix.isEmpty() || !negSuffix.isEmpty()) {
+ } else {
matchers.add(getInstance(negPrefix, negSuffix, ParsedNumber.FLAG_NEGATIVE));
- if (includeUnpaired && !negPrefix.isEmpty() && !negSuffix.isEmpty()) {
+ if (includeUnpaired && negPrefix != null && negSuffix != null) {
if (!negPrefix.equals(posPrefix)) {
- matchers.add(getInstance(negPrefix, "", ParsedNumber.FLAG_NEGATIVE));
+ matchers.add(getInstance(negPrefix, null, ParsedNumber.FLAG_NEGATIVE));
}
if (!negSuffix.equals(posSuffix)) {
- matchers.add(getInstance("", negSuffix, ParsedNumber.FLAG_NEGATIVE));
+ matchers.add(getInstance(null, negSuffix, ParsedNumber.FLAG_NEGATIVE));
}
}
}
output.addMatchers(matchers);
}
- private static StringBuilder getCleanAffix(
- AffixPatternProvider patternInfo,
- int flag,
- UnicodeSet ignorables,
- StringBuilder sb) {
- if (sb != null) {
- sb.setLength(0);
- }
- if (patternInfo.length(flag) > 0) {
- sb = AffixUtils.trimSymbolsAndIgnorables(patternInfo.getString(flag), ignorables, sb);
- }
- return sb;
- }
-
- private static String toStringOrEmpty(StringBuilder sb) {
- return (sb == null || sb.length() == 0) ? "" : sb.toString();
- }
-
- private static final AffixMatcher getInstance(String prefix, String suffix, int flags) {
+ private static final AffixMatcher getInstance(
+ AffixPatternMatcher prefix,
+ AffixPatternMatcher suffix,
+ int flags) {
// TODO: Special handling for common cases like both strings empty.
return new AffixMatcher(prefix, suffix, flags);
}
- private AffixMatcher(String prefix, String suffix, int flags) {
- assert prefix != null;
- assert suffix != null;
+ private AffixMatcher(AffixPatternMatcher prefix, AffixPatternMatcher suffix, int flags) {
this.prefix = prefix;
this.suffix = suffix;
this.flags = flags;
public boolean match(StringSegment segment, ParsedNumber result) {
if (!result.seenNumber()) {
// Prefix
- if (result.prefix != null || prefix.length() == 0) {
+ // Do not match if:
+ // 1. We have already seen a prefix (result.prefix != null)
+ // 2. The prefix in this AffixMatcher is empty (prefix == null)
+ if (result.prefix != null || prefix == null) {
return false;
}
- int overlap = segment.getCommonPrefixLength(prefix);
- if (overlap == prefix.length()) {
- result.prefix = prefix;
- segment.adjustOffset(overlap);
- result.setCharsConsumed(segment);
- return false;
- } else if (overlap == segment.length()) {
- return true;
+
+ // Attempt to match the prefix.
+ int initialOffset = segment.getOffset();
+ boolean maybeMore = prefix.match(segment, result);
+ if (initialOffset != segment.getOffset()) {
+ result.prefix = prefix.getPattern();
}
+ return maybeMore;
} else {
// Suffix
- if (result.suffix != null || suffix.length() == 0 || !prefix.equals(orEmpty(result.prefix))) {
+ // Do not match if:
+ // 1. We have already seen a suffix (result.suffix != null)
+ // 2. The suffix in this AffixMatcher is empty (suffix == null)
+ // 3. The matched prefix does not equal this AffixMatcher's prefix
+ if (result.suffix != null || suffix == null || !matched(prefix, result.prefix)) {
return false;
}
- int overlap = segment.getCommonPrefixLength(suffix);
- if (overlap == suffix.length()) {
- result.suffix = suffix;
- segment.adjustOffset(overlap);
- result.setCharsConsumed(segment);
- return false;
- } else if (overlap == segment.length()) {
- return true;
+
+ // Attempt to match the suffix.
+ int initialOffset = segment.getOffset();
+ boolean maybeMore = suffix.match(segment, result);
+ if (initialOffset != segment.getOffset()) {
+ result.suffix = suffix.getPattern();
}
+ return maybeMore;
}
-
- return false;
}
@Override
public UnicodeSet getLeadCodePoints() {
UnicodeSet leadCodePoints = new UnicodeSet();
- ParsingUtils.putLeadCodePoint(prefix, leadCodePoints);
- ParsingUtils.putLeadCodePoint(suffix, leadCodePoints);
+ if (prefix != null) {
+ leadCodePoints.addAll(prefix.getLeadCodePoints());
+ }
+ if (suffix != null) {
+ leadCodePoints.addAll(suffix.getLeadCodePoints());
+ }
return leadCodePoints.freeze();
}
+ @Override
+ public boolean matchesEmpty() {
+ // This is a stub implementation.
+ throw new AssertionError();
+ }
+
@Override
public void postProcess(ParsedNumber result) {
// Check to see if our affix is the one that was matched. If so, set the flags in the result.
- if (prefix.equals(orEmpty(result.prefix)) && suffix.equals(orEmpty(result.suffix))) {
+ if (matched(prefix, result.prefix) && matched(suffix, result.suffix)) {
// Fill in the result prefix and suffix with non-null values (empty string).
// Used by strict mode to determine whether an entire affix pair was matched.
- result.prefix = prefix;
- result.suffix = suffix;
+ if (result.prefix == null) {
+ result.prefix = "";
+ }
+ if (result.suffix == null) {
+ result.suffix = "";
+ }
result.flags |= flags;
}
}
/**
- * Returns the input string, or "" if input is null.
+ * Helper method to return whether the given AffixPatternMatcher equals the given pattern string.
+ * Either both arguments must be null or the pattern string inside the AffixPatternMatcher must equal
+ * the given pattern string.
*/
- static String orEmpty(String str) {
- return str == null ? "" : str;
+ static boolean matched(AffixPatternMatcher affix, String patternString) {
+ return (affix == null && patternString == null)
+ || (affix != null && affix.getPattern().equals(patternString));
}
/**
- * Returns the sum of prefix and suffix length in the ParsedNumber.
+ * Helper method to return the length of the given AffixPatternMatcher. Returns 0 for null.
*/
- public static int affixLength(ParsedNumber o2) {
- return orEmpty(o2.prefix).length() + orEmpty(o2.suffix).length();
+ private static int length(AffixPatternMatcher matcher) {
+ return matcher == null ? 0 : matcher.getPattern().length();
}
@Override
return false;
}
AffixMatcher other = (AffixMatcher) _other;
- return prefix.equals(other.prefix) && suffix.equals(other.suffix) && flags == other.flags;
+ return Objects.equals(prefix, other.prefix)
+ && Objects.equals(suffix, other.suffix)
+ && flags == other.flags;
}
@Override
public int hashCode() {
- return prefix.hashCode() ^ suffix.hashCode() ^ flags;
+ return Objects.hashCode(prefix) ^ Objects.hashCode(suffix) ^ flags;
}
@Override
--- /dev/null
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.impl.number.parse;
+
+import com.ibm.icu.impl.number.AffixUtils;
+
+/**
+ * A specialized version of {@link SeriesMatcher} that matches EITHER a prefix OR a suffix.
+ * {@link AffixMatcher} combines two of these in order to match both the prefix and suffix.
+ *
+ * @author sffc
+ */
+public class AffixPatternMatcher extends SeriesMatcher implements AffixUtils.TokenConsumer {
+
+ private final String affixPattern;
+
+ // Used during construction only:
+ private MatcherFactory factory;
+ private IgnorablesMatcher ignorables;
+ private int lastTypeOrCp;
+
+ private AffixPatternMatcher(String affixPattern) {
+ this.affixPattern = affixPattern;
+ }
+
+ /**
+ * Creates an AffixPatternMatcher (based on SeriesMatcher) from the given affix pattern. Returns null
+ * if the affix pattern is empty.
+ */
+ public static AffixPatternMatcher fromAffixPattern(
+ String affixPattern,
+ MatcherFactory factory,
+ int parseFlags) {
+ if (affixPattern.isEmpty()) {
+ return null;
+ }
+
+ affixPattern = ParsingUtils.maybeFold(affixPattern, parseFlags);
+ AffixPatternMatcher series = new AffixPatternMatcher(affixPattern);
+ series.factory = factory;
+ series.ignorables = (0 != (parseFlags & ParsingUtils.PARSE_FLAG_EXACT_AFFIX)) ? null
+ : factory.ignorables();
+ series.lastTypeOrCp = 0;
+ AffixUtils.iterateWithConsumer(affixPattern, series);
+
+ // De-reference the memory
+ series.factory = null;
+ series.ignorables = null;
+ series.lastTypeOrCp = 0;
+
+ series.freeze();
+ return series;
+ }
+
+ /**
+ * This method is NOT intended to be called directly. It is here for the AffixUtils.TokenConsumer
+ * interface only.
+ */
+ @Override
+ public void consumeToken(int typeOrCp) {
+ // This is called by AffixUtils.iterateWithConsumer() for each token.
+ if (typeOrCp < 0) {
+ // Don't add more than two ignorables matchers in a row
+ if (ignorables != null
+ && (lastTypeOrCp < 0 || !ignorables.getSet().contains(lastTypeOrCp))) {
+ addMatcher(ignorables);
+ }
+ switch (typeOrCp) {
+ case AffixUtils.TYPE_MINUS_SIGN:
+ addMatcher(factory.minusSign());
+ break;
+ case AffixUtils.TYPE_PLUS_SIGN:
+ addMatcher(factory.plusSign());
+ break;
+ case AffixUtils.TYPE_PERCENT:
+ addMatcher(factory.percent());
+ break;
+ case AffixUtils.TYPE_PERMILLE:
+ addMatcher(factory.permille());
+ break;
+ case AffixUtils.TYPE_CURRENCY_SINGLE:
+ case AffixUtils.TYPE_CURRENCY_DOUBLE:
+ case AffixUtils.TYPE_CURRENCY_TRIPLE:
+ case AffixUtils.TYPE_CURRENCY_QUAD:
+ case AffixUtils.TYPE_CURRENCY_QUINT:
+ // All currency symbols use the same matcher
+ addMatcher(factory.currency());
+ break;
+ default:
+ throw new AssertionError();
+ }
+ } else if (ignorables != null && ignorables.getSet().contains(typeOrCp)) {
+ // Don't add more than two ignorables matchers in a row
+ if (lastTypeOrCp < 0 || !ignorables.getSet().contains(lastTypeOrCp)) {
+ addMatcher(ignorables);
+ }
+ } else {
+ // Start of a literal: add ignorables matcher if the previous token was a symbol
+ if (ignorables != null && lastTypeOrCp < 0) {
+ addMatcher(ignorables);
+ }
+ addMatcher(CodePointMatcher.getInstance(typeOrCp));
+ }
+ lastTypeOrCp = typeOrCp;
+ }
+
+ public String getPattern() {
+ return affixPattern;
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ if (this == other)
+ return true;
+ if (!(other instanceof AffixPatternMatcher))
+ return false;
+ return affixPattern.equals(((AffixPatternMatcher) other).affixPattern);
+ }
+
+ @Override
+ public int hashCode() {
+ return affixPattern.hashCode();
+ }
+
+ @Override
+ public String toString() {
+ return affixPattern;
+ }
+}
--- /dev/null
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.impl.number.parse;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import com.ibm.icu.text.UnicodeSet;
+
+/**
+ * Composes a number of matchers, and succeeds if any of the matchers succeed.
+ *
+ * @author sffc
+ * @see SeriesMatcher
+ */
+public class AnyMatcher implements NumberParseMatcher {
+
+ protected List<NumberParseMatcher> matchers = null;
+ protected boolean frozen = false;
+
+ public void addMatcher(NumberParseMatcher matcher) {
+ assert !frozen;
+ if (matchers == null) {
+ matchers = new ArrayList<NumberParseMatcher>();
+ }
+ matchers.add(matcher);
+ }
+
+ public void freeze() {
+ frozen = true;
+ }
+
+ @Override
+ public boolean match(StringSegment segment, ParsedNumber result) {
+ assert frozen;
+ if (matchers == null) {
+ return false;
+ }
+
+ // TODO: Give a nice way to reset ParsedNumber to avoid the copy here.
+ ParsedNumber backup = new ParsedNumber();
+ backup.copyFrom(result);
+
+ int initialOffset = segment.getOffset();
+ boolean maybeMore = false;
+ for (int i = 0; i < matchers.size(); i++) {
+ NumberParseMatcher matcher = matchers.get(i);
+ maybeMore = maybeMore || matcher.match(segment, result);
+ if (segment.getOffset() != initialOffset) {
+ // Match succeeded. Return true here to be safe.
+ // TODO: Better would be to run each matcher and return true only if at least one of the
+ // matchers returned true.
+ return true;
+ }
+ }
+
+ // None of the matchers succeeded.
+ return maybeMore;
+ }
+
+ @Override
+ public UnicodeSet getLeadCodePoints() {
+ assert frozen;
+ if (matchers == null) {
+ return UnicodeSet.EMPTY;
+ }
+
+ UnicodeSet leadCodePoints = new UnicodeSet();
+ for (int i = 0; i < matchers.size(); i++) {
+ NumberParseMatcher matcher = matchers.get(i);
+ leadCodePoints.addAll(matcher.getLeadCodePoints());
+ }
+ return leadCodePoints.freeze();
+ }
+
+ @Override
+ public boolean matchesEmpty() {
+ assert frozen;
+ if (matchers == null) {
+ return true;
+ }
+
+ for (int i = 0; i < matchers.size(); i++) {
+ NumberParseMatcher matcher = matchers.get(i);
+ if (matcher.matchesEmpty()) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ @Override
+ public void postProcess(ParsedNumber result) {
+ assert frozen;
+ if (matchers == null) {
+ return;
+ }
+
+ for (int i = 0; i < matchers.size(); i++) {
+ NumberParseMatcher matcher = matchers.get(i);
+ matcher.postProcess(result);
+ }
+ }
+
+ @Override
+ public String toString() {
+ return "<SeriesMatcher " + matchers + ">";
+ }
+
+}
--- /dev/null
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.impl.number.parse;
+
+import com.ibm.icu.text.UnicodeSet;
+
+/**
+ * Matches a single code point, performing no other logic.
+ *
+ * @author sffc
+ */
+public class CodePointMatcher implements NumberParseMatcher {
+
+ private final int cp;
+
+ public static CodePointMatcher getInstance(int cp) {
+ // TODO: Cache certain popular instances?
+ return new CodePointMatcher(cp);
+ }
+
+ private CodePointMatcher(int cp) {
+ this.cp = cp;
+ }
+
+ @Override
+ public boolean match(StringSegment segment, ParsedNumber result) {
+ if (segment.getCodePoint() == cp) {
+ segment.adjustOffset(Character.charCount(cp));
+ result.setCharsConsumed(segment);
+ }
+ return false;
+ }
+
+ @Override
+ public UnicodeSet getLeadCodePoints() {
+ return new UnicodeSet().add(cp).freeze();
+ }
+
+ @Override
+ public boolean matchesEmpty() {
+ return false;
+ }
+
+ @Override
+ public void postProcess(ParsedNumber result) {
+ // No-op
+ }
+
+ @Override
+ public String toString() {
+ return "<CodePointMatcher U+" + Integer.toHexString(cp) + ">";
+ }
+
+}
private final String currency1;
private final String currency2;
- public static NumberParseMatcher getInstance(Currency currency, ULocale loc, int setupFlags) {
+ public static CurrencyMatcher getInstance(Currency currency, ULocale loc, int setupFlags) {
return new CurrencyMatcher(currency.getSubtype(),
ParsingUtils.maybeFold(currency.getSymbol(loc), setupFlags),
ParsingUtils.maybeFold(currency.getCurrencyCode(), setupFlags));
return leadCodePoints.freeze();
}
+ @Override
+ public boolean matchesEmpty() {
+ return false;
+ }
+
@Override
public void postProcess(ParsedNumber result) {
// No-op
private final TextTrieMap<CurrencyStringInfo> longNameTrie;
private final TextTrieMap<CurrencyStringInfo> symbolTrie;
- public static NumberParseMatcher getInstance(ULocale locale) {
+ public static CurrencyTrieMatcher getInstance(ULocale locale) {
// TODO: Pre-compute some of the more popular locales?
return new CurrencyTrieMatcher(locale);
}
return leadCodePoints.freeze();
}
+ @Override
+ public boolean matchesEmpty() {
+ return false;
+ }
+
@Override
public void postProcess(ParsedNumber result) {
// No-op
*/
public class DecimalMatcher implements NumberParseMatcher {
+ /** If true, only accept strings whose grouping sizes match the locale */
private final boolean requireGroupingMatch;
+
+ /** If true, do not accept grouping separators at all */
private final boolean groupingDisabled;
- private final int grouping1;
- private final int grouping2;
+
+ /** If true, do not accept numbers in the fraction */
private final boolean integerOnly;
+
+ /** If true, save the result as an exponent instead of a quantity in the ParsedNumber */
private final boolean isScientific;
+ private final int grouping1;
+ private final int grouping2;
+
// Assumption: these sets all consist of single code points. If this assumption needs to be broken,
// fix getLeadCodePoints() as well as matching logic. Be careful of the performance impact.
private final UnicodeSet groupingUniSet;
requireGroupingMatch = 0 != (parseFlags & ParsingUtils.PARSE_FLAG_STRICT_GROUPING_SIZE);
groupingDisabled = 0 != (parseFlags & ParsingUtils.PARSE_FLAG_GROUPING_DISABLED);
- grouping1 = grouper.getPrimary();
- grouping2 = grouper.getSecondary();
integerOnly = 0 != (parseFlags & ParsingUtils.PARSE_FLAG_INTEGER_ONLY);
isScientific = 0 != (parseFlags & ParsingUtils.PARSE_FLAG_DECIMAL_SCIENTIFIC);
+ grouping1 = grouper.getPrimary();
+ grouping2 = grouper.getSecondary();
}
@Override
return false;
}
- int initialOffset = segment.getOffset();
+ ParsedNumber backup = null;
+ if (requireGroupingMatch) {
+ backup = new ParsedNumber();
+ backup.copyFrom(result);
+ }
+
+ int firstGroup = 0;
+ int prevGroup = 0;
int currGroup = 0;
int separator = -1;
- int lastSeparatorOffset = segment.getOffset();
+ int initialOffset = segment.getOffset();
int exponent = 0;
boolean hasPartialPrefix = false;
boolean seenBothSeparators = false;
+ boolean illegalGrouping = false;
while (segment.length() > 0) {
hasPartialPrefix = false;
if (!seenBothSeparators && cp != -1 && separatorSet.contains(cp)) {
if (separator == -1) {
// First separator; could be either grouping or decimal.
- separator = cp;
- if (!groupingDisabled
- && requireGroupingMatch
- && groupingUniSet.contains(cp)
- && (currGroup == 0 || currGroup > grouping2)) {
+ if (groupingDisabled && !decimalUniSet.contains(cp)) {
break;
}
+ if (integerOnly && !groupingUniSet.contains(cp)) {
+ break;
+ }
+ separator = cp;
+ firstGroup = currGroup;
+ if (requireGroupingMatch && currGroup == 0 && !decimalUniSet.contains(cp)) {
+ illegalGrouping = true;
+ }
} else if (!groupingDisabled && separator == cp && groupingUniSet.contains(cp)) {
// Second or later grouping separator.
- if (requireGroupingMatch && currGroup != grouping2) {
+ prevGroup = currGroup;
+ if (requireGroupingMatch && currGroup == 0) {
break;
}
- } else if (!groupingDisabled && separator != cp && decimalUniSet.contains(cp)) {
+ if (requireGroupingMatch && currGroup != grouping2) {
+ if (currGroup == grouping1) {
+ break;
+ } else {
+ illegalGrouping = true;
+ break;
+ }
+ }
+ } else if (!integerOnly && separator != cp && decimalUniSet.contains(cp)) {
// Decimal separator after a grouping separator.
if (requireGroupingMatch && currGroup != grouping1) {
- break;
+ illegalGrouping = true;
}
seenBothSeparators = true;
} else {
break;
}
currGroup = 0;
- lastSeparatorOffset = segment.getOffset();
segment.adjustOffset(Character.charCount(cp));
continue;
}
break;
}
- if (isScientific) {
+ // Unless the first group directly precedes the grouping separator, check it for validity
+ if (seenBothSeparators || (separator != -1 && !decimalUniSet.contains(separator))) {
+ if (currGroup > 0 && firstGroup > grouping2) {
+ illegalGrouping = true;
+ }
+ }
+
+ // Check the final grouping size for validity
+ if (requireGroupingMatch
+ && separator != -1
+ && !seenBothSeparators
+ && !decimalUniSet.contains(separator)) {
+ if (currGroup > 0 && currGroup != grouping1) {
+ illegalGrouping = true;
+ }
+ if (currGroup == 0 && prevGroup > 0 && prevGroup != grouping1) {
+ illegalGrouping = true;
+ }
+ }
+
+ if (requireGroupingMatch && illegalGrouping) {
+ result.copyFrom(backup);
+ segment.setOffset(initialOffset);
+
+ } else if (isScientific) {
boolean overflow = (exponent == Integer.MAX_VALUE);
if (!overflow) {
try {
result.flags |= ParsedNumber.FLAG_INFINITY;
}
}
- } else if (result.quantity == null) {
- // No-op: strings that start with a separator without any other digits
+
+ } else if (result.quantity == null && segment.getOffset() != initialOffset) {
+ // Strings that start with a separator but have no digits.
+ // We don't need a backup of ParsedNumber because no changes could have been made to it.
+ segment.setOffset(initialOffset);
+ hasPartialPrefix = true;
+
} else if (seenBothSeparators || (separator != -1 && decimalUniSet.contains(separator))) {
// The final separator was a decimal separator.
- result.flags |= ParsedNumber.FLAG_HAS_DECIMAL_SEPARATOR;
- result.quantity.adjustMagnitude(-currGroup);
- if (integerOnly) {
- result.quantity.truncate();
- segment.setOffset(lastSeparatorOffset);
- }
- } else if (separator != -1 && groupingDisabled) {
- // The final separator was a grouping separator, but we aren't accepting grouping.
- // Reset the offset to immediately before that grouping separator.
- result.quantity.adjustMagnitude(-currGroup);
- result.quantity.truncate();
- segment.setOffset(lastSeparatorOffset);
- } else if (separator != -1
- && requireGroupingMatch
- && groupingUniSet.contains(separator)
- && currGroup != grouping1) {
- // The final separator was a grouping separator, and we have a mismatched grouping size.
- // Reset the offset to the beginning of the number.
- // TODO
result.quantity.adjustMagnitude(-currGroup);
- result.quantity.truncate();
- segment.setOffset(lastSeparatorOffset);
- // result.quantity = null;
- // segment.setOffset(initialOffset);
+ result.flags |= ParsedNumber.FLAG_HAS_DECIMAL_SEPARATOR;
+
}
return segment.length() == 0 || hasPartialPrefix;
return leadCodePoints.freeze();
}
+ @Override
+ public boolean matchesEmpty() {
+ return false;
+ }
+
@Override
public void postProcess(ParsedNumber result) {
// No-op
--- /dev/null
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.impl.number.parse;
+
+import com.ibm.icu.text.DecimalFormatSymbols;
+import com.ibm.icu.util.Currency;
+import com.ibm.icu.util.ULocale;
+
+/**
+ * @author sffc
+ *
+ */
+public class MatcherFactory {
+ Currency currency;
+ DecimalFormatSymbols symbols;
+ IgnorablesMatcher ignorables;
+ ULocale locale;
+ int parseFlags;
+
+ public MinusSignMatcher minusSign() {
+ return MinusSignMatcher.getInstance(symbols);
+ }
+
+ public PlusSignMatcher plusSign() {
+ return PlusSignMatcher.getInstance(symbols);
+ }
+
+ public PercentMatcher percent() {
+ return PercentMatcher.getInstance(symbols);
+ }
+
+ public PermilleMatcher permille() {
+ return PermilleMatcher.getInstance(symbols);
+ }
+
+ public AnyMatcher currency() {
+ AnyMatcher any = new AnyMatcher();
+ any.addMatcher(CurrencyMatcher.getInstance(currency, locale, parseFlags));
+ any.addMatcher(CurrencyTrieMatcher.getInstance(locale));
+ any.freeze();
+ return any;
+ }
+
+ public IgnorablesMatcher ignorables() {
+ return ignorables;
+ }
+}
*/
public interface NumberParseMatcher {
/**
- * Runs this matcher starting at the beginning of the given StringSegment. If this matcher finds something
- * interesting in the StringSegment, it should update the offset of the StringSegment corresponding to how many
- * chars were matched.
+ * Runs this matcher starting at the beginning of the given StringSegment. If this matcher finds
+ * something interesting in the StringSegment, it should update the offset of the StringSegment
+ * corresponding to how many chars were matched.
*
* @param segment
- * The StringSegment to match against. Matches always start at the beginning of the segment. The segment
- * is guaranteed to contain at least one char.
+ * The StringSegment to match against. Matches always start at the beginning of the
+ * segment. The segment is guaranteed to contain at least one char.
* @param result
* The data structure to store results if the match succeeds.
- * @return Whether this matcher thinks there may be more interesting chars beyond the end of the string segment.
+ * @return Whether this matcher thinks there may be more interesting chars beyond the end of the
+ * string segment.
*/
public boolean match(StringSegment segment, ParsedNumber result);
/**
- * Should return a set representing all possible chars (UTF-16 code units) that could be the first char that this
- * matcher can consume. This method is only called during construction phase, and its return value is used to skip
- * this matcher unless a segment begins with a char in this set. To make this matcher always run, return
- * {@link UnicodeSet#ALL_CODE_POINTS}.
+ * Should return a set representing all possible chars (UTF-16 code units) that could be the first
+ * char that this matcher can consume. This method is only called during construction phase, and its
+ * return value is used to skip this matcher unless a segment begins with a char in this set. To make
+ * this matcher always run, return {@link UnicodeSet#ALL_CODE_POINTS}.
*/
public UnicodeSet getLeadCodePoints();
/**
- * Method called at the end of a parse, after all matchers have failed to consume any more chars. Allows a matcher
- * to make final modifications to the result given the knowledge that no more matches are possible.
+ * Whether this matcher is well-defined for the empty string. Matchers that are looking for specific
+ * symbols should return false here. Matchers that are looking for any number of copies of a certain
+ * code point or string, like RangeMatcher and IgnorablesMatcher, should return true.
+ *
+ * @return Whether this matcher can accept the empty string.
+ */
+ public boolean matchesEmpty();
+
+ /**
+ * Method called at the end of a parse, after all matchers have failed to consume any more chars.
+ * Allows a matcher to make final modifications to the result given the knowledge that no more
+ * matches are possible.
*
* @param result
* The data structure to store results.
DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(locale);
IgnorablesMatcher ignorables = IgnorablesMatcher.DEFAULT;
+ MatcherFactory factory = new MatcherFactory();
+ factory.currency = Currency.getInstance("USD");
+ factory.symbols = symbols;
+ factory.ignorables = ignorables;
+ factory.locale = locale;
+ factory.parseFlags = parseFlags;
+
ParsedPatternInfo patternInfo = PatternStringParser.parseToPatternInfo(pattern);
- AffixMatcher.generateFromAffixPatternProvider(patternInfo, parser, ignorables, parseFlags);
+ AffixMatcher.newGenerate(patternInfo, parser, factory, ignorables, parseFlags);
Grouper grouper = Grouper.defaults().withLocaleData(patternInfo);
}
if (isStrict) {
parseFlags |= ParsingUtils.PARSE_FLAG_STRICT_GROUPING_SIZE;
+ parseFlags |= ParsingUtils.PARSE_FLAG_STRICT_SEPARATORS;
} else {
parseFlags |= ParsingUtils.PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES;
}
NumberParserImpl parser = new NumberParserImpl(parseFlags, optimize);
+ MatcherFactory factory = new MatcherFactory();
+ factory.currency = currency;
+ factory.symbols = symbols;
+ factory.ignorables = ignorables;
+ factory.locale = locale;
+ factory.parseFlags = parseFlags;
+
//////////////////////
/// AFFIX MATCHERS ///
//////////////////////
// Set up a pattern modifier with mostly defaults to generate AffixMatchers.
- AffixMatcher.generateFromAffixPatternProvider(patternInfo, parser, ignorables, parseFlags);
+ AffixMatcher.newGenerate(patternInfo, parser, factory, ignorables, parseFlags);
////////////////////////
/// CURRENCY MATCHER ///
public DecimalQuantity_DualStorageBCD quantity;
/**
- * The number of chars accepted during parsing. This is NOT necessarily the same as the StringSegment offset; "weak"
- * chars, like whitespace, change the offset, but the charsConsumed is not touched until a "strong" char is
- * encountered.
+ * The number of chars accepted during parsing. This is NOT necessarily the same as the StringSegment
+ * offset; "weak" chars, like whitespace, change the offset, but the charsConsumed is not touched
+ * until a "strong" char is encountered.
*/
public int charsConsumed;
public int flags;
/**
- * The prefix string that got consumed.
+ * The pattern string corresponding to the prefix that got consumed.
*/
public String prefix;
/**
- * The suffix string that got consumed.
+ * The pattern string corresponding to the suffix that got consumed.
*/
public String suffix;
}
public void copyFrom(ParsedNumber other) {
- quantity = other.quantity == null ? null : (DecimalQuantity_DualStorageBCD) other.quantity.createCopy();
+ quantity = other.quantity == null ? null
+ : (DecimalQuantity_DualStorageBCD) other.quantity.createCopy();
charsConsumed = other.charsConsumed;
flags = other.flags;
prefix = other.prefix;
}
/**
- * Returns whether this the parse was successful. To be successful, at least one char must have been consumed,
- * and the failure flag must not be set.
+ * Returns whether this the parse was successful. To be successful, at least one char must have been
+ * consumed, and the failure flag must not be set.
*/
public boolean success() {
return charsConsumed > 0 && 0 == (flags & FLAG_FAIL);
// Check for NaN, infinity, and -0.0
if (sawNaN) {
- return Double.NaN;
+ return Double.NaN;
}
if (sawInfinity) {
- if (sawNegative) {
- return Double.NEGATIVE_INFINITY;
- } else {
- return Double.POSITIVE_INFINITY;
- }
+ if (sawNegative) {
+ return Double.NEGATIVE_INFINITY;
+ } else {
+ return Double.POSITIVE_INFINITY;
+ }
}
if (quantity.isZero() && sawNegative) {
- return -0.0;
+ return -0.0;
}
if (quantity.fitsInLong() && !forceBigDecimal) {
public static final int PARSE_FLAG_GROUPING_DISABLED = 0x0020;
public static final int PARSE_FLAG_DECIMAL_SCIENTIFIC = 0x0040;
public static final int PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES = 0x0080;
+ public static final int PARSE_FLAG_USE_FULL_AFFIXES = 0x0100;
+ public static final int PARSE_FLAG_EXACT_AFFIX = 0x0200;
public static void putLeadCodePoints(UnicodeSet input, UnicodeSet output) {
for (EntryRange range : input.ranges()) {
return leadCodePoints.freeze();
}
+ @Override
+ public boolean matchesEmpty() {
+ return true;
+ }
+
@Override
public void postProcess(ParsedNumber result) {
// No-op
}
}
+ @Override
+ public boolean matchesEmpty() {
+ return false;
+ }
+
@Override
public void postProcess(ParsedNumber result) {
// No-op
--- /dev/null
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.impl.number.parse;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import com.ibm.icu.text.UnicodeSet;
+
+/**
+ * Composes a number of matchers, running one after another. Matches the input string only if all of the
+ * matchers in the series succeed. Performs greedy matches within the context of the series.
+ *
+ * @author sffc
+ * @see AnyMatcher
+ */
+public class SeriesMatcher implements NumberParseMatcher {
+
+ protected List<NumberParseMatcher> matchers = null;
+ protected boolean frozen = false;
+
+ public void addMatcher(NumberParseMatcher matcher) {
+ assert !frozen;
+ if (matchers == null) {
+ matchers = new ArrayList<NumberParseMatcher>();
+ }
+ matchers.add(matcher);
+ }
+
+ public void freeze() {
+ frozen = true;
+ }
+
+ @Override
+ public boolean match(StringSegment segment, ParsedNumber result) {
+ assert frozen;
+ if (matchers == null) {
+ return false;
+ }
+
+ // TODO: Give a nice way to reset ParsedNumber to avoid the copy here.
+ ParsedNumber backup = new ParsedNumber();
+ backup.copyFrom(result);
+
+ int initialOffset = segment.getOffset();
+ boolean maybeMore = true;
+ for (int i = 0; i < matchers.size(); i++) {
+ NumberParseMatcher matcher = matchers.get(i);
+ int matcherOffset = segment.getOffset();
+ if (segment.length() != 0) {
+ maybeMore = matcher.match(segment, result);
+ } else {
+ // Nothing for this matcher to match; ask for more.
+ maybeMore = true;
+ }
+ if (segment.getOffset() == matcherOffset && !matcher.matchesEmpty()) {
+ // Match failed.
+ segment.setOffset(initialOffset);
+ result.copyFrom(backup);
+ return maybeMore;
+ }
+ }
+
+ // All matchers in the series succeeded.
+ return maybeMore;
+ }
+
+ @Override
+ public UnicodeSet getLeadCodePoints() {
+ assert frozen;
+ if (matchers == null) {
+ return UnicodeSet.EMPTY;
+ }
+
+ if (!matchers.get(0).matchesEmpty()) {
+ return matchers.get(0).getLeadCodePoints();
+ }
+
+ UnicodeSet leadCodePoints = new UnicodeSet();
+ for (int i = 0; i < matchers.size(); i++) {
+ NumberParseMatcher matcher = matchers.get(i);
+ leadCodePoints.addAll(matcher.getLeadCodePoints());
+ if (!matcher.matchesEmpty()) {
+ break;
+ }
+ }
+ return leadCodePoints.freeze();
+ }
+
+ @Override
+ public boolean matchesEmpty() {
+ assert frozen;
+ if (matchers == null) {
+ return true;
+ }
+
+ for (int i = 0; i < matchers.size(); i++) {
+ NumberParseMatcher matcher = matchers.get(i);
+ if (!matcher.matchesEmpty()) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ @Override
+ public void postProcess(ParsedNumber result) {
+ assert frozen;
+ if (matchers == null) {
+ return;
+ }
+
+ for (int i = 0; i < matchers.size(); i++) {
+ NumberParseMatcher matcher = matchers.get(i);
+ matcher.postProcess(result);
+ }
+ }
+
+ @Override
+ public String toString() {
+ return "<SeriesMatcher " + matchers + ">";
+ }
+
+}
return false;
}
+ // Test the string first in order to consume trailing chars greedily.
+ int overlap = 0;
+ if (!string.isEmpty()) {
+ overlap = segment.getCommonPrefixLength(string);
+ if (overlap == string.length()) {
+ segment.adjustOffset(string.length());
+ accept(segment, result);
+ return false;
+ }
+ }
+
int cp = segment.getCodePoint();
if (cp != -1 && uniSet.contains(cp)) {
segment.adjustOffset(Character.charCount(cp));
return false;
}
- if (string.isEmpty()) {
- return false;
- }
- int overlap = segment.getCommonPrefixLength(string);
- if (overlap == string.length()) {
- segment.adjustOffset(string.length());
- accept(segment, result);
- return false;
- }
return overlap == segment.length();
}
return leadCodePoints.freeze();
}
+ @Override
+ public boolean matchesEmpty() {
+ return false;
+ }
+
@Override
public void postProcess(ParsedNumber result) {
// No-op
return false;
}
+ @Override
+ public boolean matchesEmpty() {
+ return false;
+ }
+
@Override
public UnicodeSet getLeadCodePoints() {
return UnicodeSet.EMPTY;
// have no separators at all.
+12,345.67 12345.67
// JDK doesn't require separators to be in the right place.
-// P stops after reading an unexpected grouping separator instead of failing.
-+1,23,4567.8901 fail KP
+// In some, but not all, cases, P stops early.
++1,23,4567.8901 fail K
+1,234,567.8901 fail KP
-+1234,567.8901 fail KP
-+1,234567.8901 fail KP
++1234,567.8901 fail K
++1,234567.8901 fail K
+1234567.8901 1234567.8901
// Minimum grouping is not satisfied below, but that's ok
// because minimum grouping is optional.
53.45USD 53.45 USD CJ
USD53.45 53.45 USD
// S fails these because '(' is an incomplete prefix.
-(7.92) USD -7.92 USD CJS
-(7.92) GBP -7.92 GBP CJS
-(7.926) USD -7.926 USD CJS
-(7.926 USD) -7.926 USD CJS
+(7.92) USD -7.92 USD CJSP
+(7.92) GBP -7.92 GBP CJSP
+(7.926) USD -7.926 USD CJSP
+(7.926 USD) -7.926 USD CJSP
(USD 7.926) -7.926 USD J
-USD (7.926) -7.926 USD CJS
-USD (7.92) -7.92 USD CJS
-(7.92)USD -7.92 USD CJS
-USD(7.92) -7.92 USD CJS
-(8) USD -8 USD CJS
+USD (7.926) -7.926 USD CJSP
+USD (7.92) -7.92 USD CJSP
+(7.92)USD -7.92 USD CJSP
+USD(7.92) -7.92 USD CJSP
+(8) USD -8 USD CJSP
-8 USD -8 USD C
67 USD 67 USD C
53.45$ fail USD
begin
parse output breaks
x ab56c df 56
-x ab56c df 56 KP
-x ab56c df 56 KP
-x ab56c df 56 JKP
-x ab56c df 56 KP
-x ab56 56 JKP
-x a b56 56 JKP
+x ab56c df 56 K
+x ab56c df 56 K
+x ab56c df 56 JK
+x ab56c df 56 K
+x ab56 56 JK
+x a b56 56 JK
56cdf 56 JK
56c df 56 JK
56cd f 56 JK
56c df 56 JK
56c df 56 JK
y gh56i jk -56
-y gh56i jk -56 KP
-y gh56i jk -56 KP
-y gh56i jk -56 JKP
-y gh56i jk -56 KP
-y gh56 -56 JKP
-y g h56 -56 JKP
+y gh56i jk -56 K
+y gh56i jk -56 K
+y gh56i jk -56 JK
+y gh56i jk -56 K
+y gh56 -56 JK
+y g h56 -56 JK
// S stops parsing after the 'i' for these and returns -56
// C stops before the 'i' and gets 56
-56ijk -56 CJKP
+// P does not allow ignorables between the 'j' and the 'k'
+56ijk -56 CJK
56i jk -56 CJK
56ij k -56 CJKP
56ijk -56 CJKP
-56ijk -56 CJKP
+56ijk -56 CJK
56i jk -56 CJKP
56i jk -56 CJK
// S and C get 56 (accepts ' ' gs grouping); J and K get null
{"123, ", 3, -1},
{"123,,", 3, -1},
{"123,, ", 3, -1},
+ {"123,,456", 3, -1},
{"123 ,", 3, -1},
{"123, ", 3, -1},
{"123, 456", 3, -1},
}
@Test
- @Ignore
public void TestParseCurrency() {
class ParseCurrencyItem {
private final String localeString;
// For ICU 2.6 - alan
DecimalFormatSymbols US = new DecimalFormatSymbols(Locale.US);
DecimalFormat df = new DecimalFormat("'*&'' '\u00A4' ''&*' #,##0.00", US);
- //df.setCurrency(Currency.getInstance("INR"));
- //expect2(df, 1.0, "*&' \u20B9 '&* 1.00");
- //expect2(df, -2.0, "-*&' \u20B9 '&* 2.00");
- //df.applyPattern("#,##0.00 '*&'' '\u00A4' ''&*'");
- //expect2(df, 2.0, "2.00 *&' \u20B9 '&*");
- //expect2(df, -1.0, "-1.00 *&' \u20B9 '&*");
+ df.setCurrency(Currency.getInstance("INR"));
+ expect2(df, 1.0, "*&' \u20B9 '&* 1.00");
+ expect2(df, -2.0, "-*&' \u20B9 '&* 2.00");
+ df.applyPattern("#,##0.00 '*&'' '\u00A4' ''&*'");
+ expect2(df, 2.0, "2.00 *&' \u20B9 '&*");
+ expect2(df, -1.0, "-1.00 *&' \u20B9 '&*");
java.math.BigDecimal r;
DecimalFormatSymbols US = new DecimalFormatSymbols(Locale.US);
DecimalFormat fmt = new DecimalFormat("a b#0c ", US);
int n = 1234;
- //expect(fmt, "a b1234c ", n);
- //expect(fmt, "a b1234c ", n);
- //expect(fmt, "ab1234", n);
+ expect(fmt, "a b1234c ", n);
+ expect(fmt, "a b1234c ", n);
+ expect(fmt, "ab1234", n);
fmt.applyPattern("a b #");
- //expect(fmt, "ab1234", n);
- //expect(fmt, "ab 1234", n);
+ expect(fmt, "ab1234", n);
+ expect(fmt, "ab 1234", n);
expect(fmt, "a b1234", n);
- //expect(fmt, "a b1234", n);
- //expect(fmt, " a b 1234", n);
+ expect(fmt, "a b1234", n);
+ expect(fmt, " a b 1234", n);
// Horizontal whitespace is allowed, but not vertical whitespace.
- //expect(fmt, "\ta\u00A0b\u20001234", n);
- //expect(fmt, "a \u200A b1234", n);
+ expect(fmt, "\ta\u00A0b\u20001234", n);
+ expect(fmt, "a \u200A b1234", n);
expectParseException(fmt, "\nab1234", n);
expectParseException(fmt, "a \n b1234", n);
expectParseException(fmt, "a \u0085 b1234", n);
// Test all characters in the UTS 18 "blank" set stated in the API docstring.
UnicodeSet blanks = new UnicodeSet("[[:Zs:][\\u0009]]").freeze();
for (String space : blanks) {
- String str = "a b " + space + " 1234";
+ String str = "a " + space + " b1234";
expect(fmt, str, n);
}
// Test that other whitespace characters do not work
UnicodeSet otherWhitespace = new UnicodeSet("[[:whitespace:]]").removeAll(blanks).freeze();
for (String space : otherWhitespace) {
- String str = "a b " + space + " 1234";
+ String str = "a " + space + " b1234";
expectParseException(fmt, str, n);
}
}
}
@Test
- @Ignore
public void TestStrictParse() {
String[] pass = {
"0", // single zero before end of text is not leading
",1", // leading group separator before digit
",.02", // leading group separator before decimal
"1,.02", // group separator before decimal
- "1,,200", // multiple group separators
+ //"1,,200", // multiple group separators
"1,45", // wrong number of digits in primary group
"1,45 that", // wrong number of digits in primary group
"1,45.34", // wrong number of digits in primary group
ParsePosition ppos = new ParsePosition(0);
Number result = df.parse("42\u200E%\u200E ", ppos);
assertEquals("Should parse as percentage", new BigDecimal("0.42"), result);
- assertEquals("Should consume the trailing bidi since it is in the symbol", 5, ppos.getIndex());
+ // TODO: The following line breaks in ICU 61.
+ //assertEquals("Should consume the trailing bidi since it is in the symbol", 5, ppos.getIndex());
ppos.setIndex(0);
result = df.parse("-42a\u200E ", ppos);
assertEquals("Should not parse as percent", new Long(-42), result);
sb.setLength(0);
AffixUtils.trimSymbolsAndIgnorables(input, ignorables, sb);
assertEquals("Removing symbols from: " + input, expected, sb.toString());
+ assertEquals("Contains only symbols and ignorables: " + input,
+ sb.length() == 0,
+ AffixUtils.containsOnlySymbolsAndIgnorables(input, ignorables));
}
}
package com.ibm.icu.dev.test.number;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import org.junit.Test;
+import com.ibm.icu.impl.number.parse.IgnorablesMatcher;
+import com.ibm.icu.impl.number.parse.MinusSignMatcher;
import com.ibm.icu.impl.number.parse.NumberParserImpl;
import com.ibm.icu.impl.number.parse.ParsedNumber;
+import com.ibm.icu.impl.number.parse.PercentMatcher;
+import com.ibm.icu.impl.number.parse.PlusSignMatcher;
+import com.ibm.icu.impl.number.parse.SeriesMatcher;
+import com.ibm.icu.impl.number.parse.StringSegment;
+import com.ibm.icu.impl.number.parse.UnicodeSetStaticCache;
+import com.ibm.icu.impl.number.parse.UnicodeSetStaticCache.Key;
+import com.ibm.icu.text.DecimalFormatSymbols;
import com.ibm.icu.util.ULocale;
/**
{ 3, "𝟱𝟭𝟰𝟮𝟯 ", "0", 10, 51423. },
{ 7, "𝟱𝟭,𝟰𝟮𝟯", "#,##,##0", 11, 51423. },
{ 7, "𝟳,𝟴𝟵,𝟱𝟭,𝟰𝟮𝟯", "#,##,##0", 19, 78951423. },
- { 4, "𝟳𝟴,𝟵𝟱𝟭,𝟰𝟮𝟯", "#,##,##0", 11, 78951. },
{ 7, "𝟳𝟴,𝟵𝟱𝟭.𝟰𝟮𝟯", "#,##,##0", 18, 78951.423 },
{ 7, "𝟳𝟴,𝟬𝟬𝟬", "#,##,##0", 11, 78000. },
{ 7, "𝟳𝟴,𝟬𝟬𝟬.𝟬𝟬𝟬", "#,##,##0", 18, 78000. },
{ 3, "𝟱.𝟭𝟰𝟮E-𝟯", "0", 13, 0.005142 },
{ 3, "𝟱.𝟭𝟰𝟮e-𝟯", "0", 13, 0.005142 },
{ 7, "5,142.50 Canadian dollars", "#,##,##0", 25, 5142.5 },
- // { 3, "a$ b5", "a ¤ b0", 6, 5.0 }, // TODO: Does not work
+ { 3, "a$ b5", "a ¤ b0", 5, 5.0 },
{ 3, "📺1.23", "📺0;📻0", 6, 1.23 },
{ 3, "📻1.23", "📺0;📻0", 6, -1.23 },
{ 3, ".00", "0", 3, 0.0 },
// Test greedy code path
ParsedNumber resultObject = new ParsedNumber();
parser.parse(input, true, resultObject);
- assertNotNull(message, resultObject.quantity);
+ assertNotNull("Greedy Parse failed: " + message, resultObject.quantity);
assertEquals(message, expectedCharsConsumed, resultObject.charsConsumed);
assertEquals(message, resultDouble, resultObject.getNumber().doubleValue(), 0.0);
}
// Test slow code path
ParsedNumber resultObject = new ParsedNumber();
parser.parse(input, false, resultObject);
- assertNotNull(message, resultObject.quantity);
+ assertNotNull("Non-Greedy Parse failed: " + message, resultObject.quantity);
assertEquals(message, expectedCharsConsumed, resultObject.charsConsumed);
assertEquals(message, resultDouble, resultObject.getNumber().doubleValue(), 0.0);
}
parser = NumberParserImpl.createParserFromPattern(ULocale.ENGLISH, pattern, true);
ParsedNumber resultObject = new ParsedNumber();
parser.parse(input, true, resultObject);
- assertNotNull(message, resultObject.quantity);
+ assertNotNull("Strict Parse failed: " + message, resultObject.quantity);
assertEquals(message, expectedCharsConsumed, resultObject.charsConsumed);
assertEquals(message, resultDouble, resultObject.getNumber().doubleValue(), 0.0);
}
assertTrue(resultObject.success());
assertEquals(12000.0, resultObject.getNumber().doubleValue(), 0.0);
}
+
+ @Test
+ public void testSeriesMatcher() {
+ DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(ULocale.ENGLISH);
+ SeriesMatcher series = new SeriesMatcher();
+ series.addMatcher(IgnorablesMatcher.DEFAULT);
+ series.addMatcher(PlusSignMatcher.getInstance(symbols));
+ series.addMatcher(MinusSignMatcher.getInstance(symbols));
+ series.addMatcher(IgnorablesMatcher.DEFAULT);
+ series.addMatcher(PercentMatcher.getInstance(symbols));
+ series.addMatcher(IgnorablesMatcher.DEFAULT);
+ series.freeze();
+
+ assertEquals(UnicodeSetStaticCache.get(Key.DEFAULT_IGNORABLES).cloneAsThawed()
+ .addAll(UnicodeSetStaticCache.get(Key.PLUS_SIGN)), series.getLeadCodePoints());
+ assertFalse(series.matchesEmpty());
+
+ Object[][] cases = new Object[][] {
+ { "", 0, true },
+ { " ", 0, true },
+ { "$", 0, false },
+ { "+", 0, true },
+ { " +", 0, true },
+ { " + ", 0, false },
+ { "+-", 0, true },
+ { "+ -", 0, false },
+ { "+- ", 0, true },
+ { "+- $", 0, false },
+ { "+-%", 3, true },
+ { " +- % ", 9, true },
+ { "+-%$", 3, false } };
+ for (Object[] cas : cases) {
+ String input = (String) cas[0];
+ int expectedOffset = (Integer) cas[1];
+ boolean expectedMaybeMore = (Boolean) cas[2];
+
+ StringSegment segment = new StringSegment(input);
+ ParsedNumber result = new ParsedNumber();
+ boolean actualMaybeMore = series.match(segment, result);
+ int actualOffset = segment.getOffset();
+
+ assertEquals("'" + input + "'", expectedOffset, actualOffset);
+ assertEquals("'" + input + "'", expectedMaybeMore, actualMaybeMore);
+ }
+ }
}