public static final int PADDING = 0x400;
}
+ // Convenience compound flags
+ public static final int FLAG_POS_PREFIX = Flags.PREFIX;
+ public static final int FLAG_POS_SUFFIX = 0;
+ public static final int FLAG_NEG_PREFIX = Flags.PREFIX | Flags.NEGATIVE_SUBPATTERN;
+ public static final int FLAG_NEG_SUFFIX = Flags.NEGATIVE_SUBPATTERN;
+
public char charAt(int flags, int i);
public int length(int flags);
package com.ibm.icu.impl.number;
import com.ibm.icu.text.NumberFormat;
+import com.ibm.icu.text.UnicodeSet;
/**
* Performs manipulations on affix patterns: the prefix and suffix strings associated with a decimal
}
/**
- * Appends a new affix pattern with all symbols removed. Like calling unescape with a symbol provider that always
- * returns the empty string.
+ * Appends a new affix pattern with all symbols and code points in the given "ignorables" UnicodeSet stripped out.
+ * Similar to calling unescape with a symbol provider that always returns the empty string.
+ *
+ * <p>
+ * Accepts and returns a StringBuilder, allocating it only if necessary.
+ *
+ * <p>
+ * The builder is only appended to; it is not cleared here, so callers that reuse a builder must reset it themselves.
+ *
+ * @param affixPattern the affix pattern to scan; must not be null (checked by assertion only)
+ * @param ignorables code points contained in this set are left out of the output
+ * @param sb the builder to append to, or null to have one allocated when the first code point is kept
+ * @return the builder that was appended to; this is still null when sb was null and no code point was kept
*/
- public static void removeSymbols(CharSequence affixPattern, StringBuilder output) {
+ public static StringBuilder withoutSymbolsOrIgnorables(
+ CharSequence affixPattern,
+ UnicodeSet ignorables,
+ StringBuilder sb) {
assert affixPattern != null;
long tag = 0L;
while (hasNext(tag, affixPattern)) {
tag = nextToken(tag, affixPattern);
int typeOrCp = getTypeOrCp(tag);
- if (typeOrCp >= 0) {
- output.appendCodePoint(typeOrCp);
+ if (typeOrCp >= 0 && !ignorables.contains(typeOrCp)) {
+ if (sb == null) {
+ // Lazy-initialize the StringBuilder
+ sb = new StringBuilder();
+ }
+ sb.appendCodePoint(typeOrCp);
}
}
+ return sb;
}
/**
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
-import java.util.Collection;
+import java.util.ArrayList;
+import java.util.Collections;
import java.util.Comparator;
-import java.util.Set;
-import java.util.TreeSet;
-import com.ibm.icu.impl.StandardPlural;
import com.ibm.icu.impl.number.AffixPatternProvider;
import com.ibm.icu.impl.number.AffixUtils;
-import com.ibm.icu.impl.number.MutablePatternModifier;
-import com.ibm.icu.impl.number.NumberStringBuilder;
+import com.ibm.icu.text.UnicodeSet;
/**
* @author sffc
}
};
- /**
- * Creates multiple AffixMatchers, enough to cover the requirements for the given pattern modifier, appending them
- * in order to the NumberParserImpl.
- */
- public static void generateFromPatternModifier(
- MutablePatternModifier patternModifier,
- int flags,
- boolean includeUnpaired,
- NumberParserImpl output) {
-
- // Store the matchers in a TreeSet to ensure both uniqueness and order.
- Set<AffixMatcher> matchers = new TreeSet<AffixMatcher>(COMPARATOR);
-
- // Construct one matcher per isNegative/plural combination. Most of the time, plurals aren't needed, so only
- // two matchers will be created, one for positive and one for negative.
- NumberStringBuilder nsb = new NumberStringBuilder();
- boolean isNegative = false;
- while (true) {
- if (isNegative) {
- flags |= ParsedNumber.FLAG_NEGATIVE;
+ /**
+ * Builds the AffixMatchers for the positive and (when present) negative subpattern of the given pattern info and
+ * adds them, sorted by COMPARATOR, to the given parser.
+ *
+ * <p>
+ * Each prefix and suffix is first cleaned with getCleanAffix, which strips symbols and ignorable code points. A
+ * subpattern whose cleaned prefix and suffix are both empty produces no matcher. A negative subpattern whose
+ * cleaned affixes equal the positive ones is skipped in favor of the positive matcher.
+ *
+ * @param patternInfo source of the prefix and suffix patterns
+ * @param output the parser that receives the matchers through addMatchers
+ * @param ignorables code points to strip from the affixes before matchers are built
+ * @param includeUnpaired if true, and both the prefix and the suffix of a subpattern are non-empty, also add
+ * prefix-only and suffix-only matchers; for the negative subpattern these are added only when they differ from
+ * the positive prefix or suffix respectively
+ */
+ public static void generateFromAffixPatternProvider(
+ AffixPatternProvider patternInfo,
+ NumberParserImpl output,
+ UnicodeSet ignorables,
+ boolean includeUnpaired) {
+ // Lazy-initialize the StringBuilder.
+ StringBuilder sb = null;
+
+ // Use initial capacity of 6, the highest possible number of AffixMatchers.
+ // TODO: Lazy-initialize?
+ ArrayList<AffixMatcher> matchers = new ArrayList<AffixMatcher>(6);
+
+ sb = getCleanAffix(patternInfo, AffixPatternProvider.FLAG_POS_PREFIX, ignorables, sb);
+ String posPrefix = toStringOrEmpty(sb);
+ sb = getCleanAffix(patternInfo, AffixPatternProvider.FLAG_POS_SUFFIX, ignorables, sb);
+ String posSuffix = toStringOrEmpty(sb);
+
+ if (!posPrefix.isEmpty() || !posSuffix.isEmpty()) {
+ matchers.add(getInstance(posPrefix, posSuffix, 0));
+ if (includeUnpaired && !posPrefix.isEmpty() && !posSuffix.isEmpty()) {
+ matchers.add(getInstance(posPrefix, "", 0));
+ matchers.add(getInstance("", posSuffix, 0));
}
+ }
- if (patternModifier.needsPlurals()) {
- for (StandardPlural plural : StandardPlural.VALUES) {
- patternModifier.setNumberProperties(isNegative, plural);
- AffixMatcher.createAndAppendTo(matchers, patternModifier, flags, nsb, includeUnpaired);
+ if (patternInfo.hasNegativeSubpattern()) {
+ sb = getCleanAffix(patternInfo, AffixPatternProvider.FLAG_NEG_PREFIX, ignorables, sb);
+ String negPrefix = toStringOrEmpty(sb);
+ sb = getCleanAffix(patternInfo, AffixPatternProvider.FLAG_NEG_SUFFIX, ignorables, sb);
+ String negSuffix = toStringOrEmpty(sb);
+
+ if (negPrefix.equals(posPrefix) && negSuffix.equals(posSuffix)) {
+ // No-op: favor the positive AffixMatcher
+ } else if (!negPrefix.isEmpty() || !negSuffix.isEmpty()) {
+ matchers.add(getInstance(negPrefix, negSuffix, ParsedNumber.FLAG_NEGATIVE));
+ if (includeUnpaired && !negPrefix.isEmpty() && !negSuffix.isEmpty()) {
+ if (!negPrefix.equals(posPrefix)) {
+ matchers.add(getInstance(negPrefix, "", ParsedNumber.FLAG_NEGATIVE));
+ }
+ if (!negSuffix.equals(posSuffix)) {
+ matchers.add(getInstance("", negSuffix, ParsedNumber.FLAG_NEGATIVE));
+ }
}
- } else {
- patternModifier.setNumberProperties(isNegative, null);
- AffixMatcher.createAndAppendTo(matchers, patternModifier, flags, nsb, includeUnpaired);
- }
-
- if (isNegative) {
- break;
- } else {
- isNegative = true;
}
}
- for (AffixMatcher matcher : matchers) {
- output.addMatcher(matcher);
- }
+ // Put the AffixMatchers in order, and then add them to the output.
+ Collections.sort(matchers, COMPARATOR);
+ output.addMatchers(matchers);
}
- public static void generateFromAffixPatternProvider(AffixPatternProvider patternInfo,
- NumberParserImpl output,
- boolean includeUnpaired) {
- AffixMatcher positive = null;
- AffixMatcher negative = null;
-
- StringBuilder sb = new StringBuilder();
- AffixUtils.removeSymbols(patternInfo.getString(AffixPatternProvider.Flags.PREFIX), sb);
- String prefix = sb.toString();
- sb.setLength(0);
- AffixUtils.removeSymbols(patternInfo.getString(/* suffix */ 0), sb);
- String suffix = sb.toString();
- if (!prefix.isEmpty() || !suffix.isEmpty()) {
- positive = new AffixMatcher(prefix, suffix, 0);
- }
-
- if (patternInfo.hasNegativeSubpattern()) {
+ /**
+ * Writes the affix selected by the given flag into the builder, with symbols and ignorable code points removed.
+ *
+ * <p>
+ * A non-null builder is emptied first, so the same builder can be passed from one call to the next. When the
+ * affix has length zero nothing is appended.
+ *
+ * @param patternInfo source of the affix pattern
+ * @param flag which affix to read, e.g. one of the AffixPatternProvider.FLAG_* compound flags
+ * @param ignorables code points to strip from the affix
+ * @param sb builder to reuse, or null
+ * @return the builder holding the cleaned affix; may be null if sb was null and no code point was kept
+ */
+ private static StringBuilder getCleanAffix(
+ AffixPatternProvider patternInfo,
+ int flag,
+ UnicodeSet ignorables,
+ StringBuilder sb) {
+ if (sb != null) {
sb.setLength(0);
- AffixUtils.removeSymbols(patternInfo
- .getString(AffixPatternProvider.Flags.PREFIX | AffixPatternProvider.Flags.NEGATIVE_SUBPATTERN), sb);
- prefix = sb.toString();
- sb.setLength(0);
- AffixUtils.removeSymbols(patternInfo.getString(AffixPatternProvider.Flags.NEGATIVE_SUBPATTERN), sb);
- suffix = sb.toString();
- if (!prefix.isEmpty() || !suffix.isEmpty()) {
- negative = new AffixMatcher(prefix, suffix, ParsedNumber.FLAG_NEGATIVE);
- }
}
-
- if (positive != null && negative != null) {
- int comparison = COMPARATOR.compare(positive, negative);
- if (comparison > 0) {
- appendTo(negative, output, includeUnpaired);
- appendTo(positive, output, includeUnpaired);
- } else if (comparison < 0) {
- appendTo(positive, output, includeUnpaired);
- appendTo(negative, output, includeUnpaired);
- } else {
- // The two candidates are equal; favor the positive one
- appendTo(positive, output, includeUnpaired);
- }
- } else if (positive != null) {
- appendTo(positive, output, includeUnpaired);
- } else if (negative != null) {
- appendTo(negative, output, includeUnpaired);
- } else {
- // No affixes to append this time
+ if (patternInfo.length(flag) > 0) {
+ sb = AffixUtils.withoutSymbolsOrIgnorables(patternInfo.getString(flag), ignorables, sb);
}
+ return sb;
}
- private static void appendTo(AffixMatcher matcher, NumberParserImpl output, boolean includeUnpaired) {
- output.addMatcher(matcher);
- if (includeUnpaired && !matcher.prefix.isEmpty() && !matcher.suffix.isEmpty()) {
- output.addMatcher(new AffixMatcher(matcher.prefix, "", matcher.flags));
- output.addMatcher(new AffixMatcher("", matcher.suffix, matcher.flags));
- }
+ /** Returns the builder's contents, treating a null builder and an empty builder alike as "". */
+ private static String toStringOrEmpty(StringBuilder sb) {
+ if (sb == null || sb.length() == 0) {
+ return "";
+ }
+ return sb.toString();
}
- /**
- * Constructs one or more AffixMatchers from the given MutablePatternModifier and flags, appending them to the given
- * collection. The NumberStringBuilder is used as a temporary object only.
- *
- * @param includeUnpaired If true, create additional AffixMatchers with an unpaired prefix or suffix.
- */
- private static void createAndAppendTo(
- Collection<AffixMatcher> appendTo,
- MutablePatternModifier patternModifier,
- int flags,
- NumberStringBuilder nsb,
- boolean includeUnpaired) {
- // TODO: Make this more efficient (avoid the substrings and things)
- nsb.clear();
- patternModifier.apply(nsb, 0, 0);
- int prefixLength = patternModifier.getPrefixLength();
- String full = nsb.toString();
- String prefix = full.substring(0, prefixLength);
- String suffix = full.substring(prefixLength);
- appendTo.add(new AffixMatcher(prefix, suffix, flags));
- if (includeUnpaired && !prefix.isEmpty() && !suffix.isEmpty()) {
- appendTo.add(new AffixMatcher(prefix, "", flags));
- appendTo.add(new AffixMatcher("", suffix, flags));
- }
+ /**
+ * Single creation point for AffixMatcher objects within this class. At present it always allocates a new
+ * instance from the given prefix, suffix and flags.
+ */
+ private static final AffixMatcher getInstance(String prefix, String suffix, int flags) {
+ // TODO: Special handling for common cases like both strings empty.
+ return new AffixMatcher(prefix, suffix, flags);
}
private AffixMatcher(String prefix, String suffix, int flags) {
@Override
public boolean match(StringSegment segment, ParsedNumber result) {
- if (result.quantity == null) {
+ if (!result.seenNumber()) {
// Prefix
if (result.prefix != null || prefix.length() == 0) {
return false;
@Override
public String toString() {
- return "<AffixMatcher \"" + prefix + "\" \"" + suffix + "\">";
+ boolean isNegative = 0 != (flags & ParsedNumber.FLAG_NEGATIVE);
+ return "<AffixMatcher" + (isNegative ? ":negative " : " ") + prefix + "#" + suffix + ">";
}
}
*/
public class DecimalMatcher implements NumberParseMatcher {
- // TODO: Re-generate these sets from the database. They probably haven't been updated in a while.
- private static final UnicodeSet UNISET_PERIOD_LIKE = new UnicodeSet("[.\\u2024\\u3002\\uFE12\\uFE52\\uFF0E\\uFF61]")
- .freeze();
- private static final UnicodeSet UNISET_STRICT_PERIOD_LIKE = new UnicodeSet("[.\\u2024\\uFE52\\uFF0E\\uFF61]")
- .freeze();
- private static final UnicodeSet UNISET_COMMA_LIKE = new UnicodeSet(
- "[,\\u060C\\u066B\\u3001\\uFE10\\uFE11\\uFE50\\uFE51\\uFF0C\\uFF64]").freeze();
- private static final UnicodeSet UNISET_STRICT_COMMA_LIKE = new UnicodeSet("[,\\u066B\\uFE10\\uFE50\\uFF0C]")
- .freeze();
- private static final UnicodeSet UNISET_OTHER_GROUPING_SEPARATORS = new UnicodeSet(
- "[\\ '\\u00A0\\u066C\\u2000-\\u200A\\u2018\\u2019\\u202F\\u205F\\u3000\\uFF07]").freeze();
-
- public static DecimalMatcher getInstance(DecimalFormatSymbols symbols) {
- String groupingSeparator = symbols.getGroupingSeparatorString();
- UnicodeSet groupingSet = UNISET_COMMA_LIKE.contains(groupingSeparator)
- ? UNISET_COMMA_LIKE.cloneAsThawed().addAll(UNISET_OTHER_GROUPING_SEPARATORS).freeze()
- : UNISET_PERIOD_LIKE.contains(groupingSeparator)
- ? UNISET_PERIOD_LIKE.cloneAsThawed().addAll(UNISET_OTHER_GROUPING_SEPARATORS).freeze()
- : UNISET_OTHER_GROUPING_SEPARATORS.contains(groupingSeparator)
- ? UNISET_OTHER_GROUPING_SEPARATORS
- : new UnicodeSet().addAll(groupingSeparator).freeze();
-
- String decimalSeparator = symbols.getDecimalSeparatorString();
- UnicodeSet decimalSet = UNISET_COMMA_LIKE.contains(decimalSeparator) ? UNISET_COMMA_LIKE
- : UNISET_PERIOD_LIKE.contains(decimalSeparator) ? UNISET_PERIOD_LIKE
- : new UnicodeSet().addAll(decimalSeparator).freeze();
-
- return new DecimalMatcher(symbols.getDigitStrings(), groupingSet, decimalSet, false);
- }
-
- public static DecimalMatcher getExponentInstance(DecimalFormatSymbols symbols) {
- return new DecimalMatcher(symbols.getDigitStrings(),
- new UnicodeSet("[,]").freeze(),
- new UnicodeSet("[.]").freeze(),
- true);
- }
- private final String[] digitStrings;
- private final UnicodeSet groupingUniSet;
- private final UnicodeSet decimalUniSet;
- private final UnicodeSet separatorSet;
public boolean requireGroupingMatch = false;
+ public boolean decimalEnabled = true;
public boolean groupingEnabled = true;
public int grouping1 = 3;
public int grouping2 = 3;
public boolean integerOnly = false;
- private final boolean isScientific;
-
- private DecimalMatcher(
- String[] digitStrings,
- UnicodeSet groupingUniSet,
- UnicodeSet decimalUniSet,
- boolean isScientific) {
- this.digitStrings = digitStrings;
- this.groupingUniSet = groupingUniSet;
- this.decimalUniSet = decimalUniSet;
- if (groupingEnabled) {
- separatorSet = groupingUniSet.cloneAsThawed().addAll(decimalUniSet).freeze();
- } else {
- separatorSet = decimalUniSet;
- }
- this.isScientific = isScientific;
+ public boolean isScientific = false;
+
+ private UnicodeSet groupingUniSet;
+ private UnicodeSet decimalUniSet;
+ private UnicodeSet separatorSet;
+ private String[] digitStrings;
+ private boolean frozen;
+
+ public DecimalMatcher() {
+ frozen = false;
+ }
+
+ /**
+ * Finishes construction: marks this matcher as frozen and derives the separator sets and digit strings from
+ * the given symbols. Must be called exactly once before match(); both conditions are checked by assertion only.
+ *
+ * @param symbols source of the grouping separator, decimal separator and digit strings
+ * @param isStrict passed through to SeparatorSetUtils to choose between the strict and the lenient separator sets
+ */
+ public void freeze(DecimalFormatSymbols symbols, boolean isStrict) {
+ assert !frozen;
+ frozen = true;
+
+ // NOTE(review): groupingEnabled and decimalEnabled are not consulted when building these sets -- confirm
+ // that they are honored elsewhere (e.g. in match()).
+ groupingUniSet = SeparatorSetUtils.getGroupingUnicodeSet(symbols, isStrict);
+ decimalUniSet = SeparatorSetUtils.getDecimalUnicodeSet(symbols, isStrict);
+ separatorSet = SeparatorSetUtils.unionUnicodeSets(groupingUniSet, decimalUniSet);
+ digitStrings = symbols.getDigitStringsLocal();
}
@Override
public boolean match(StringSegment segment, ParsedNumber result) {
- if (result.quantity != null && !isScientific) {
+ assert frozen;
+ if (result.seenNumber() && !isScientific) {
// A number has already been consumed.
return false;
}
if (isScientific) {
result.quantity.adjustMagnitude(exponent);
+ } else if (result.quantity == null) {
+ // No-op: strings that start with a separator without any other digits
} else if (seenBothSeparators || (separator != -1 && decimalUniSet.contains(separator))) {
// The final separator was a decimal separator.
+ result.flags |= ParsedNumber.FLAG_HAS_DECIMAL_SEPARATOR;
result.quantity.adjustMagnitude(-currGroup);
if (integerOnly) {
result.quantity.truncate();
result.quantity.adjustMagnitude(-currGroup);
result.quantity.truncate();
segment.setOffset(lastSeparatorOffset);
-// result.quantity = null;
-// segment.setOffset(initialOffset);
+ // result.quantity = null;
+ // segment.setOffset(initialOffset);
}
return segment.length() == 0 || hasPartialPrefix || segment.isLeadingSurrogate();
@Override
public String toString() {
- return "<MantissaMatcher>";
+ return "<DecimalMatcher>";
}
}
--- /dev/null
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.impl.number.parse;
+
+import com.ibm.icu.text.UnicodeSet;
+
+/**
+ * Matcher that advances past code points belonging to a set of "ignorable" characters.
+ *
+ * <p>
+ * Two shared instances are kept: one for {@link #DEFAULT_UNISET} (BiDi marks plus horizontal whitespace) and one
+ * for {@link #STRICT_UNISET} (BiDi marks only). Any other set gets its own instance from {@link #getInstance}.
+ *
+ * @author sffc
+ */
+public class IgnorablesMatcher implements NumberParseMatcher {
+
+    // BiDi characters are skipped over and ignored at any point in the string, even in strict mode.
+    static final UnicodeSet UNISET_BIDI = new UnicodeSet("[[\\u200E\\u200F\\u061C]]").freeze();
+
+    // This set was decided after discussion with icu-design@. See ticket #13309.
+    // Zs+TAB is "horizontal whitespace" according to UTS #18 (blank property).
+    static final UnicodeSet UNISET_WHITESPACE = new UnicodeSet("[[:Zs:][\\u0009]]").freeze();
+
+    /** The default set of ignorables. */
+    static final UnicodeSet DEFAULT_UNISET = UNISET_BIDI.cloneAsThawed().addAll(UNISET_WHITESPACE).freeze();
+
+    /** The default set of ignorables for strict mode. */
+    static final UnicodeSet STRICT_UNISET = UNISET_BIDI;
+
+    private static final IgnorablesMatcher DEFAULT_INSTANCE = new IgnorablesMatcher(DEFAULT_UNISET);
+    private static final IgnorablesMatcher STRICT_INSTANCE = new IgnorablesMatcher(STRICT_UNISET);
+
+    /**
+     * Returns a matcher for the given set, reusing a shared instance when the set equals the default or the strict
+     * set.
+     *
+     * @param ignorables the code points to skip; must be frozen (checked by assertion only)
+     * @return a matcher that skips exactly the code points in the given set
+     */
+    public static IgnorablesMatcher getInstance(UnicodeSet ignorables) {
+        assert ignorables.isFrozen();
+        // The identity checks are a shortcut for the common case of passing the constants themselves.
+        if (ignorables == DEFAULT_UNISET || ignorables.equals(DEFAULT_UNISET)) {
+            return DEFAULT_INSTANCE;
+        } else if (ignorables == STRICT_UNISET || ignorables.equals(STRICT_UNISET)) {
+            return STRICT_INSTANCE;
+        } else {
+            return new IgnorablesMatcher(ignorables);
+        }
+    }
+
+    private final UnicodeSet ignorables;
+
+    private IgnorablesMatcher(UnicodeSet ignorables) {
+        this.ignorables = ignorables;
+    }
+
+    /**
+     * Advances the segment past every leading code point that is in the ignorables set. The result object is never
+     * modified.
+     *
+     * @return true if the segment was exhausted or {@code segment.isLeadingSurrogate()} reports true; presumably
+     *         this tells the parser that more input could extend the match -- confirm against NumberParseMatcher
+     */
+    @Override
+    public boolean match(StringSegment segment, ParsedNumber result) {
+        while (segment.length() > 0) {
+            int cp = segment.getCodePoint();
+            if (cp == -1 || !ignorables.contains(cp)) {
+                break;
+            }
+            segment.adjustOffset(Character.charCount(cp));
+            // Note: Do not touch the charsConsumed.
+        }
+        return segment.length() == 0 || segment.isLeadingSurrogate();
+    }
+
+    @Override
+    public void postProcess(ParsedNumber result) {
+        // No-op
+    }
+
+    @Override
+    public String toString() {
+        // Was "<WhitespaceMatcher>", left over from the class this one replaces.
+        return "<IgnorablesMatcher>";
+    }
+}
--- /dev/null
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.impl.number.parse;
+
+import com.ibm.icu.text.DecimalFormatSymbols;
+
+/**
+ * Matcher that recognizes the locale's NaN symbol and records it on the result by setting
+ * {@link ParsedNumber#FLAG_NAN}.
+ *
+ * @author sffc
+ */
+public class NanMatcher implements NumberParseMatcher {
+
+    private final String nanString;
+
+    /**
+     * @param symbols the symbols whose NaN string ({@code symbols.getNaN()}) this matcher looks for
+     */
+    public NanMatcher(DecimalFormatSymbols symbols) {
+        nanString = symbols.getNaN();
+    }
+
+    /**
+     * Consumes the NaN string if the segment starts with it, setting FLAG_NAN and updating the chars consumed.
+     *
+     * @return true only when the whole remaining segment is a proper prefix of the NaN string; false after a
+     *         successful match, on a mismatch, or when no NaN string is configured
+     */
+    @Override
+    public boolean match(StringSegment segment, ParsedNumber result) {
+        // A null symbol would throw below, and an empty one would "match" at every position and flag every
+        // parse as NaN; in both cases there is nothing to match.
+        if (nanString == null || nanString.isEmpty()) {
+            return false;
+        }
+
+        int overlap = segment.getCommonPrefixLength(nanString);
+        if (overlap == nanString.length()) {
+            result.flags |= ParsedNumber.FLAG_NAN;
+            segment.adjustOffset(overlap);
+            result.setCharsConsumed(segment);
+            return false;
+        } else if (overlap == segment.length()) {
+            return true;
+        } else {
+            return false;
+        }
+    }
+
+    @Override
+    public void postProcess(ParsedNumber result) {
+        // No-op
+    }
+
+    @Override
+    public String toString() {
+        return "<NanMatcher>";
+    }
+
+}
import java.text.ParsePosition;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.Comparator;
import java.util.List;
import com.ibm.icu.impl.number.AffixUtils;
import com.ibm.icu.impl.number.CustomSymbolCurrency;
import com.ibm.icu.impl.number.DecimalFormatProperties;
-import com.ibm.icu.impl.number.MutablePatternModifier;
import com.ibm.icu.impl.number.Parse.ParseMode;
import com.ibm.icu.impl.number.PatternStringParser;
import com.ibm.icu.impl.number.PropertiesAffixPatternProvider;
-import com.ibm.icu.number.NumberFormatter.SignDisplay;
-import com.ibm.icu.number.NumberFormatter.UnitWidth;
import com.ibm.icu.text.DecimalFormatSymbols;
+import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.Currency;
import com.ibm.icu.util.CurrencyAmount;
import com.ibm.icu.util.ULocale;
ULocale locale = new ULocale("en_IN");
DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(locale);
- MutablePatternModifier mod = new MutablePatternModifier(false);
- AffixPatternProvider provider = PatternStringParser.parseToPatternInfo(pattern);
- mod.setPatternInfo(provider);
- mod.setPatternAttributes(SignDisplay.AUTO, false);
- mod.setSymbols(symbols, Currency.getInstance("USD"), UnitWidth.FULL_NAME, null);
- int flags = 0;
- if (provider.containsSymbolType(AffixUtils.TYPE_PERCENT)) {
- flags |= ParsedNumber.FLAG_PERCENT;
- }
- if (provider.containsSymbolType(AffixUtils.TYPE_PERMILLE)) {
- flags |= ParsedNumber.FLAG_PERMILLE;
- }
- AffixMatcher.generateFromPatternModifier(mod, flags, true, parser);
+ AffixPatternProvider patternInfo = PatternStringParser.parseToPatternInfo(pattern);
+ AffixMatcher.generateFromAffixPatternProvider(patternInfo, parser, new UnicodeSet(), true);
- parser.addMatcher(WhitespaceMatcher.getInstance());
- DecimalMatcher decimalMatcher = DecimalMatcher.getInstance(symbols);
+ parser.addMatcher(IgnorablesMatcher.getInstance(IgnorablesMatcher.DEFAULT_UNISET));
+ DecimalMatcher decimalMatcher = new DecimalMatcher();
decimalMatcher.requireGroupingMatch = strictGrouping;
decimalMatcher.grouping1 = 3;
decimalMatcher.grouping2 = 2;
+ decimalMatcher.freeze(symbols, false);
parser.addMatcher(decimalMatcher);
parser.addMatcher(new MinusSignMatcher());
parser.addMatcher(new ScientificMatcher(symbols));
return parser;
}
- public static Number parseStatic(String input,
- ParsePosition ppos,
- DecimalFormatProperties properties,
- DecimalFormatSymbols symbols) {
+ public static Number parseStatic(
+ String input,
+ ParsePosition ppos,
+ DecimalFormatProperties properties,
+ DecimalFormatSymbols symbols) {
NumberParserImpl parser = createParserFromProperties(properties, symbols, false);
ParsedNumber result = new ParsedNumber();
parser.parse(input, true, result);
}
}
- public static CurrencyAmount parseStaticCurrency(String input,
- ParsePosition ppos,
- DecimalFormatProperties properties,
- DecimalFormatSymbols symbols) {
+ public static CurrencyAmount parseStaticCurrency(
+ String input,
+ ParsePosition ppos,
+ DecimalFormatProperties properties,
+ DecimalFormatSymbols symbols) {
NumberParserImpl parser = createParserFromProperties(properties, symbols, true);
ParsedNumber result = new ParsedNumber();
parser.parse(input, true, result);
ULocale locale = symbols.getULocale();
Currency currency = CustomSymbolCurrency.resolve(properties.getCurrency(), locale, symbols);
boolean isStrict = properties.getParseMode() == ParseMode.STRICT;
+ UnicodeSet ignorables = isStrict ? IgnorablesMatcher.STRICT_UNISET : IgnorablesMatcher.DEFAULT_UNISET;
- ////////////////////////
- /// CURRENCY MATCHER ///
- ////////////////////////
-
- if (parseCurrency) {
- parser.addMatcher(new CurrencyMatcher(locale));
- }
+ boolean decimalSeparatorRequired = properties.getDecimalPatternMatchRequired()
+ ? (properties.getDecimalSeparatorAlwaysShown() || properties.getMaximumFractionDigits() != 0)
+ : false;
//////////////////////
/// AFFIX MATCHERS ///
//////////////////////
// Set up a pattern modifier with mostly defaults to generate AffixMatchers.
- MutablePatternModifier mod = new MutablePatternModifier(false);
AffixPatternProvider patternInfo = new PropertiesAffixPatternProvider(properties);
-// mod.setPatternInfo(patternInfo);
-// mod.setPatternAttributes(SignDisplay.AUTO, false);
-// mod.setSymbols(symbols, currency, UnitWidth.SHORT, null);
-//
-// // Figure out which flags correspond to this pattern modifier. Note: negatives are taken care of in the
-// // generateFromPatternModifier function.
-// int flags = 0;
-// if (patternInfo.containsSymbolType(AffixUtils.TYPE_PERCENT)) {
-// flags |= ParsedNumber.FLAG_PERCENT;
-// }
-// if (patternInfo.containsSymbolType(AffixUtils.TYPE_PERMILLE)) {
-// flags |= ParsedNumber.FLAG_PERMILLE;
-// }
-// if (patternInfo.hasCurrencySign()) {
-// flags |= ParsedNumber.FLAG_HAS_DEFAULT_CURRENCY;
-// }
-//
-// parseCurrency = parseCurrency || patternInfo.hasCurrencySign();
-//
-// AffixMatcher.generateFromPatternModifier(mod, flags, !isStrict && !parseCurrency, parser);
-
- AffixMatcher.generateFromAffixPatternProvider(patternInfo, parser, !isStrict);
+ AffixMatcher.generateFromAffixPatternProvider(patternInfo, parser, ignorables, !isStrict);
+
+ ////////////////////////
+ /// CURRENCY MATCHER ///
+ ////////////////////////
+
+ parseCurrency = parseCurrency || patternInfo.hasCurrencySign();
+ if (parseCurrency) {
+ parser.addMatcher(new CurrencyMatcher(locale));
+ }
///////////////////////////////
/// OTHER STANDARD MATCHERS ///
///////////////////////////////
if (!isStrict) {
- parser.addMatcher(WhitespaceMatcher.getInstance());
+ parser.addMatcher(IgnorablesMatcher.getInstance(ignorables));
}
if (!isStrict || patternInfo.containsSymbolType(AffixUtils.TYPE_PLUS_SIGN)) {
parser.addMatcher(new PlusSignMatcher());
}
parser.addMatcher(new MinusSignMatcher());
- DecimalMatcher decimalMatcher = DecimalMatcher.getInstance(symbols);
- decimalMatcher.groupingEnabled = properties.getGroupingSize() > 0;
+ parser.addMatcher(new NanMatcher(symbols));
+ DecimalMatcher decimalMatcher = new DecimalMatcher();
decimalMatcher.requireGroupingMatch = isStrict;
+ decimalMatcher.groupingEnabled = properties.getGroupingSize() > 0;
+ decimalMatcher.decimalEnabled = properties.getDecimalPatternMatchRequired() ? decimalSeparatorRequired : true;
decimalMatcher.grouping1 = properties.getGroupingSize();
decimalMatcher.grouping2 = properties.getSecondaryGroupingSize();
decimalMatcher.integerOnly = properties.getParseIntegerOnly();
+ decimalMatcher.freeze(symbols, isStrict);
parser.addMatcher(decimalMatcher);
if (!properties.getParseNoExponent()) {
parser.addMatcher(new ScientificMatcher(symbols));
if (parseCurrency) {
parser.addMatcher(new RequireCurrencyMatcher());
}
+ if (decimalSeparatorRequired) {
+ parser.addMatcher(new RequireDecimalSeparatorMatcher());
+ }
////////////////////////
/// OTHER ATTRIBUTES ///
}
public void addMatcher(NumberParseMatcher matcher) {
- matchers.add(matcher);
+ assert !frozen;
+ this.matchers.add(matcher);
+ }
+
+ public void addMatchers(Collection<? extends NumberParseMatcher> matchers) {
+ assert !frozen;
+ this.matchers.addAll(matchers);
}
public void setComparator(Comparator<ParsedNumber> comparator) {
+ assert !frozen;
this.comparator = comparator;
}
public void setIgnoreCase(boolean ignoreCase) {
+ assert !frozen;
this.ignoreCase = ignoreCase;
}
public static final int FLAG_PERMILLE = 0x0004;
public static final int FLAG_HAS_EXPONENT = 0x0008;
public static final int FLAG_HAS_DEFAULT_CURRENCY = 0x0010;
+ public static final int FLAG_HAS_DECIMAL_SEPARATOR = 0x0020;
+ public static final int FLAG_NAN = 0x0040;
/** A Comparator that favors ParsedNumbers with the most chars consumed. */
public static final Comparator<ParsedNumber> COMPARATOR = new Comparator<ParsedNumber>() {
charsConsumed = segment.getOffset();
}
+ /** Returns true once a numeric value has been recorded: either quantity is set or FLAG_NAN is present. */
+ public boolean seenNumber() {
+ boolean sawNaN = (flags & FLAG_NAN) != 0;
+ return quantity != null || sawNaN;
+ }
+
public double getDouble() {
+ if (0 != (flags & FLAG_NAN)) {
+ return Double.NaN;
+ }
double d = quantity.toDouble();
if (0 != (flags & FLAG_NEGATIVE)) {
d = -d;
--- /dev/null
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.impl.number.parse;
+
+/**
+ * Validation-only matcher: it never consumes input, and in post-processing it clears any result that does not
+ * carry {@link ParsedNumber#FLAG_HAS_DECIMAL_SEPARATOR}.
+ *
+ * @author sffc
+ */
+public class RequireDecimalSeparatorMatcher implements NumberParseMatcher {
+
+    @Override
+    public boolean match(StringSegment segment, ParsedNumber result) {
+        // This matcher only acts in postProcess.
+        return false;
+    }
+
+    @Override
+    public void postProcess(ParsedNumber result) {
+        boolean sawDecimalSeparator = (result.flags & ParsedNumber.FLAG_HAS_DECIMAL_SEPARATOR) != 0;
+        if (!sawDecimalSeparator) {
+            result.clear();
+        }
+    }
+
+    @Override
+    public String toString() {
+        return "<RequireDecimalSeparator>";
+    }
+}
@Override
public void postProcess(ParsedNumber result) {
// Require that a number is matched.
- if (result.quantity == null) {
+ if (!result.seenNumber()) {
result.clear();
}
}
public ScientificMatcher(DecimalFormatSymbols symbols) {
exponentSeparatorString = symbols.getExponentSeparator();
minusSignString = symbols.getMinusSignString();
- exponentMatcher = DecimalMatcher.getExponentInstance(symbols);
+ exponentMatcher = new DecimalMatcher();
+ exponentMatcher.isScientific = true;
+ exponentMatcher.groupingEnabled = false;
+ exponentMatcher.decimalEnabled = false;
+ exponentMatcher.freeze(symbols, false);
}
@Override
public boolean match(StringSegment segment, ParsedNumber result) {
// Only accept scientific notation after the mantissa.
- if (result.quantity == null) {
+ if (!result.seenNumber()) {
return false;
}
--- /dev/null
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.impl.number.parse;
+
+import com.ibm.icu.text.DecimalFormatSymbols;
+import com.ibm.icu.text.UnicodeSet;
+
+/**
+ * Shared, frozen UnicodeSets of characters accepted as grouping and decimal separators when parsing, plus helpers
+ * for picking the set that corresponds to a locale's separator symbols.
+ *
+ * @author sffc
+ */
+public class SeparatorSetUtils {
+
+    // TODO: Re-generate these sets from the database. They probably haven't been updated in a while.
+
+    static final UnicodeSet COMMA_LIKE = new UnicodeSet(
+            "[,\\u060C\\u066B\\u3001\\uFE10\\uFE11\\uFE50\\uFE51\\uFF0C\\uFF64]").freeze();
+
+    static final UnicodeSet STRICT_COMMA_LIKE = new UnicodeSet("[,\\u066B\\uFE10\\uFE50\\uFF0C]").freeze();
+
+    static final UnicodeSet PERIOD_LIKE = new UnicodeSet("[.\\u2024\\u3002\\uFE12\\uFE52\\uFF0E\\uFF61]").freeze();
+
+    static final UnicodeSet STRICT_PERIOD_LIKE = new UnicodeSet("[.\\u2024\\uFE52\\uFF0E\\uFF61]").freeze();
+
+    static final UnicodeSet OTHER_GROUPING_SEPARATORS = new UnicodeSet(
+            "[\\ '\\u00A0\\u066C\\u2000-\\u200A\\u2018\\u2019\\u202F\\u205F\\u3000\\uFF07]").freeze();
+
+    static final UnicodeSet COMMA_OR_PERIOD_LIKE = new UnicodeSet().addAll(COMMA_LIKE).addAll(PERIOD_LIKE).freeze();
+
+    static final UnicodeSet STRICT_COMMA_OR_PERIOD_LIKE = new UnicodeSet().addAll(STRICT_COMMA_LIKE)
+            .addAll(STRICT_PERIOD_LIKE).freeze();
+
+    static final UnicodeSet COMMA_LIKE_OR_OTHER = new UnicodeSet().addAll(COMMA_LIKE).addAll(OTHER_GROUPING_SEPARATORS)
+            .freeze();
+
+    static final UnicodeSet STRICT_COMMA_LIKE_OR_OTHER = new UnicodeSet().addAll(STRICT_COMMA_LIKE)
+            .addAll(OTHER_GROUPING_SEPARATORS).freeze();
+
+    static final UnicodeSet PERIOD_LIKE_OR_OTHER = new UnicodeSet().addAll(PERIOD_LIKE)
+            .addAll(OTHER_GROUPING_SEPARATORS).freeze();
+
+    static final UnicodeSet STRICT_PERIOD_LIKE_OR_OTHER = new UnicodeSet().addAll(STRICT_PERIOD_LIKE)
+            .addAll(OTHER_GROUPING_SEPARATORS).freeze();
+
+    static final UnicodeSet COMMA_OR_PERIOD_LIKE_OR_OTHER = new UnicodeSet().addAll(COMMA_LIKE).addAll(PERIOD_LIKE)
+            .addAll(OTHER_GROUPING_SEPARATORS).freeze();
+
+    static final UnicodeSet STRICT_COMMA_OR_PERIOD_LIKE_OR_OTHER = new UnicodeSet().addAll(STRICT_COMMA_LIKE)
+            .addAll(STRICT_PERIOD_LIKE).addAll(OTHER_GROUPING_SEPARATORS).freeze();
+
+    /**
+     * Picks the set of characters to accept as a grouping separator.
+     *
+     * @param symbols supplies the grouping separator string
+     * @param isStrict if true, the strict comma-like and period-like sets are used as candidates
+     * @return the first candidate set that contains the grouping separator, or a frozen set holding just that
+     *         separator if none does
+     */
+    public static UnicodeSet getGroupingUnicodeSet(DecimalFormatSymbols symbols, boolean isStrict) {
+        if (isStrict) {
+            return chooseUnicodeSet(symbols.getGroupingSeparatorString(),
+                    STRICT_COMMA_LIKE_OR_OTHER,
+                    STRICT_PERIOD_LIKE_OR_OTHER,
+                    OTHER_GROUPING_SEPARATORS);
+        } else {
+            return chooseUnicodeSet(symbols.getGroupingSeparatorString(),
+                    COMMA_LIKE_OR_OTHER,
+                    PERIOD_LIKE_OR_OTHER,
+                    OTHER_GROUPING_SEPARATORS);
+        }
+    }
+
+    /**
+     * Picks the set of characters to accept as a decimal separator.
+     *
+     * @param symbols supplies the decimal separator string
+     * @param isStrict if true, the strict comma-like and period-like sets are used as candidates
+     * @return the first candidate set that contains the decimal separator, or a frozen set holding just that
+     *         separator if none does
+     */
+    public static UnicodeSet getDecimalUnicodeSet(DecimalFormatSymbols symbols, boolean isStrict) {
+        if (isStrict) {
+            return chooseUnicodeSet(symbols.getDecimalSeparatorString(), STRICT_COMMA_LIKE, STRICT_PERIOD_LIKE);
+        } else {
+            return chooseUnicodeSet(symbols.getDecimalSeparatorString(), COMMA_LIKE, PERIOD_LIKE);
+        }
+    }
+
+    /** Returns set1 if it contains str; otherwise a new frozen set containing only str. */
+    private static UnicodeSet chooseUnicodeSet(String str, UnicodeSet set1) {
+        return set1.contains(str) ? set1 : new UnicodeSet().add(str).freeze();
+    }
+
+    /** Tries set1, then set2, then falls back to a new frozen set containing only str. */
+    private static UnicodeSet chooseUnicodeSet(String str, UnicodeSet set1, UnicodeSet set2) {
+        return set1.contains(str) ? set1 : chooseUnicodeSet(str, set2);
+    }
+
+    /** Tries set1, set2 and set3 in order, then falls back to a new frozen set containing only str. */
+    private static UnicodeSet chooseUnicodeSet(String str, UnicodeSet set1, UnicodeSet set2, UnicodeSet set3) {
+        return set1.contains(str) ? set1 : chooseUnicodeSet(str, set2, set3);
+    }
+
+    /**
+     * Returns a frozen set containing everything in set1 and set2. Known combinations of the shared constants are
+     * answered with a precomputed constant; anything else is computed by cloning set1.
+     *
+     * @param set1 first operand; not modified
+     * @param set2 second operand; not modified
+     * @return a frozen union of the two sets
+     */
+    public static UnicodeSet unionUnicodeSets(UnicodeSet set1, UnicodeSet set2) {
+        // Note: == operators should be okay here since non-static UnicodeSets happen only in fallback cases.
+        if (set1 == UnicodeSet.EMPTY && set2 == UnicodeSet.EMPTY) {
+            return UnicodeSet.EMPTY;
+        } else if (set1 == COMMA_LIKE_OR_OTHER && set2 == PERIOD_LIKE) {
+            // Grouping set from getGroupingUnicodeSet combined with decimal set from getDecimalUnicodeSet.
+            return COMMA_OR_PERIOD_LIKE_OR_OTHER;
+        } else if (set1 == PERIOD_LIKE_OR_OTHER && set2 == COMMA_LIKE) {
+            return COMMA_OR_PERIOD_LIKE_OR_OTHER;
+        } else if (set1 == STRICT_COMMA_LIKE_OR_OTHER && set2 == STRICT_PERIOD_LIKE) {
+            return STRICT_COMMA_OR_PERIOD_LIKE_OR_OTHER;
+        } else if (set1 == STRICT_PERIOD_LIKE_OR_OTHER && set2 == STRICT_COMMA_LIKE) {
+            return STRICT_COMMA_OR_PERIOD_LIKE_OR_OTHER;
+        } else if (set1 == COMMA_LIKE_OR_OTHER && set2 == PERIOD_LIKE_OR_OTHER) {
+            return COMMA_OR_PERIOD_LIKE_OR_OTHER;
+        } else if (set1 == PERIOD_LIKE_OR_OTHER && set2 == COMMA_LIKE_OR_OTHER) {
+            return COMMA_OR_PERIOD_LIKE_OR_OTHER;
+        } else if (set1 == STRICT_COMMA_LIKE_OR_OTHER && set2 == STRICT_PERIOD_LIKE_OR_OTHER) {
+            return STRICT_COMMA_OR_PERIOD_LIKE_OR_OTHER;
+        } else if (set1 == STRICT_PERIOD_LIKE_OR_OTHER && set2 == STRICT_COMMA_LIKE_OR_OTHER) {
+            return STRICT_COMMA_OR_PERIOD_LIKE_OR_OTHER;
+        } else if (set1 == COMMA_LIKE && set2 == PERIOD_LIKE) {
+            return COMMA_OR_PERIOD_LIKE;
+        } else if (set1 == PERIOD_LIKE && set2 == COMMA_LIKE) {
+            return COMMA_OR_PERIOD_LIKE;
+        } else if (set1 == STRICT_COMMA_LIKE && set2 == STRICT_PERIOD_LIKE) {
+            return STRICT_COMMA_OR_PERIOD_LIKE;
+        } else if (set1 == STRICT_PERIOD_LIKE && set2 == STRICT_COMMA_LIKE) {
+            return STRICT_COMMA_OR_PERIOD_LIKE;
+        } else {
+            return set1.cloneAsThawed().addAll(set2).freeze();
+        }
+    }
+}
+++ /dev/null
-// © 2017 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html#License
-package com.ibm.icu.impl.number.parse;
-
-import com.ibm.icu.text.UnicodeSet;
-
-/**
- * @author sffc
- *
- */
-public class WhitespaceMatcher implements NumberParseMatcher {
-
- // This set was decided after discussion with icu-design@. See ticket #13309.
- // Zs+TAB is "horizontal whitespace" according to UTS #18 (blank property).
- private static final UnicodeSet UNISET_WHITESPACE = new UnicodeSet("[[:Zs:][\\u0009]]").freeze();
-
- private static final WhitespaceMatcher INSTANCE = new WhitespaceMatcher();
-
- public static WhitespaceMatcher getInstance() {
- return INSTANCE;
- }
-
- private WhitespaceMatcher() {
- }
-
- @Override
- public boolean match(StringSegment segment, ParsedNumber result) {
- while (segment.length() > 0) {
- int cp = segment.getCodePoint();
- if (cp == -1 || !UNISET_WHITESPACE.contains(cp)) {
- break;
- }
- segment.adjustOffset(Character.charCount(cp));
- // Note: Do not touch the charsConsumed.
- }
- return segment.length() == 0 || segment.isLeadingSurrogate();
- }
-
- @Override
- public void postProcess(ParsedNumber result) {
- // No-op
- }
-
- @Override
- public String toString() {
- return "<WhitespaceMatcher>";
- }
-}
set pattern '-'#y
begin
parse output breaks
-// FIXME
--45y 45 P
+-45y 45
test parse with locale symbols
// The grouping separator in it_CH is an apostrophe
(USD 7.926) -7.926 USD CJ
USD (7.926) -7.926 USD CJ
USD (7.92) -7.92 USD CJ
-(7.92)USD -7.92 USD CJP
+(7.92)USD -7.92 USD CJ
USD(7.92) -7.92 USD CJ
(8) USD -8 USD
-8 USD -8 USD C
US Dollar (53.45) -53.45 USD CJ
(53.45) US Dollar -53.45 USD
US Dollars(53.45) -53.45 USD CJ
-(53.45)US Dollars -53.45 USD CJP
+(53.45)US Dollars -53.45 USD CJ
US Dollar(53.45) -53.45 USD CJ
US Dollat(53.45) fail USD
-(53.45)US Dollar -53.45 USD CJP
+(53.45)US Dollar -53.45 USD CJ
test parse currency ISO negative
53.45 fail GBP
£53.45 53.45 GBP
$53.45 fail USD J
-// FIXME: Fix the failures in this section. Positive/negative mixup.
-53.45 USD 53.45 USD P
-53.45 GBP 53.45 GBP P
+53.45 USD 53.45 USD
+53.45 GBP 53.45 GBP
USD 53.45 53.45 USD J
53.45USD 53.45 USD CJ
USD53.45 53.45 USD
-7.92USD -7.92 USD CJ
USD-7.92 -7.92 USD CJ
-8 USD -8 USD
-67 USD 67 USD P
+67 USD 67 USD
53.45$ fail USD
US Dollars 53.45 53.45 USD J
-53.45 US Dollars 53.45 USD P
+53.45 US Dollars 53.45 USD
US Dollar 53.45 53.45 USD J
-53.45 US Dollar 53.45 USD P
+53.45 US Dollar 53.45 USD
US Dollars53.45 53.45 USD
53.45US Dollars 53.45 USD CJ
US Dollar53.45 53.45 USD
USD 53.45 53.45 USD J
53.45USD 53.45 USD CJ
USD53.45 53.45 USD
-(7.92) USD -7.92 USD P
-(7.92) GBP -7.92 GBP P
-(7.926) USD -7.926 USD P
-(7.926 USD) -7.926 USD CJP
-(USD 7.926) -7.926 USD CJP
-USD (7.926) -7.926 USD CJP
-USD (7.92) -7.92 USD CJP
-(7.92)USD -7.92 USD CJP
-USD(7.92) -7.92 USD CJP
-(8) USD -8 USD P
+(7.92) USD -7.92 USD
+(7.92) GBP -7.92 GBP
+(7.926) USD -7.926 USD
+(7.926 USD) -7.926 USD CJ
+(USD 7.926) -7.926 USD CJ
+USD (7.926) -7.926 USD CJ
+USD (7.92) -7.92 USD CJ
+(7.92)USD -7.92 USD CJ
+USD(7.92) -7.92 USD CJ
+(8) USD -8 USD
-8 USD -8 USD C
67 USD 67 USD
// J throws a NullPointerException on the next case
USD 53.45 53.45 USD J
53.45USD 53.45 USD CJ
USD53.45 53.45 USD
-(7.92) USD -7.92 USD P
-(7.92) GBP -7.92 GBP P
-(7.926) USD -7.926 USD P
-(7.926 USD) -7.926 USD CJP
-(USD 7.926) -7.926 USD CJP
-USD (7.926) -7.926 USD CJP
-USD (7.92) -7.92 USD CJP
-(7.92)USD -7.92 USD CJP
-USD(7.92) -7.92 USD CJP
-(8) USD -8 USD P
+(7.92) USD -7.92 USD
+(7.92) GBP -7.92 GBP
+(7.926) USD -7.926 USD
+(7.926 USD) -7.926 USD CJ
+(USD 7.926) -7.926 USD CJ
+USD (7.926) -7.926 USD CJ
+USD (7.92) -7.92 USD CJ
+(7.92)USD -7.92 USD CJ
+USD(7.92) -7.92 USD CJ
+(8) USD -8 USD
-8 USD -8 USD C
67 USD 67 USD
53.45$ fail USD
53.45USD 53.45 USD CJ
USD53.45 53.45 USD
// S fails these because '(' is an incomplete prefix.
-(7.92) USD -7.92 USD CJSP
-(7.92) GBP -7.92 GBP CJSP
-(7.926) USD -7.926 USD CJSP
-(7.926 USD) -7.926 USD CJSP
-(USD 7.926) -7.926 USD JP
-USD (7.926) -7.926 USD CJSP
-USD (7.92) -7.92 USD CJSP
-(7.92)USD -7.92 USD CJSP
-USD(7.92) -7.92 USD CJSP
-(8) USD -8 USD CJSP
+(7.92) USD -7.92 USD CJS
+(7.92) GBP -7.92 GBP CJS
+(7.926) USD -7.926 USD CJS
+(7.926 USD) -7.926 USD CJS
+(USD 7.926) -7.926 USD J
+USD (7.926) -7.926 USD CJS
+USD (7.92) -7.92 USD CJS
+(7.92)USD -7.92 USD CJS
+USD(7.92) -7.92 USD CJS
+(8) USD -8 USD CJS
-8 USD -8 USD C
67 USD 67 USD C
53.45$ fail USD
test parse currency without currency mode
// Should accept a symbol associated with the currency specified by the API,
// but should not traverse the full currency data.
+// P always traverses the full currency data.
set locale en_US
set pattern \u00a4#,##0.00
begin
parse currency output breaks
$52.41 USD 52.41
USD52.41 USD 52.41 K
-\u20ac52.41 USD fail
-EUR52.41 USD fail
-$52.41 EUR fail
-USD52.41 EUR fail
+\u20ac52.41 USD fail P
+EUR52.41 USD fail P
+$52.41 EUR fail P
+USD52.41 EUR fail P
\u20ac52.41 EUR 52.41 K
EUR52.41 EUR 52.41
begin
pattern parse output breaks
// K doesn't support this feature.
+// P stops parsing when it sees the decimal separator, but doesn't fail.
0 123 123
-0 123. fail CJK
-0 1.23 fail CJK
+0 123. fail CJKP
+0 1.23 fail CJKP
0 -513 -513
-0 -513. fail CJK
-0 -5.13 fail CJK
+0 -513. fail CJKP
+0 -5.13 fail CJKP
0.0 123 fail K
0.0 123. 123 C
0.0 1.23 1.23 C
import com.ibm.icu.impl.number.AffixUtils;
import com.ibm.icu.impl.number.AffixUtils.SymbolProvider;
import com.ibm.icu.impl.number.NumberStringBuilder;
+import com.ibm.icu.text.UnicodeSet;
public class AffixUtilsTest {
}
@Test
- public void testRemoveSymbols() {
+ public void testWithoutSymbolsOrIgnorables() {
String[][] cases = {
{"", ""},
{"-", ""},
+ {" ", ""},
{"'-'", "-"},
- {"-a+b%c‰d¤e¤¤f¤¤¤g¤¤¤¤h¤¤¤¤¤", "abcdefgh"},
+ {"-a+b%c‰d¤e¤¤f¤¤¤g¤¤¤¤h¤¤¤¤¤i\tj", "abcdefghij"},
};
+ UnicodeSet ignorables = new UnicodeSet("[:whitespace:]");
StringBuilder sb = new StringBuilder();
for (String[] cas : cases) {
String input = cas[0];
String expected = cas[1];
sb.setLength(0);
- AffixUtils.removeSymbols(input, sb);
+ AffixUtils.withoutSymbolsOrIgnorables(input, ignorables, sb);
assertEquals("Removing symbols from: " + input, expected, sb.toString());
}
}
{ 3, "a 𝟱𝟭𝟰𝟮𝟯 b", "a0b", 14, 51423. },
{ 3, "-a 𝟱𝟭𝟰𝟮𝟯 b", "a0b", 15, -51423. },
{ 3, "a -𝟱𝟭𝟰𝟮𝟯 b", "a0b", 15, -51423. },
- { 3, "𝟱𝟭𝟰𝟮𝟯", "0;(0)", 10, 51423. },
- { 3, "(𝟱𝟭𝟰𝟮𝟯)", "0;(0)", 12, -51423. },
- { 3, "𝟱𝟭𝟰𝟮𝟯)", "0;(0)", 11, -51423. },
- { 3, "(𝟱𝟭𝟰𝟮𝟯", "0;(0)", 11, -51423. },
+ { 3, "𝟱𝟭𝟰𝟮𝟯", "[0];(0)", 10, 51423. },
+ { 3, "[𝟱𝟭𝟰𝟮𝟯", "[0];(0)", 11, 51423. },
+ { 3, "𝟱𝟭𝟰𝟮𝟯]", "[0];(0)", 11, 51423. },
+ { 3, "[𝟱𝟭𝟰𝟮𝟯]", "[0];(0)", 12, 51423. },
+ { 3, "(𝟱𝟭𝟰𝟮𝟯", "[0];(0)", 11, -51423. },
+ { 3, "𝟱𝟭𝟰𝟮𝟯)", "[0];(0)", 11, -51423. },
+ { 3, "(𝟱𝟭𝟰𝟮𝟯)", "[0];(0)", 12, -51423. },
+ { 3, "𝟱𝟭𝟰𝟮𝟯", "{0};{0}", 10, 51423. },
+ { 3, "{𝟱𝟭𝟰𝟮𝟯", "{0};{0}", 11, 51423. },
+ { 3, "𝟱𝟭𝟰𝟮𝟯}", "{0};{0}", 11, 51423. },
+ { 3, "{𝟱𝟭𝟰𝟮𝟯}", "{0};{0}", 12, 51423. },
{ 1, "a40b", "a0'0b'", 3, 40. }, // greedy code path thinks "40" is the number
{ 2, "a40b", "a0'0b'", 4, 4. }, // slow code path finds the suffix "0b"
{ 3, "𝟱.𝟭𝟰𝟮E𝟯", "0", 12, 5142. },