import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.UTF16;
+import com.ibm.icu.text.UnicodeSet;
/**
* TextTrieMap is a trie implementation for supporting
}
}
+    /**
+     * Collects the lead chars of this trie into the given set by delegating to the root node.
+     *
+     * @param output the set that receives the lead chars; it is modified in place
+     */
+    public void putLeadChars(UnicodeSet output) {
+        Node root = _root;
+        root.putLeadChars(output);
+    }
+
/**
* Creates an object that consumes code points one at a time and returns intermediate prefix
* matches. Returns null if no match exists.
return match;
}
+        /**
+         * Adds the first element of each direct child's text to the given set. A node without a child
+         * list contributes nothing.
+         *
+         * @param output the set that receives the lead chars; it is modified in place
+         */
+        public void putLeadChars(UnicodeSet output) {
+            if (_children != null) {
+                for (Node kid : _children) {
+                    // Only the first element of the child's text can start a match from this node.
+                    output.add(kid._text[0]);
+                }
+            }
+        }
+
public class StepResult {
public Node node;
public int offset;
return false;
}
+    /**
+     * Builds the set of chars that can begin a match of this matcher: the leading char of the
+     * prefix and the leading char of the suffix (empty strings contribute nothing).
+     *
+     * @param ignoreCase whether the leading chars are case-folded before being added
+     * @return a frozen set holding the collected lead chars
+     */
+    @Override
+    public UnicodeSet getLeadChars(boolean ignoreCase) {
+        UnicodeSet collected = new UnicodeSet();
+        ParsingUtils.putLeadingChar(prefix, collected, ignoreCase);
+        ParsingUtils.putLeadingChar(suffix, collected, ignoreCase);
+        collected.freeze();
+        return collected;
+    }
+
@Override
public void postProcess(ParsedNumber result) {
// Check to see if our affix is the one that was matched. If so, set the flags in the result.
import java.util.Iterator;
import com.ibm.icu.impl.TextTrieMap;
+import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.Currency;
import com.ibm.icu.util.Currency.CurrencyStringInfo;
import com.ibm.icu.util.ULocale;
return trieOutput.partialMatch;
}
+    /**
+     * Builds the set of chars that can begin a match of this matcher by merging the lead chars of
+     * the long-name trie and the symbol trie.
+     *
+     * @param ignoreCase not consulted here; both tries report their lead chars as stored
+     * @return a frozen set holding the collected lead chars
+     */
+    @Override
+    public UnicodeSet getLeadChars(boolean ignoreCase) {
+        // NOTE(review): ignoreCase is unused, so this relies on the tries already storing their keys
+        // in the form the parser looks up -- confirm against how the tries are built.
+        UnicodeSet collected = new UnicodeSet();
+        longNameTrie.putLeadChars(collected);
+        symbolTrie.putLeadChars(collected);
+        collected.freeze();
+        return collected;
+    }
+
@Override
public void postProcess(ParsedNumber result) {
// No-op
return segment.length() == 0 || hasPartialPrefix || segment.isLeadingSurrogate();
}
+    // Frozen so that this shared static instance cannot be mutated after class initialization,
+    // matching how the other static UnicodeSet constants in this package are declared.
+    private static final UnicodeSet UNISET_DIGITS = new UnicodeSet("[:digit:]").freeze();
+
+    /**
+     * Builds the set of chars that can begin a match of this matcher: the BMP chars and lead
+     * surrogates of the generic digit set, the leading char of every entry in digitStrings, and
+     * the BMP chars and lead surrogates of the separator set.
+     *
+     * @param ignoreCase whether the leading chars of the digit strings are case-folded before being
+     *        added; the two sets are added as-is
+     * @return a frozen set holding the collected lead chars
+     */
+    @Override
+    public UnicodeSet getLeadChars(boolean ignoreCase) {
+        UnicodeSet leadChars = new UnicodeSet();
+        ParsingUtils.putLeadSurrogates(UNISET_DIGITS, leadChars);
+        for (String digitString : digitStrings) {
+            ParsingUtils.putLeadingChar(digitString, leadChars, ignoreCase);
+        }
+        ParsingUtils.putLeadSurrogates(separatorSet, leadChars);
+        return leadChars.freeze();
+    }
+
@Override
public void postProcess(ParsedNumber result) {
// No-op
* @author sffc
*
*/
-public class IgnorablesMatcher implements NumberParseMatcher {
+public class IgnorablesMatcher extends RangeMatcher {
// BiDi characters are skipped over and ignored at any point in the string, even in strict mode.
static final UnicodeSet UNISET_BIDI = new UnicodeSet("[[\\u200E\\u200F\\u061C]]").freeze();
}
}
- private final UnicodeSet ignorables;
-
private IgnorablesMatcher(UnicodeSet ignorables) {
- this.ignorables = ignorables;
+ super(ignorables);
}
@Override
- public boolean match(StringSegment segment, ParsedNumber result) {
- while (segment.length() > 0) {
- int cp = segment.getCodePoint();
- if (cp == -1 || !ignorables.contains(cp)) {
- break;
- }
- segment.adjustOffset(Character.charCount(cp));
- // Note: Do not touch the charsConsumed.
- }
- return segment.length() == 0 || segment.isLeadingSurrogate();
+ protected boolean isDisabled(ParsedNumber result) {
+ return false;
}
@Override
- public void postProcess(ParsedNumber result) {
+ protected void accept(StringSegment segment, ParsedNumber result) {
// No-op
}
@Override
public String toString() {
- return "<WhitespaceMatcher>";
+ return "<IgnorablesMatcher>";
}
}
}
@Override
- protected void accept(ParsedNumber result) {
+ protected void accept(StringSegment segment, ParsedNumber result) {
result.flags |= ParsedNumber.FLAG_NEGATIVE;
+ result.setCharsConsumed(segment);
}
@Override
package com.ibm.icu.impl.number.parse;
import com.ibm.icu.text.DecimalFormatSymbols;
+import com.ibm.icu.text.UnicodeSet;
/**
* @author sffc
*
*/
-public class NanMatcher implements NumberParseMatcher {
-
- private final String nanString;
+public class NanMatcher extends SymbolMatcher {
public NanMatcher(DecimalFormatSymbols symbols) {
- nanString = symbols.getNaN();
+ super(symbols.getNaN(), UnicodeSet.EMPTY);
}
@Override
- public boolean match(StringSegment segment, ParsedNumber result) {
- int overlap = segment.getCommonPrefixLength(nanString);
- if (overlap == nanString.length()) {
- result.flags |= ParsedNumber.FLAG_NAN;
- segment.adjustOffset(overlap);
- result.setCharsConsumed(segment);
- return false;
- } else if (overlap == segment.length()) {
- return true;
- } else {
- return false;
- }
+ protected boolean isDisabled(ParsedNumber result) {
+ return result.seenNumber();
}
@Override
- public void postProcess(ParsedNumber result) {
- // No-op
+ protected void accept(StringSegment segment, ParsedNumber result) {
+ result.flags |= ParsedNumber.FLAG_NAN;
+ result.setCharsConsumed(segment);
}
@Override
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
+import com.ibm.icu.text.UnicodeSet;
+
/**
* @author sffc
*
*/
public boolean match(StringSegment segment, ParsedNumber result);
+ /**
+ * Should return a set representing all possible chars (UTF-16 code units) that could be the first char that this
+ * matcher can consume. This method is only called during construction phase, when the matcher is added to a
+ * parser, and its return value is used to skip this matcher unless a segment begins with a char in this set.
+ * To make this matcher always run, return {@link UnicodeSet#ALL_CODE_POINTS}.
+ *
+ * <p>
+ * The parser asserts that the returned set is frozen. When the parser ignores case, it looks up the case-folded
+ * leading char of the segment in this set, so any entry derived from a string should be case-folded as well.
+ *
+ * @param ignoreCase
+ *            whether the parser that owns this matcher ignores case
+ * @return a frozen set of the UTF-16 code units that may begin a match of this matcher
+ */
+ public UnicodeSet getLeadChars(boolean ignoreCase);
+
/**
* Method called at the end of a parse, after all matchers have failed to consume any more chars. Allows a matcher
* to make final modifications to the result given the knowledge that no more matches are possible.
*
*/
public class NumberParserImpl {
+ @Deprecated
public static NumberParserImpl createParserFromPattern(String pattern, boolean strictGrouping) {
// Temporary frontend for testing.
- NumberParserImpl parser = new NumberParserImpl();
+ NumberParserImpl parser = new NumberParserImpl(true);
ULocale locale = new ULocale("en_IN");
DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(locale);
}
}
+    /**
+     * Creates a parser for the given locale from the locale's decimal format symbols and the
+     * properties parsed from the pattern "0", with currency parsing turned off.
+     *
+     * @param loc the locale whose symbols the parser should use
+     * @return the parser returned by createParserFromProperties
+     */
+    public static NumberParserImpl createDefaultParserForLocale(ULocale loc) {
+        final DecimalFormatSymbols localeSymbols = DecimalFormatSymbols.getInstance(loc);
+        final DecimalFormatProperties defaultProperties = PatternStringParser.parseToProperties("0");
+        final boolean parseCurrency = false;
+        return createParserFromProperties(defaultProperties, localeSymbols, parseCurrency);
+    }
+
public static NumberParserImpl createParserFromProperties(
DecimalFormatProperties properties,
DecimalFormatSymbols symbols,
boolean parseCurrency) {
- NumberParserImpl parser = new NumberParserImpl();
+ NumberParserImpl parser = new NumberParserImpl(!properties.getParseCaseSensitive());
ULocale locale = symbols.getULocale();
Currency currency = CustomSymbolCurrency.resolve(properties.getCurrency(), locale, symbols);
boolean isStrict = properties.getParseMode() == ParseMode.STRICT;
parser.addMatcher(new RequireDecimalSeparatorMatcher());
}
- ////////////////////////
- /// OTHER ATTRIBUTES ///
- ////////////////////////
-
- parser.setIgnoreCase(!properties.getParseCaseSensitive());
-
- System.out.println(parser);
-
parser.freeze();
return parser;
}
+ private final boolean ignoreCase;
private final List<NumberParseMatcher> matchers;
+ private final List<UnicodeSet> leadCharses;
private Comparator<ParsedNumber> comparator;
- private boolean ignoreCase;
private boolean frozen;
- public NumberParserImpl() {
+ public NumberParserImpl(boolean ignoreCase) {
matchers = new ArrayList<NumberParseMatcher>();
+ leadCharses = new ArrayList<UnicodeSet>();
comparator = ParsedNumber.COMPARATOR; // default value
- ignoreCase = true;
+ this.ignoreCase = ignoreCase;
frozen = false;
}
public void addMatcher(NumberParseMatcher matcher) {
assert !frozen;
this.matchers.add(matcher);
+ UnicodeSet leadChars = matcher.getLeadChars(ignoreCase);
+ assert leadChars.isFrozen();
+ this.leadCharses.add(leadChars);
}
public void addMatchers(Collection<? extends NumberParseMatcher> matchers) {
assert !frozen;
this.matchers.addAll(matchers);
+ for (NumberParseMatcher matcher : matchers) {
+ UnicodeSet leadChars = matcher.getLeadChars(ignoreCase);
+ assert leadChars.isFrozen();
+ this.leadCharses.add(leadChars);
+ }
}
public void setComparator(Comparator<ParsedNumber> comparator) {
this.comparator = comparator;
}
- public void setIgnoreCase(boolean ignoreCase) {
- assert !frozen;
- this.ignoreCase = ignoreCase;
- }
-
public void freeze() {
frozen = true;
}
}
int initialOffset = segment.getOffset();
+ char leadChar = ignoreCase ? ParsingUtils.getCaseFoldedLeadingChar(segment) : segment.charAt(0);
for (int i = 0; i < matchers.size(); i++) {
+ if (!leadCharses.get(i).contains(leadChar)) {
+ continue;
+ }
NumberParseMatcher matcher = matchers.get(i);
matcher.match(segment, result);
if (segment.getOffset() != initialOffset) {
--- /dev/null
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.impl.number.parse;
+
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.text.UTF16;
+import com.ibm.icu.text.UnicodeSet;
+import com.ibm.icu.text.UnicodeSet.EntryRange;
+
+/**
+ * A collection of utility functions used by the number parsing package.
+ */
+public class ParsingUtils {
+
+    /**
+     * Adds to output every BMP code point of input, plus the lead surrogate of every supplementary
+     * code point of input. Only the code point ranges of input are visited; any multi-character
+     * strings it may hold are not.
+     *
+     * @param input the set to read code point ranges from
+     * @param output the set that receives the chars; it is modified in place
+     */
+    public static void putLeadSurrogates(UnicodeSet input, UnicodeSet output) {
+        if (input.isEmpty()) {
+            return;
+        }
+        for (EntryRange range : input.ranges()) {
+            int start = range.codepoint;
+            int end = range.codepointEnd;
+            if (start <= 0xFFFF) {
+                // BMP portion of the range: each code point is its own UTF-16 code unit.
+                output.add(start, Math.min(end, 0xFFFF));
+            }
+            if (end > 0xFFFF) {
+                // Supplementary portion: the lead surrogate never decreases as the code point grows
+                // and moves up by one every 0x400 code points, so a contiguous code point range
+                // maps onto a contiguous lead surrogate range. A single range add therefore
+                // produces the same set as adding the lead surrogate of every code point.
+                int firstSupplementary = Math.max(0x10000, start);
+                output.add(UTF16.getLeadSurrogate(firstSupplementary), UTF16.getLeadSurrogate(end));
+            }
+        }
+    }
+
+    /**
+     * Adds the first char of the given string to leadChars, performing case-folding if necessary.
+     * An empty string adds nothing.
+     *
+     * @param str the string whose first char is added
+     * @param leadChars the set that receives the char; it is modified in place
+     * @param ignoreCase whether to case-fold the leading code point before adding it
+     */
+    public static void putLeadingChar(String str, UnicodeSet leadChars, boolean ignoreCase) {
+        if (str.isEmpty()) {
+            return;
+        }
+        if (ignoreCase) {
+            leadChars.add(getCaseFoldedLeadingChar(str));
+        } else {
+            leadChars.add(str.charAt(0));
+        }
+    }
+
+    /**
+     * Case-folds the first code point of str and returns the first UTF-16 code unit of the folded
+     * code point: the code point itself when it is in the BMP, otherwise its lead surrogate.
+     *
+     * @param str the text to inspect; it must not be empty, because the code point at index 0 is read
+     * @return the leading UTF-16 code unit of the case-folded first code point
+     */
+    public static char getCaseFoldedLeadingChar(CharSequence str) {
+        int cp = UCharacter.foldCase(Character.codePointAt(str, 0), true);
+        if (cp <= 0xFFFF) {
+            return (char) cp;
+        } else {
+            return UTF16.getLeadSurrogate(cp);
+        }
+    }
+
+}
}
@Override
- protected void accept(ParsedNumber result) {
+ protected void accept(StringSegment segment, ParsedNumber result) {
result.flags |= ParsedNumber.FLAG_PERCENT;
+ result.setCharsConsumed(segment);
}
@Override
}
@Override
- protected void accept(ParsedNumber result) {
+ protected void accept(StringSegment segment, ParsedNumber result) {
result.flags |= ParsedNumber.FLAG_PERMILLE;
+ result.setCharsConsumed(segment);
}
@Override
}
@Override
- protected void accept(ParsedNumber result) {
- // No-op
+ protected void accept(StringSegment segment, ParsedNumber result) {
+ result.setCharsConsumed(segment);
}
@Override
--- /dev/null
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.impl.number.parse;
+
+import com.ibm.icu.text.UnicodeSet;
+
+/**
+ * Base class for matchers that consume a run of consecutive code points belonging to a fixed
+ * UnicodeSet. Subclasses decide when the matcher is switched off and what to record for each
+ * consumed code point.
+ *
+ * @author sffc
+ *
+ */
+public abstract class RangeMatcher implements NumberParseMatcher {
+    protected final UnicodeSet uniSet;
+
+    /**
+     * @param uniSet the set of code points this matcher consumes
+     */
+    protected RangeMatcher(UnicodeSet uniSet) {
+        this.uniSet = uniSet;
+    }
+
+    /**
+     * Consumes code points from the front of the segment for as long as they belong to the set,
+     * calling accept after each one.
+     *
+     * @return false if the matcher is disabled; true if the whole segment was consumed; otherwise
+     *         whatever segment.isLeadingSurrogate() reports at the first code point that is not
+     *         consumed
+     */
+    @Override
+    public boolean match(StringSegment segment, ParsedNumber result) {
+        // A disabled matcher leaves the segment untouched.
+        if (isDisabled(result)) {
+            return false;
+        }
+
+        while (segment.length() > 0) {
+            int codePoint = segment.getCodePoint();
+            if (codePoint == -1 || !uniSet.contains(codePoint)) {
+                // First code point outside the set (or no code point available): stop here.
+                return segment.isLeadingSurrogate();
+            }
+            segment.adjustOffset(Character.charCount(codePoint));
+            accept(segment, result);
+        }
+
+        // Nothing is left in the segment.
+        return true;
+    }
+
+    /**
+     * Returns a frozen set holding the BMP chars and lead surrogates of the matcher's set. The
+     * ignoreCase argument is not consulted.
+     */
+    @Override
+    public UnicodeSet getLeadChars(boolean ignoreCase) {
+        UnicodeSet collected = new UnicodeSet();
+        ParsingUtils.putLeadSurrogates(uniSet, collected);
+        collected.freeze();
+        return collected;
+    }
+
+    @Override
+    public void postProcess(ParsedNumber result) {
+        // No-op
+    }
+
+    /** Tells match whether to return immediately without consuming anything. */
+    protected abstract boolean isDisabled(ParsedNumber result);
+
+    /** Called by match after each consumed code point, once the segment offset has been advanced. */
+    protected abstract void accept(StringSegment segment, ParsedNumber result);
+
+}
* @author sffc
*
*/
-public class RequireAffixMatcher implements NumberParseMatcher {
-
- @Override
- public boolean match(StringSegment segment, ParsedNumber result) {
- return false;
- }
+public class RequireAffixMatcher extends ValidationMatcher {
@Override
public void postProcess(ParsedNumber result) {
* @author sffc
*
*/
-public class RequireCurrencyMatcher implements NumberParseMatcher {
-
- @Override
- public boolean match(StringSegment segment, ParsedNumber result) {
- return false;
- }
+public class RequireCurrencyMatcher extends ValidationMatcher {
@Override
public void postProcess(ParsedNumber result) {
* @author sffc
*
*/
-public class RequireDecimalSeparatorMatcher implements NumberParseMatcher {
-
- @Override
- public boolean match(StringSegment segment, ParsedNumber result) {
- return false;
- }
+public class RequireDecimalSeparatorMatcher extends ValidationMatcher {
@Override
public void postProcess(ParsedNumber result) {
* @author sffc
*
*/
-public class RequireExponentMatcher implements NumberParseMatcher {
-
- @Override
- public boolean match(StringSegment segment, ParsedNumber result) {
- return false;
- }
+public class RequireExponentMatcher extends ValidationMatcher {
@Override
public void postProcess(ParsedNumber result) {
* @author sffc
*
*/
-public class RequireNumberMatcher implements NumberParseMatcher {
-
- @Override
- public boolean match(StringSegment segment, ParsedNumber result) {
- return false;
- }
+public class RequireNumberMatcher extends ValidationMatcher {
@Override
public void postProcess(ParsedNumber result) {
package com.ibm.icu.impl.number.parse;
import com.ibm.icu.text.DecimalFormatSymbols;
+import com.ibm.icu.text.UnicodeSet;
/**
* @author sffc
return false;
}
+    /**
+     * Builds the set of chars that can begin a match of this matcher: the leading char of the
+     * exponent separator string (an empty string contributes nothing).
+     *
+     * @param ignoreCase whether the leading char is case-folded before being added
+     * @return a frozen set holding the collected lead char
+     */
+    @Override
+    public UnicodeSet getLeadChars(boolean ignoreCase) {
+        UnicodeSet collected = new UnicodeSet();
+        ParsingUtils.putLeadingChar(exponentSeparatorString, collected, ignoreCase);
+        collected.freeze();
+        return collected;
+    }
+
@Override
public void postProcess(ParsedNumber result) {
// No-op
*
*/
public abstract class SymbolMatcher implements NumberParseMatcher {
- private final String string;
- private final UnicodeSet uniSet;
+ protected final String string;
+ protected final UnicodeSet uniSet;
protected SymbolMatcher(String symbolString, UnicodeSet symbolUniSet) {
string = symbolString;
int cp = segment.getCodePoint();
if (cp != -1 && uniSet.contains(cp)) {
- accept(result);
segment.adjustOffset(Character.charCount(cp));
+ accept(segment, result);
return false;
}
int overlap = segment.getCommonPrefixLength(string);
if (overlap == string.length()) {
- accept(result);
segment.adjustOffset(string.length());
+ accept(segment, result);
return false;
}
- return overlap == segment.length();
+ return overlap == segment.length() || segment.isLeadingSurrogate();
+ }
+
+    /**
+     * Builds the set of chars that can begin a match of this matcher: the BMP chars and lead
+     * surrogates of the symbol set, plus the leading char of the symbol string.
+     *
+     * @param ignoreCase whether the leading char of the symbol string is case-folded before being
+     *        added; the symbol set is added as-is
+     * @return a frozen set holding the collected lead chars
+     */
+    @Override
+    public UnicodeSet getLeadChars(boolean ignoreCase) {
+        UnicodeSet collected = new UnicodeSet();
+        ParsingUtils.putLeadSurrogates(uniSet, collected);
+        ParsingUtils.putLeadingChar(string, collected, ignoreCase);
+        collected.freeze();
+        return collected;
     }
@Override
protected abstract boolean isDisabled(ParsedNumber result);
- protected abstract void accept(ParsedNumber result);
+ protected abstract void accept(StringSegment segment, ParsedNumber result);
}
--- /dev/null
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.impl.number.parse;
+
+import com.ibm.icu.text.UnicodeSet;
+
+/**
+ * A Matcher used only for post-process validation, not for consuming characters at runtime.
+ * Subclasses supply the validation by implementing postProcess, which this class leaves abstract.
+ */
+public abstract class ValidationMatcher implements NumberParseMatcher {
+ /**
+ * Never consumes anything: the segment and the result are left untouched and false is always returned.
+ */
+ @Override
+ public boolean match(StringSegment segment, ParsedNumber result) {
+ return false;
+ }
+ /**
+ * Returns the empty set because this matcher consumes no characters, so no leading char can select it.
+ */
+ @Override
+ public UnicodeSet getLeadChars(boolean ignoreCase) {
+ return UnicodeSet.EMPTY;
+ }
+
+}
ParsedNumber resultObject = new ParsedNumber();
parser.parse(input, true, resultObject);
assertNotNull(message, resultObject.quantity);
- assertEquals(message, resultDouble, resultObject.getNumber().doubleValue(), 0.0);
assertEquals(message, expectedCharsConsumed, resultObject.charsConsumed);
+ assertEquals(message, resultDouble, resultObject.getNumber().doubleValue(), 0.0);
}
if (0 != (flags & 0x02)) {
ParsedNumber resultObject = new ParsedNumber();
parser.parse(input, false, resultObject);
assertNotNull(message, resultObject.quantity);
- assertEquals(message, resultDouble, resultObject.getNumber().doubleValue(), 0.0);
assertEquals(message, expectedCharsConsumed, resultObject.charsConsumed);
+ assertEquals(message, resultDouble, resultObject.getNumber().doubleValue(), 0.0);
}
if (0 != (flags & 0x04)) {
ParsedNumber resultObject = new ParsedNumber();
parser.parse(input, true, resultObject);
assertNotNull(message, resultObject.quantity);
- assertEquals(message, resultDouble, resultObject.getNumber().doubleValue(), 0.0);
assertEquals(message, expectedCharsConsumed, resultObject.charsConsumed);
+ assertEquals(message, resultDouble, resultObject.getNumber().doubleValue(), 0.0);
}
}
}