From 519d2a5a86e919476df825b26bee80d8b81d340e Mon Sep 17 00:00:00 2001 From: Shane Carr Date: Sat, 27 Jan 2018 11:45:00 +0000 Subject: [PATCH] ICU-13513 Removing ICU 59 parsing code. X-SVN-Rev: 40814 --- .../src/com/ibm/icu/impl/TextTrieMap.java | 122 - .../impl/number/DecimalFormatProperties.java | 44 +- .../src/com/ibm/icu/impl/number/Parse.java | 2385 ----------------- .../impl/number/parse/NumberParserImpl.java | 38 +- .../src/com/ibm/icu/text/DecimalFormat.java | 6 +- .../icu/text/ScientificNumberFormatter.java | 6 +- .../core/src/com/ibm/icu/util/Currency.java | 15 - .../data/numberformattestspecification.txt | 42 +- .../format/NumberFormatDataDrivenTest.java | 96 +- .../icu/dev/test/number/PropertiesTest.java | 9 +- .../icu/dev/test/util/TextTrieMapTest.java | 123 - 11 files changed, 69 insertions(+), 2817 deletions(-) delete mode 100644 icu4j/main/classes/core/src/com/ibm/icu/impl/number/Parse.java diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/TextTrieMap.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/TextTrieMap.java index b178f3a79c7..764d09adab7 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/TextTrieMap.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/TextTrieMap.java @@ -14,7 +14,6 @@ import java.util.List; import java.util.ListIterator; import com.ibm.icu.lang.UCharacter; -import com.ibm.icu.text.UTF16; import com.ibm.icu.text.UnicodeSet; /** @@ -120,85 +119,6 @@ public class TextTrieMap { _root.putLeadCodePoints(output); } - /** - * Creates an object that consumes code points one at a time and returns intermediate prefix - * matches. Returns null if no match exists. - * - * @return An instance of {@link ParseState}, or null if the starting code point is not a - * prefix for any entry in the trie. - */ - public ParseState openParseState(int startingCp) { - // Check to see whether this is a valid starting character. If not, return null. - if (_ignoreCase) { - startingCp = UCharacter.foldCase(startingCp, true); - } - int count = Character.charCount(startingCp); - char ch1 = (count == 1) ? (char) startingCp : UTF16.getLeadSurrogate(startingCp); - if (!_root.hasChildFor(ch1)) { - return null; - } - - return new ParseState(_root); - } - - /** - * ParseState is mutable, not thread-safe, and intended to be used internally by parsers for - * consuming values from this trie. - */ - public class ParseState { - private Node node; - private int offset; - private Node.StepResult result; - - ParseState(Node start) { - node = start; - offset = 0; - result = start.new StepResult(); - } - - /** - * Consumes a code point and walk to the next node in the trie. - * - * @param cp The code point to consume. - */ - public void accept(int cp) { - assert node != null; - if (_ignoreCase) { - cp = UCharacter.foldCase(cp, true); - } - int count = Character.charCount(cp); - char ch1 = (count == 1) ? (char) cp : UTF16.getLeadSurrogate(cp); - node.takeStep(ch1, offset, result); - if (count == 2 && result.node != null) { - char ch2 = UTF16.getTrailSurrogate(cp); - result.node.takeStep(ch2, result.offset, result); - } - node = result.node; - offset = result.offset; - } - - /** - * Gets the exact prefix matches for all code points that have been consumed so far. - * - * @return The matches. - */ - public Iterator getCurrentMatches() { - if (node != null && offset == node.charCount()) { - return node.values(); - } - return null; - } - - /** - * Checks whether any more code points can be consumed. - * - * @return true if no more code points can be consumed; false otherwise. - */ - public boolean atEnd() { - return node == null || (node.charCount() == offset && node._children == null); - } - } - public static class CharIterator implements Iterator { private boolean _ignoreCase; private CharSequence _text; @@ -332,17 +252,6 @@ public class TextTrieMap { return _text == null ? 0 : _text.length; } - public boolean hasChildFor(char ch) { - for (int i=0; _children != null && i < _children.size(); i++) { - Node child = _children.get(i); - if (ch < child._text[0]) break; - if (ch == child._text[0]) { - return true; - } - } - return false; - } - public Iterator values() { if (_values == null) { return null; @@ -405,37 +314,6 @@ public class TextTrieMap { } } - public class StepResult { - public Node node; - public int offset; - } - public void takeStep(char ch, int offset, StepResult result) { - assert offset <= charCount(); - if (offset == charCount()) { - // Go to a child node - for (int i=0; _children != null && i < _children.size(); i++) { - Node child = _children.get(i); - if (ch < child._text[0]) break; - if (ch == child._text[0]) { - // Found a matching child node - result.node = child; - result.offset = 1; - return; - } - } - // No matching children; fall through - } else if (_text[offset] == ch) { - // Return to this node; increase offset - result.node = this; - result.offset = offset + 1; - return; - } - // No matches - result.node = null; - result.offset = -1; - return; - } - private void add(char[] text, int offset, V value) { if (text.length == offset) { _values = addValue(_values, value); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/DecimalFormatProperties.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/DecimalFormatProperties.java index 73d41e5b2fb..94ddef3ddef 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/DecimalFormatProperties.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/DecimalFormatProperties.java @@ -16,8 +16,7 @@ import java.util.ArrayList; import java.util.Map; import com.ibm.icu.impl.number.Padder.PadPosition; -import com.ibm.icu.impl.number.Parse.GroupingMode; -import com.ibm.icu.impl.number.Parse.ParseMode; +import com.ibm.icu.impl.number.parse.NumberParserImpl.ParseMode; import com.ibm.icu.text.CompactDecimalFormat.CompactStyle; import com.ibm.icu.text.CurrencyPluralInfo; import com.ibm.icu.text.PluralRules; @@ -73,7 +72,6 @@ public class DecimalFormatProperties implements Cloneable, Serializable { private transient PadPosition padPosition; private transient String padString; private transient boolean parseCaseSensitive; - private transient GroupingMode parseGroupingMode; private transient boolean parseIntegerOnly; private transient ParseMode parseMode; private transient boolean parseNoExponent; @@ -145,7 +143,6 @@ public class DecimalFormatProperties implements Cloneable, Serializable { padPosition = null; padString = null; parseCaseSensitive = false; - parseGroupingMode = null; parseIntegerOnly = false; parseMode = null; parseNoExponent = false; @@ -191,7 +188,6 @@ public class DecimalFormatProperties implements Cloneable, Serializable { padPosition = other.padPosition; padString = other.padString; parseCaseSensitive = other.parseCaseSensitive; - parseGroupingMode = other.parseGroupingMode; parseIntegerOnly = other.parseIntegerOnly; parseMode = other.parseMode; parseNoExponent = other.parseNoExponent; @@ -238,7 +234,6 @@ public class DecimalFormatProperties implements Cloneable, Serializable { eq = eq && _equalsHelper(padPosition, other.padPosition); eq = eq && _equalsHelper(padString, other.padString); eq = eq && _equalsHelper(parseCaseSensitive, other.parseCaseSensitive); - eq = eq && _equalsHelper(parseGroupingMode, other.parseGroupingMode); eq = eq && _equalsHelper(parseIntegerOnly, other.parseIntegerOnly); eq = eq && _equalsHelper(parseMode, other.parseMode); eq = eq && _equalsHelper(parseNoExponent, other.parseNoExponent); @@ -301,7 +296,6 @@ public class DecimalFormatProperties implements Cloneable, Serializable { hashCode ^= _hashCodeHelper(padPosition); hashCode ^= _hashCodeHelper(padString); hashCode ^= _hashCodeHelper(parseCaseSensitive); - hashCode ^= _hashCodeHelper(parseGroupingMode); hashCode ^= _hashCodeHelper(parseIntegerOnly); hashCode ^= _hashCodeHelper(parseMode); hashCode ^= _hashCodeHelper(parseNoExponent); @@ -484,10 +478,6 @@ public class DecimalFormatProperties implements Cloneable, Serializable { return parseCaseSensitive; } - public GroupingMode getParseGroupingMode() { - return parseGroupingMode; - } - public boolean getParseIntegerOnly() { return parseIntegerOnly; } @@ -1082,34 +1072,6 @@ public class DecimalFormatProperties implements Cloneable, Serializable { return this; } - /** - * Sets the strategy used during parsing when a code point needs to be interpreted as either a - * decimal separator or a grouping separator. - * - *

- * The comma, period, space, and apostrophe have different meanings in different locales. For - * example, in en-US and most American locales, the period is used as a decimal separator, - * but in es-PY and most European locales, it is used as a grouping separator. - * - *

- * Suppose you are in fr-FR the parser encounters the string "1.234". In fr-FR, the - * grouping is a space and the decimal is a comma. The grouping mode is a mechanism to let - * you specify whether to accept the string as 1234 (GroupingMode.DEFAULT) or whether to reject it - * since the separators don't match (GroupingMode.RESTRICTED). - * - *

- * When resolving grouping separators, it is the equivalence class of separators that is - * considered. For example, a period is seen as equal to a fixed set of other period-like characters. - * - * @param parseGroupingMode - * The {@link GroupingMode} to use; either DEFAULT or RESTRICTED. - * @return The property bag, for chaining. - */ - public DecimalFormatProperties setParseGroupingMode(GroupingMode parseGroupingMode) { - this.parseGroupingMode = parseGroupingMode; - return this; - } - /** * Whether to ignore the fractional part of numbers. For example, parses "123.4" to "123" instead of * "123.4". @@ -1150,8 +1112,8 @@ public class DecimalFormatProperties implements Cloneable, Serializable { } /** - * Whether to always return a BigDecimal from {@link Parse#parse} and all other parse methods. By - * default, a Long or a BigInteger are returned when possible. + * Whether to always return a BigDecimal from parse methods. By default, a Long or a BigInteger are + * returned when possible. * * @param parseToBigDecimal * true to always return a BigDecimal; false to return a Long or a BigInteger when diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/Parse.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/Parse.java deleted file mode 100644 index efb0e5002d8..00000000000 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/Parse.java +++ /dev/null @@ -1,2385 +0,0 @@ -// © 2017 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html#License -package com.ibm.icu.impl.number; - -import java.math.BigDecimal; -import java.math.MathContext; -import java.text.ParseException; -import java.text.ParsePosition; -import java.util.HashSet; -import java.util.Iterator; -import java.util.Set; -import java.util.concurrent.ConcurrentHashMap; - -import com.ibm.icu.impl.StandardPlural; -import com.ibm.icu.impl.TextTrieMap; -import com.ibm.icu.lang.UCharacter; -import com.ibm.icu.text.CurrencyPluralInfo; -import com.ibm.icu.text.DecimalFormatSymbols; -import com.ibm.icu.text.NumberFormat; -import com.ibm.icu.text.UnicodeSet; -import com.ibm.icu.util.Currency; -import com.ibm.icu.util.Currency.CurrencyStringInfo; -import com.ibm.icu.util.CurrencyAmount; -import com.ibm.icu.util.ULocale; - -/** - * A parser designed to convert an arbitrary human-generated string to its best representation as a - * number: a long, a BigInteger, or a BigDecimal. - * - *

The parser may traverse multiple parse paths in the same strings if there is ambiguity. For - * example, the string "12,345.67" has two main interpretations: it could be "12.345" in a locale - * that uses '.' as the grouping separator, or it could be "12345.67" in a locale that uses ',' as - * the grouping separator. Since the second option has a longer parse path (consumes more of the - * input string), the parser will accept the second option. - */ -public class Parse { - - /** Controls the set of rules for parsing a string. */ - public static enum ParseMode { - /** - * Lenient mode should be used if you want to accept malformed user input. It will use - * heuristics to attempt to parse through typographical errors in the string. - */ - LENIENT, - - /** - * Strict mode should be used if you want to require that the input is well-formed. More - * specifically, it differs from lenient mode in the following ways: - * - *

    - *
  • Grouping widths must match the grouping settings. For example, "12,3,45" will fail if - * the grouping width is 3, as in the pattern "#,##0". - *
  • The string must contain a complete prefix and suffix. For example, if the pattern is - * "{#};(#)", then "{123}" or "(123)" would match, but "{123", "123}", and "123" would all - * fail. (The latter strings would be accepted in lenient mode.) - *
  • Whitespace may not appear at arbitrary places in the string. In lenient mode, - * whitespace is allowed to occur arbitrarily before and after prefixes and exponent - * separators. - *
  • Leading grouping separators are not allowed, as in ",123". - *
  • Minus and plus signs can only appear if specified in the pattern. In lenient mode, a - * plus or minus sign can always precede a number. - *
  • The set of characters that can be interpreted as a decimal or grouping separator is - * smaller. - *
  • If currency parsing is enabled, currencies must only appear where - * specified in either the current pattern string or in a valid pattern string for the - * current locale. For example, if the pattern is "¤0.00", then "$1.23" would match, but - * "1.23$" would fail to match. - *
- */ - STRICT, - - /** - * Fast mode should be used in applications that don't require prefixes and suffixes to match. - * - *

In addition to ignoring prefixes and suffixes, fast mode performs the following - * optimizations: - * - *

    - *
  • Ignores digit strings from {@link DecimalFormatSymbols} and only uses the code point's - * Unicode digit property. If you are not using custom digit strings, this should not - * cause a change in behavior. - *
  • Instead of traversing multiple possible parse paths, a "greedy" parsing strategy is - * used, which might mean that fast mode won't accept strings that lenient or strict mode - * would accept. Since prefix and suffix strings are ignored, this is not an issue unless - * you are using custom symbols. - *
- */ - FAST, - } - - /** - * An enum containing the choices for strategy in parsing when choosing between grouping and - * decimal separators. - */ - public static enum GroupingMode { - /** - * Accept decimal equivalents as decimals, and if that fails, accept all equivalence classes - * (periods, commas, and whitespace-like) as grouping. This is a more lenient strategy. - * - *

For example, if the formatter's current locale is fr-FR, then "1.234" will parse - * as 1234, even though fr-FR does not use a period as the grouping separator. - */ - DEFAULT, - - /** - * Accept decimal equivalents as decimals and grouping equivalents as grouping. This strategy is - * more strict. - * - *

For example, if the formatter's current locale is fr-FR, then "1.234" will fail - * to parse since fr-FR does not use a period as the grouping separator. - */ - RESTRICTED - } - - /** - * @see Parse#parse(String, ParsePosition, ParseMode, boolean, boolean, DecimalFormatProperties, - * DecimalFormatSymbols) - */ - private static enum StateName { - BEFORE_PREFIX, - AFTER_PREFIX, - AFTER_INTEGER_DIGIT, - AFTER_FRACTION_DIGIT, - AFTER_EXPONENT_SEPARATOR, - AFTER_EXPONENT_DIGIT, - BEFORE_SUFFIX, - BEFORE_SUFFIX_SEEN_EXPONENT, - AFTER_SUFFIX, - INSIDE_CURRENCY, - INSIDE_DIGIT, - INSIDE_STRING, - INSIDE_AFFIX_PATTERN; - } - - // This set was decided after discussion with icu-design@. See ticket #13309. - // Zs+TAB is "horizontal whitespace" according to UTS #18 (blank property). - private static final UnicodeSet UNISET_WHITESPACE = - new UnicodeSet("[[:Zs:][\\u0009]]").freeze(); - - // BiDi characters are skipped over and ignored at any point in the string, even in strict mode. - private static final UnicodeSet UNISET_BIDI = - new UnicodeSet("[[\\u200E\\u200F\\u061C]]").freeze(); - - // TODO: Re-generate these sets from the database. They probably haven't been updated in a while. - private static final UnicodeSet UNISET_PERIOD_LIKE = - new UnicodeSet("[.\\u2024\\u3002\\uFE12\\uFE52\\uFF0E\\uFF61]").freeze(); - private static final UnicodeSet UNISET_STRICT_PERIOD_LIKE = - new UnicodeSet("[.\\u2024\\uFE52\\uFF0E\\uFF61]").freeze(); - private static final UnicodeSet UNISET_COMMA_LIKE = - new UnicodeSet("[,\\u060C\\u066B\\u3001\\uFE10\\uFE11\\uFE50\\uFE51\\uFF0C\\uFF64]").freeze(); - private static final UnicodeSet UNISET_STRICT_COMMA_LIKE = - new UnicodeSet("[,\\u066B\\uFE10\\uFE50\\uFF0C]").freeze(); - private static final UnicodeSet UNISET_OTHER_GROUPING_SEPARATORS = - new UnicodeSet( - "[\\ '\\u00A0\\u066C\\u2000-\\u200A\\u2018\\u2019\\u202F\\u205F\\u3000\\uFF07]") - .freeze(); - - // For parse return value calculation. - private static final BigDecimal MIN_LONG_AS_BIG_DECIMAL = new BigDecimal(Long.MIN_VALUE); - private static final BigDecimal MAX_LONG_AS_BIG_DECIMAL = new BigDecimal(Long.MAX_VALUE); - - private enum SeparatorType { - COMMA_LIKE, - PERIOD_LIKE, - OTHER_GROUPING, - UNKNOWN; - - static SeparatorType fromCp(int cp, ParseMode mode) { - if (mode == ParseMode.FAST) { - return SeparatorType.UNKNOWN; - } else if (mode == ParseMode.STRICT) { - if (UNISET_STRICT_COMMA_LIKE.contains(cp)) return COMMA_LIKE; - if (UNISET_STRICT_PERIOD_LIKE.contains(cp)) return PERIOD_LIKE; - if (UNISET_OTHER_GROUPING_SEPARATORS.contains(cp)) return OTHER_GROUPING; - return UNKNOWN; - } else { - if (UNISET_COMMA_LIKE.contains(cp)) return COMMA_LIKE; - if (UNISET_PERIOD_LIKE.contains(cp)) return PERIOD_LIKE; - if (UNISET_OTHER_GROUPING_SEPARATORS.contains(cp)) return OTHER_GROUPING; - return UNKNOWN; - } - } - } - - private static enum DigitType { - INTEGER, - FRACTION, - EXPONENT - } - - /** - * Holds a snapshot in time of a single parse path. This includes the digits seen so far, the - * current state name, and other properties like the grouping separator used on this parse path, - * details about the exponent and negative signs, etc. - */ - private static class StateItem { - // Parser state: - // The "trailingChars" is used to keep track of how many characters from the end of the string - // are ignorable and should be removed from the parse position should this item be accepted. - // The "score" is used to help rank two otherwise equivalent parse paths. Currently, the only - // function giving points to the score is prefix/suffix. - StateName name; - int trailingCount; - int score; - - // Numerical value: - DecimalQuantity_DualStorageBCD fq = new DecimalQuantity_DualStorageBCD(); - int numDigits; - int trailingZeros; - int exponent; - - // Other items that we've seen: - int groupingCp; - long groupingWidths; - String isoCode; - boolean sawNegative; - boolean sawNegativeExponent; - boolean sawCurrency; - boolean sawNaN; - boolean sawInfinity; - AffixHolder affix; - boolean sawPrefix; - boolean sawSuffix; - boolean sawDecimalPoint; - boolean sawExponentDigit; - - // Data for intermediate parsing steps: - StateName returnTo1; - StateName returnTo2; - // For string literals: - CharSequence currentString; - int currentOffset; - boolean currentTrailing; - // For affix patterns: - CharSequence currentAffixPattern; - long currentStepwiseParserTag; - // For currency: - TextTrieMap.ParseState currentCurrencyTrieState; - // For multi-code-point digits: - TextTrieMap.ParseState currentDigitTrieState; - DigitType currentDigitType; - - // Identification for path tracing: - final char id; - String path; - - StateItem(char _id) { - id = _id; - } - - /** - * Clears the instance so that it can be re-used. - * - * @return Myself, for chaining. - */ - StateItem clear() { - // Parser state: - name = StateName.BEFORE_PREFIX; - trailingCount = 0; - score = 0; - - // Numerical value: - fq.clear(); - numDigits = 0; - trailingZeros = 0; - exponent = 0; - - // Other items we've seen: - groupingCp = -1; - groupingWidths = 0L; - isoCode = null; - sawNegative = false; - sawNegativeExponent = false; - sawCurrency = false; - sawNaN = false; - sawInfinity = false; - affix = null; - sawPrefix = false; - sawSuffix = false; - sawDecimalPoint = false; - sawExponentDigit = false; - - // Data for intermediate parsing steps: - returnTo1 = null; - returnTo2 = null; - currentString = null; - currentOffset = 0; - currentTrailing = false; - currentAffixPattern = null; - currentStepwiseParserTag = 0L; - currentCurrencyTrieState = null; - currentDigitTrieState = null; - currentDigitType = null; - - // Identification for path tracing: - // id is constant and is not cleared - path = ""; - - return this; - } - - /** - * Sets the internal value of this instance equal to another instance. - * - *

newName and cpOrN1 are required as parameters to this function because every time a code - * point is consumed and a state item is copied, both of the corresponding fields should be - * updated; it would be an error if they weren't updated. - * - * @param other The instance to copy from. - * @param newName The state name that the new copy should take on. - * @param trailing If positive, record this code point as trailing; if negative, reset the - * trailing count to zero. - * @return Myself, for chaining. - */ - StateItem copyFrom(StateItem other, StateName newName, int trailing) { - // Parser state: - name = newName; - score = other.score; - - // Either reset trailingCount or add the width of the current code point. - trailingCount = (trailing < 0) ? 0 : other.trailingCount + Character.charCount(trailing); - - // Numerical value: - fq.copyFrom(other.fq); - numDigits = other.numDigits; - trailingZeros = other.trailingZeros; - exponent = other.exponent; - - // Other items we've seen: - groupingCp = other.groupingCp; - groupingWidths = other.groupingWidths; - isoCode = other.isoCode; - sawNegative = other.sawNegative; - sawNegativeExponent = other.sawNegativeExponent; - sawCurrency = other.sawCurrency; - sawNaN = other.sawNaN; - sawInfinity = other.sawInfinity; - affix = other.affix; - sawPrefix = other.sawPrefix; - sawSuffix = other.sawSuffix; - sawDecimalPoint = other.sawDecimalPoint; - sawExponentDigit = other.sawExponentDigit; - - // Data for intermediate parsing steps: - returnTo1 = other.returnTo1; - returnTo2 = other.returnTo2; - currentString = other.currentString; - currentOffset = other.currentOffset; - currentTrailing = other.currentTrailing; - currentAffixPattern = other.currentAffixPattern; - currentStepwiseParserTag = other.currentStepwiseParserTag; - currentCurrencyTrieState = other.currentCurrencyTrieState; - currentDigitTrieState = other.currentDigitTrieState; - currentDigitType = other.currentDigitType; - - // Record source node if debugging - if (DEBUGGING) { - path = other.path + other.id; - } - - return this; - } - - /** - * Adds a digit to the internal representation of this instance. - * - * @param digit The digit that was read from the string. - * @param type Whether the digit occured after the decimal point. - */ - void appendDigit(byte digit, DigitType type) { - if (type == DigitType.EXPONENT) { - sawExponentDigit = true; - int newExponent = exponent * 10 + digit; - if (newExponent < exponent) { - // overflow - exponent = Integer.MAX_VALUE; - } else { - exponent = newExponent; - } - } else { - numDigits++; - if (type == DigitType.FRACTION && digit == 0) { - trailingZeros++; - } else if (type == DigitType.FRACTION) { - fq.appendDigit(digit, trailingZeros, false); - trailingZeros = 0; - } else { - fq.appendDigit(digit, 0, true); - } - } - } - - /** @return Whether or not this item contains a valid number. */ - public boolean hasNumber() { - return numDigits > 0 || sawNaN || sawInfinity; - } - - /** - * Converts the internal digits from this instance into a Number, preferring a Long, then a - * BigInteger, then a BigDecimal. A Double is used for NaN, infinity, and -0.0. - * - * @return The Number. Never null. - */ - Number toNumber(DecimalFormatProperties properties) { - // Check for NaN, infinity, and -0.0 - if (sawNaN) { - return Double.NaN; - } - if (sawInfinity) { - if (sawNegative) { - return Double.NEGATIVE_INFINITY; - } else { - return Double.POSITIVE_INFINITY; - } - } - if (fq.isZero() && sawNegative) { - return -0.0; - } - - // Check for exponent overflow - boolean forceBigDecimal = properties.getParseToBigDecimal(); - if (exponent == Integer.MAX_VALUE) { - if (sawNegativeExponent && sawNegative) { - return -0.0; - } else if (sawNegativeExponent) { - return 0.0; - } else if (sawNegative) { - return Double.NEGATIVE_INFINITY; - } else { - return Double.POSITIVE_INFINITY; - } - } else if (exponent > 1000) { - // BigDecimals can handle huge values better than BigIntegers. - forceBigDecimal = true; - } - - // Multipliers must be applied in reverse. - BigDecimal multiplier = properties.getMultiplier(); - if (properties.getMagnitudeMultiplier() != 0) { - if (multiplier == null) multiplier = BigDecimal.ONE; - multiplier = multiplier.scaleByPowerOfTen(properties.getMagnitudeMultiplier()); - } - int delta = (sawNegativeExponent ? -1 : 1) * exponent; - - // We need to use a math context in order to prevent non-terminating decimal expansions. - // This is only used when dividing by the multiplier. - MathContext mc = RoundingUtils.getMathContextOr34Digits(properties); - - // Construct the output number. - // This is the only step during fast-mode parsing that incurs object creations. - BigDecimal result = fq.toBigDecimal(); - if (sawNegative) result = result.negate(); - result = result.scaleByPowerOfTen(delta); - if (multiplier != null) { - result = result.divide(multiplier, mc); - } - result = result.stripTrailingZeros(); - if (forceBigDecimal || result.scale() > 0) { - return result; - } else if (result.compareTo(MIN_LONG_AS_BIG_DECIMAL) >= 0 - && result.compareTo(MAX_LONG_AS_BIG_DECIMAL) <= 0) { - return result.longValueExact(); - } else { - return result.toBigIntegerExact(); - } - } - - /** - * Converts the internal digits to a number, and also associates the number with the parsed - * currency. - * - * @return The CurrencyAmount. Never null. - */ - public CurrencyAmount toCurrencyAmount(DecimalFormatProperties properties) { - assert isoCode != null; - Number number = toNumber(properties); - Currency currency = Currency.getInstance(isoCode); - return new CurrencyAmount(number, currency); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - sb.append("["); - sb.append(path); - sb.append("] "); - sb.append(name.name()); - if (name == StateName.INSIDE_STRING) { - sb.append("{"); - sb.append(currentString); - sb.append(":"); - sb.append(currentOffset); - sb.append("}"); - } - if (name == StateName.INSIDE_AFFIX_PATTERN) { - sb.append("{"); - sb.append(currentAffixPattern); - sb.append(":"); - sb.append(AffixUtils.getOffset(currentStepwiseParserTag) - 1); - sb.append("}"); - } - sb.append(" "); - sb.append(fq.toBigDecimal()); - sb.append(" grouping:"); - sb.append(groupingCp == -1 ? new char[] {'?'} : Character.toChars(groupingCp)); - sb.append(" widths:"); - sb.append(Long.toHexString(groupingWidths)); - sb.append(" seen:"); - sb.append(sawNegative ? 1 : 0); - sb.append(sawNegativeExponent ? 1 : 0); - sb.append(sawNaN ? 1 : 0); - sb.append(sawInfinity ? 1 : 0); - sb.append(sawPrefix ? 1 : 0); - sb.append(sawSuffix ? 1 : 0); - sb.append(sawDecimalPoint ? 1 : 0); - sb.append(" trailing:"); - sb.append(trailingCount); - sb.append(" score:"); - sb.append(score); - sb.append(" affix:"); - sb.append(affix); - sb.append(" currency:"); - sb.append(isoCode); - return sb.toString(); - } - } - - /** - * Holds an ordered list of {@link StateItem} and other metadata about the string to be parsed. - * There are two internal arrays of {@link StateItem}, which are swapped back and forth in order - * to avoid object creations. The items in one array can be populated at the same time that items - * in the other array are being read from. - */ - private static class ParserState { - - // Basic ParserStateItem lists: - StateItem[] items = new StateItem[16]; - StateItem[] prevItems = new StateItem[16]; - int length; - int prevLength; - - // Properties and Symbols memory: - DecimalFormatProperties properties; - DecimalFormatSymbols symbols; - ParseMode mode; - boolean caseSensitive; - boolean parseCurrency; - GroupingMode groupingMode; - - // Other pre-computed fields: - int decimalCp1; - int decimalCp2; - int groupingCp1; - int groupingCp2; - SeparatorType decimalType1; - SeparatorType decimalType2; - SeparatorType groupingType1; - SeparatorType groupingType2; - - TextTrieMap digitTrie; - Set affixHolders = new HashSet(); - - ParserState() { - for (int i = 0; i < items.length; i++) { - items[i] = new StateItem((char) ('A' + i)); - prevItems[i] = new StateItem((char) ('A' + i)); - } - } - - /** - * Clears the internal state in order to prepare for parsing a new string. - * - * @return Myself, for chaining. - */ - ParserState clear() { - length = 0; - prevLength = 0; - digitTrie = null; - affixHolders.clear(); - return this; - } - - /** - * Swaps the internal arrays of {@link StateItem}. Sets the length of the primary list to zero, - * so that it can be appended to. - */ - void swap() { - StateItem[] temp = prevItems; - prevItems = items; - items = temp; - prevLength = length; - length = 0; - } - - /** - * Swaps the internal arrays of {@link StateItem}. Sets the length of the primary list to the - * length of the previous list, so that it can be read from. - */ - void swapBack() { - StateItem[] temp = prevItems; - prevItems = items; - items = temp; - length = prevLength; - prevLength = 0; - } - - /** - * Gets the next available {@link StateItem} from the primary list for writing. This method - * should be thought of like a list append method, except that there are no object creations - * taking place. - * - *

It is the caller's responsibility to call either {@link StateItem#clear} or {@link - * StateItem#copyFrom} on the returned object. - * - * @return A dirty {@link StateItem}. - */ - StateItem getNext() { - if (length >= items.length) { - // TODO: What to do here? Expand the array? - // This case is rare and would happen only with specially designed input. - // For now, just overwrite the last entry. - length = items.length - 1; - } - StateItem item = items[length]; - length++; - return item; - } - - /** @return The index of the last inserted StateItem via a call to {@link #getNext}. */ - public int lastInsertedIndex() { - assert length > 0; - return length - 1; - } - - /** - * Gets a {@link StateItem} from the primary list. Assumes that the item has already been added - * via a call to {@link #getNext}. - * - * @param i The index of the item to get. - * @return The item. - */ - public StateItem getItem(int i) { - assert i >= 0 && i < length; - return items[i]; - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - sb.append(""); - return sb.toString(); - } - } - - /** - * A wrapper for affixes. Affixes can be string-based or pattern-based, and they can come from - * several sources, including the property bag and the locale paterns from CLDR data. - */ - private static class AffixHolder { - final String p; // prefix - final String s; // suffix - final boolean strings; - final boolean negative; - - static final AffixHolder EMPTY_POSITIVE = new AffixHolder("", "", true, false); - static final AffixHolder EMPTY_NEGATIVE = new AffixHolder("", "", true, true); - - static void addToState(ParserState state, DecimalFormatProperties properties) { - AffixHolder pp = fromPropertiesPositivePattern(properties); - AffixHolder np = fromPropertiesNegativePattern(properties); - AffixHolder ps = fromPropertiesPositiveString(properties); - AffixHolder ns = fromPropertiesNegativeString(properties); - if (pp != null) state.affixHolders.add(pp); - if (ps != null) state.affixHolders.add(ps); - if (np != null) state.affixHolders.add(np); - if (ns != null) state.affixHolders.add(ns); - } - - static AffixHolder fromPropertiesPositivePattern(DecimalFormatProperties properties) { - String ppp = properties.getPositivePrefixPattern(); - String psp = properties.getPositiveSuffixPattern(); - if (properties.getSignAlwaysShown()) { - // TODO: This logic is somewhat duplicated from MurkyModifier. - boolean foundSign = false; - String npp = properties.getNegativePrefixPattern(); - String nsp = properties.getNegativeSuffixPattern(); - if (AffixUtils.containsType(npp, AffixUtils.TYPE_MINUS_SIGN)) { - foundSign = true; - ppp = AffixUtils.replaceType(npp, AffixUtils.TYPE_MINUS_SIGN, '+'); - } - if (AffixUtils.containsType(nsp, AffixUtils.TYPE_MINUS_SIGN)) { - foundSign = true; - psp = AffixUtils.replaceType(nsp, AffixUtils.TYPE_MINUS_SIGN, '+'); - } - if (!foundSign) { - ppp = "+" + ppp; - } - } - return getInstance(ppp, psp, false, false); - } - - static AffixHolder fromPropertiesNegativePattern(DecimalFormatProperties properties) { - String npp = properties.getNegativePrefixPattern(); - String nsp = properties.getNegativeSuffixPattern(); - if (npp == null && nsp == null) { - npp = properties.getPositivePrefixPattern(); - nsp = properties.getPositiveSuffixPattern(); - if (npp == null) { - npp = "-"; - } else { - npp = "-" + npp; - } - } - return getInstance(npp, nsp, false, true); - } - - static AffixHolder fromPropertiesPositiveString(DecimalFormatProperties properties) { - String pp = properties.getPositivePrefix(); - String ps = properties.getPositiveSuffix(); - if (pp == null && ps == null) return null; - return getInstance(pp, ps, true, false); - } - - static AffixHolder fromPropertiesNegativeString(DecimalFormatProperties properties) { - String np = properties.getNegativePrefix(); - String ns = properties.getNegativeSuffix(); - if (np == null && ns == null) return null; - return getInstance(np, ns, true, true); - } - - static AffixHolder getInstance(String p, String s, boolean strings, boolean negative) { - if (p == null && s == null) return negative ? EMPTY_NEGATIVE : EMPTY_POSITIVE; - if (p == null) p = ""; - if (s == null) s = ""; - if (p.length() == 0 && s.length() == 0) return negative ? EMPTY_NEGATIVE : EMPTY_POSITIVE; - return new AffixHolder(p, s, strings, negative); - } - - AffixHolder(String pp, String sp, boolean strings, boolean negative) { - this.p = pp; - this.s = sp; - this.strings = strings; - this.negative = negative; - } - - @Override - public boolean equals(Object other) { - if (other == null) return false; - if (this == other) return true; - if (!(other instanceof AffixHolder)) return false; - AffixHolder _other = (AffixHolder) other; - if (!p.equals(_other.p)) return false; - if (!s.equals(_other.s)) return false; - if (strings != _other.strings) return false; - if (negative != _other.negative) return false; - return true; - } - - @Override - public int hashCode() { - return p.hashCode() ^ s.hashCode(); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - sb.append("{"); - sb.append(p); - sb.append("|"); - sb.append(s); - sb.append("|"); - sb.append(strings ? 'S' : 'P'); - sb.append("}"); - return sb.toString(); - } - } - - /** - * A class that holds information about all currency affix patterns for the locale. This allows - * the parser to accept currencies in any format that are valid for the locale. - */ - private static class CurrencyAffixPatterns { - private final Set set = new HashSet(); - - private static final ConcurrentHashMap currencyAffixPatterns = - new ConcurrentHashMap(); - - static void addToState(ULocale uloc, ParserState state) { - CurrencyAffixPatterns value = currencyAffixPatterns.get(uloc); - if (value == null) { - // There can be multiple threads computing the same CurrencyAffixPatterns simultaneously, - // but that scenario is harmless. - CurrencyAffixPatterns newValue = new CurrencyAffixPatterns(uloc); - currencyAffixPatterns.putIfAbsent(uloc, newValue); - value = currencyAffixPatterns.get(uloc); - } - state.affixHolders.addAll(value.set); - } - - private CurrencyAffixPatterns(ULocale uloc) { - // Get the basic currency pattern. - String pattern = NumberFormat.getPatternForStyle(uloc, NumberFormat.CURRENCYSTYLE); - addPattern(pattern); - - // Get the currency plural patterns. - // TODO: Update this after CurrencyPluralInfo is replaced. - CurrencyPluralInfo pluralInfo = CurrencyPluralInfo.getInstance(uloc); - for (StandardPlural plural : StandardPlural.VALUES) { - pattern = pluralInfo.getCurrencyPluralPattern(plural.getKeyword()); - addPattern(pattern); - } - } - - private static final ThreadLocal threadLocalProperties = - new ThreadLocal() { - @Override - protected DecimalFormatProperties initialValue() { - return new DecimalFormatProperties(); - } - }; - - private void addPattern(String pattern) { - DecimalFormatProperties properties = threadLocalProperties.get(); - try { - PatternStringParser.parseToExistingProperties(pattern, properties); - } catch (IllegalArgumentException e) { - // This should only happen if there is a bug in CLDR data. Fail silently. - } - set.add(AffixHolder.fromPropertiesPositivePattern(properties)); - set.add(AffixHolder.fromPropertiesNegativePattern(properties)); - } - } - - /** - * Makes a {@link TextTrieMap} for parsing digit strings. A trie is required only if the digit - * strings are longer than one code point. In order for this to be the case, the user would have - * needed to specify custom multi-character digits, like "(0)". - * - * @param digitStrings The list of digit strings from DecimalFormatSymbols. - * @return A trie, or null if a trie is not required. - */ - static TextTrieMap makeDigitTrie(String[] digitStrings) { - boolean requiresTrie = false; - for (int i = 0; i < 10; i++) { - String str = digitStrings[i]; - if (Character.charCount(Character.codePointAt(str, 0)) != str.length()) { - requiresTrie = true; - break; - } - } - if (!requiresTrie) return null; - - // TODO: Consider caching the tries so they don't need to be re-created run to run. - // (Low-priority since multi-character digits are rare in practice) - TextTrieMap trieMap = new TextTrieMap(false); - for (int i = 0; i < 10; i++) { - trieMap.put(digitStrings[i], (byte) i); - } - return trieMap; - } - - protected static final ThreadLocal threadLocalParseState = - new ThreadLocal() { - @Override - protected ParserState initialValue() { - return new ParserState(); - } - }; - - protected static final ThreadLocal threadLocalParsePosition = - new ThreadLocal() { - @Override - protected ParsePosition initialValue() { - return new ParsePosition(0); - } - }; - - /** - * @internal - * @deprecated This API is ICU internal only. TODO: Remove this set from ScientificNumberFormat. - */ - @Deprecated - public static final UnicodeSet UNISET_PLUS = - new UnicodeSet( - 0x002B, 0x002B, 0x207A, 0x207A, 0x208A, 0x208A, 0x2795, 0x2795, 0xFB29, 0xFB29, - 0xFE62, 0xFE62, 0xFF0B, 0xFF0B) - .freeze(); - - /** - * @internal - * @deprecated This API is ICU internal only. TODO: Remove this set from ScientificNumberFormat. - */ - @Deprecated - public static final UnicodeSet UNISET_MINUS = - new UnicodeSet( - 0x002D, 0x002D, 0x207B, 0x207B, 0x208B, 0x208B, 0x2212, 0x2212, 0x2796, 0x2796, - 0xFE63, 0xFE63, 0xFF0D, 0xFF0D) - .freeze(); - - public static Number parse(String input, DecimalFormatProperties properties, DecimalFormatSymbols symbols) { - ParsePosition ppos = threadLocalParsePosition.get(); - ppos.setIndex(0); - return parse(input, ppos, properties, symbols); - } - - // TODO: DELETE ME once debugging is finished - public static volatile boolean DEBUGGING = false; - - /** - * Implements an iterative parser that maintains a lists of possible states at each code point in - * the string. At each code point in the string, the list of possible states is updated based on - * the states coming from the previous code point. The parser stops when it reaches the end of the - * string or when there are no possible parse paths remaining in the string. - * - *

TODO: This API is not fully flushed out. Right now this is internal-only. - * - * @param input The string to parse. - * @param ppos A {@link ParsePosition} to hold the index at which parsing stopped. - * @param properties A property bag, used only for determining the prefix/suffix strings and the - * padding character. - * @param symbols A {@link DecimalFormatSymbols} object, used for determining locale-specific - * symbols for grouping/decimal separators, digit strings, and prefix/suffix substitutions. - * @return A Number matching the parser's best interpretation of the string. - */ - public static Number parse( - CharSequence input, - ParsePosition ppos, - DecimalFormatProperties properties, - DecimalFormatSymbols symbols) { - StateItem best = _parse(input, ppos, false, properties, symbols); - return (best == null) ? null : best.toNumber(properties); - } - - public static CurrencyAmount parseCurrency( - String input, DecimalFormatProperties properties, DecimalFormatSymbols symbols) throws ParseException { - return parseCurrency(input, null, properties, symbols); - } - - public static CurrencyAmount parseCurrency( - CharSequence input, ParsePosition ppos, DecimalFormatProperties properties, DecimalFormatSymbols symbols) - throws ParseException { - if (ppos == null) { - ppos = threadLocalParsePosition.get(); - ppos.setIndex(0); - ppos.setErrorIndex(-1); - } - StateItem best = _parse(input, ppos, true, properties, symbols); - return (best == null) ? null : best.toCurrencyAmount(properties); - } - - private static StateItem _parse( - CharSequence input, - ParsePosition ppos, - boolean parseCurrency, - DecimalFormatProperties properties, - DecimalFormatSymbols symbols) { - - if (input == null || ppos == null || properties == null || symbols == null) { - throw new IllegalArgumentException("All arguments are required for parse."); - } - - ParseMode mode = properties.getParseMode(); - if (mode == null) mode = ParseMode.LENIENT; - boolean integerOnly = properties.getParseIntegerOnly(); - boolean ignoreExponent = properties.getParseNoExponent(); - boolean ignoreGrouping = properties.getGroupingSize() <= 0; - - // Set up the initial state - ParserState state = threadLocalParseState.get().clear(); - state.properties = properties; - state.symbols = symbols; - state.mode = mode; - state.parseCurrency = parseCurrency; - state.groupingMode = properties.getParseGroupingMode(); - if (state.groupingMode == null) state.groupingMode = GroupingMode.DEFAULT; - state.caseSensitive = properties.getParseCaseSensitive(); - state.decimalCp1 = Character.codePointAt(symbols.getDecimalSeparatorString(), 0); - state.decimalCp2 = Character.codePointAt(symbols.getMonetaryDecimalSeparatorString(), 0); - state.groupingCp1 = Character.codePointAt(symbols.getGroupingSeparatorString(), 0); - state.groupingCp2 = Character.codePointAt(symbols.getMonetaryGroupingSeparatorString(), 0); - state.decimalType1 = SeparatorType.fromCp(state.decimalCp1, mode); - state.decimalType2 = SeparatorType.fromCp(state.decimalCp2, mode); - state.groupingType1 = SeparatorType.fromCp(state.groupingCp1, mode); - state.groupingType2 = SeparatorType.fromCp(state.groupingCp2, mode); - StateItem initialStateItem = state.getNext().clear(); - initialStateItem.name = StateName.BEFORE_PREFIX; - - if (mode == ParseMode.LENIENT || mode == ParseMode.STRICT) { - state.digitTrie = makeDigitTrie(symbols.getDigitStringsLocal()); - AffixHolder.addToState(state, properties); - if (parseCurrency) { - CurrencyAffixPatterns.addToState(symbols.getULocale(), state); - } - } - - if (DEBUGGING) { - System.out.println("Parsing: " + input); - System.out.println(properties); - System.out.println(state); - } - - // Start walking through the string, one codepoint at a time. Backtracking is not allowed. This - // is to enforce linear runtime and prevent cases that could result in an infinite loop. - int offset = ppos.getIndex(); - for (; offset < input.length(); ) { - int cp = Character.codePointAt(input, offset); - state.swap(); - for (int i = 0; i < state.prevLength; i++) { - StateItem item = state.prevItems[i]; - if (DEBUGGING) { - System.out.println(":" + offset + item.id + " " + item); - } - - // In the switch statement below, if you see a line like: - // if (state.length > 0 && mode == ParseMode.FAST) break; - // it is used for accelerating the fast parse mode. The check is performed only in the - // states BEFORE_PREFIX, AFTER_INTEGER_DIGIT, and AFTER_FRACTION_DIGIT, which are the - // most common states. - - switch (item.name) { - case BEFORE_PREFIX: - // Beginning of string - if (mode == ParseMode.LENIENT || mode == ParseMode.FAST) { - acceptMinusOrPlusSign(cp, StateName.BEFORE_PREFIX, state, item, false); - if (state.length > 0 && mode == ParseMode.FAST) break; - } - acceptIntegerDigit(cp, StateName.AFTER_INTEGER_DIGIT, state, item); - if (state.length > 0 && mode == ParseMode.FAST) break; - acceptBidi(cp, StateName.BEFORE_PREFIX, state, item); - if (state.length > 0 && mode == ParseMode.FAST) break; - acceptWhitespace(cp, StateName.BEFORE_PREFIX, state, item); - if (state.length > 0 && mode == ParseMode.FAST) break; - acceptPadding(cp, StateName.BEFORE_PREFIX, state, item); - if (state.length > 0 && mode == ParseMode.FAST) break; - acceptNan(cp, StateName.BEFORE_SUFFIX, state, item); - if (state.length > 0 && mode == ParseMode.FAST) break; - acceptInfinity(cp, StateName.BEFORE_SUFFIX, state, item); - if (state.length > 0 && mode == ParseMode.FAST) break; - if (!integerOnly) { - acceptDecimalPoint(cp, StateName.AFTER_FRACTION_DIGIT, state, item); - if (state.length > 0 && mode == ParseMode.FAST) break; - } - if (mode == ParseMode.LENIENT || mode == ParseMode.STRICT) { - acceptPrefix(cp, StateName.AFTER_PREFIX, state, item); - } - if (mode == ParseMode.LENIENT || mode == ParseMode.FAST) { - if (!ignoreGrouping) { - acceptGrouping(cp, StateName.AFTER_INTEGER_DIGIT, state, item); - if (state.length > 0 && mode == ParseMode.FAST) break; - } - if (parseCurrency) { - acceptCurrency(cp, StateName.BEFORE_PREFIX, state, item); - } - } - break; - - case AFTER_PREFIX: - // Prefix is consumed - acceptBidi(cp, StateName.AFTER_PREFIX, state, item); - acceptPadding(cp, StateName.AFTER_PREFIX, state, item); - acceptNan(cp, StateName.BEFORE_SUFFIX, state, item); - acceptInfinity(cp, StateName.BEFORE_SUFFIX, state, item); - acceptIntegerDigit(cp, StateName.AFTER_INTEGER_DIGIT, state, item); - if (!integerOnly) { - acceptDecimalPoint(cp, StateName.AFTER_FRACTION_DIGIT, state, item); - } - if (mode == ParseMode.LENIENT || mode == ParseMode.FAST) { - acceptWhitespace(cp, StateName.AFTER_PREFIX, state, item); - if (!ignoreGrouping) { - acceptGrouping(cp, StateName.AFTER_INTEGER_DIGIT, state, item); - } - if (parseCurrency) { - acceptCurrency(cp, StateName.AFTER_PREFIX, state, item); - } - } - break; - - case AFTER_INTEGER_DIGIT: - // Previous character was an integer digit (or grouping/whitespace) - acceptIntegerDigit(cp, StateName.AFTER_INTEGER_DIGIT, state, item); - if (state.length > 0 && mode == ParseMode.FAST) break; - if (!integerOnly) { - acceptDecimalPoint(cp, StateName.AFTER_FRACTION_DIGIT, state, item); - if (state.length > 0 && mode == ParseMode.FAST) break; - } - if (!ignoreGrouping) { - acceptGrouping(cp, StateName.AFTER_INTEGER_DIGIT, state, item); - if (state.length > 0 && mode == ParseMode.FAST) break; - } - acceptBidi(cp, StateName.BEFORE_SUFFIX, state, item); - if (state.length > 0 && mode == ParseMode.FAST) break; - acceptPadding(cp, StateName.BEFORE_SUFFIX, state, item); - if (state.length > 0 && mode == ParseMode.FAST) break; - if (!ignoreExponent) { - acceptExponentSeparator(cp, StateName.AFTER_EXPONENT_SEPARATOR, state, item); - if (state.length > 0 && mode == ParseMode.FAST) break; - } - if (mode == ParseMode.LENIENT || mode == ParseMode.STRICT) { - acceptSuffix(cp, StateName.AFTER_SUFFIX, state, item); - } - if (mode == ParseMode.LENIENT || mode == ParseMode.FAST) { - acceptWhitespace(cp, StateName.BEFORE_SUFFIX, state, item); - if (state.length > 0 && mode == ParseMode.FAST) break; - // TODO(sffc): acceptMinusOrPlusSign(cp, StateName.BEFORE_SUFFIX, state, item, false); - if (state.length > 0 && mode == ParseMode.FAST) break; - if (parseCurrency) { - acceptCurrency(cp, StateName.BEFORE_SUFFIX, state, item); - } - } - break; - - case AFTER_FRACTION_DIGIT: - // We encountered a decimal point - acceptFractionDigit(cp, StateName.AFTER_FRACTION_DIGIT, state, item); - if (state.length > 0 && mode == ParseMode.FAST) break; - acceptBidi(cp, StateName.BEFORE_SUFFIX, state, item); - if (state.length > 0 && mode == ParseMode.FAST) break; - acceptPadding(cp, StateName.BEFORE_SUFFIX, state, item); - if (state.length > 0 && mode == ParseMode.FAST) break; - if (!ignoreExponent) { - acceptExponentSeparator(cp, StateName.AFTER_EXPONENT_SEPARATOR, state, item); - if (state.length > 0 && mode == ParseMode.FAST) break; - } - if (mode == ParseMode.LENIENT || mode == ParseMode.STRICT) { - acceptSuffix(cp, StateName.AFTER_SUFFIX, state, item); - } - if (mode == ParseMode.LENIENT || mode == ParseMode.FAST) { - acceptWhitespace(cp, StateName.BEFORE_SUFFIX, state, item); - if (state.length > 0 && mode == ParseMode.FAST) break; - // TODO(sffc): acceptMinusOrPlusSign(cp, StateName.BEFORE_SUFFIX, state, item, false); - if (state.length > 0 && mode == ParseMode.FAST) break; - if (parseCurrency) { - acceptCurrency(cp, StateName.BEFORE_SUFFIX, state, item); - } - } - break; - - case AFTER_EXPONENT_SEPARATOR: - acceptBidi(cp, StateName.AFTER_EXPONENT_SEPARATOR, state, item); - acceptMinusOrPlusSign(cp, StateName.AFTER_EXPONENT_SEPARATOR, state, item, true); - acceptExponentDigit(cp, StateName.AFTER_EXPONENT_DIGIT, state, item); - break; - - case AFTER_EXPONENT_DIGIT: - acceptBidi(cp, StateName.BEFORE_SUFFIX_SEEN_EXPONENT, state, item); - acceptPadding(cp, StateName.BEFORE_SUFFIX_SEEN_EXPONENT, state, item); - acceptExponentDigit(cp, StateName.AFTER_EXPONENT_DIGIT, state, item); - if (mode == ParseMode.LENIENT || mode == ParseMode.STRICT) { - acceptSuffix(cp, StateName.AFTER_SUFFIX, state, item); - } - if (mode == ParseMode.LENIENT || mode == ParseMode.FAST) { - acceptWhitespace(cp, StateName.BEFORE_SUFFIX_SEEN_EXPONENT, state, item); - // TODO(sffc): acceptMinusOrPlusSign(cp, StateName.BEFORE_SUFFIX, state, item, false); - if (parseCurrency) { - acceptCurrency(cp, StateName.BEFORE_SUFFIX_SEEN_EXPONENT, state, item); - } - } - break; - - case BEFORE_SUFFIX: - // Accept whitespace, suffixes, and exponent separators - acceptBidi(cp, StateName.BEFORE_SUFFIX, state, item); - acceptPadding(cp, StateName.BEFORE_SUFFIX, state, item); - if (!ignoreExponent) { - acceptExponentSeparator(cp, StateName.AFTER_EXPONENT_SEPARATOR, state, item); - } - if (mode == ParseMode.LENIENT || mode == ParseMode.STRICT) { - acceptSuffix(cp, StateName.AFTER_SUFFIX, state, item); - } - if (mode == ParseMode.LENIENT || mode == ParseMode.FAST) { - acceptWhitespace(cp, StateName.BEFORE_SUFFIX, state, item); - // TODO(sffc): acceptMinusOrPlusSign(cp, StateName.BEFORE_SUFFIX, state, item, false); - if (parseCurrency) { - acceptCurrency(cp, StateName.BEFORE_SUFFIX, state, item); - } - } - break; - - case BEFORE_SUFFIX_SEEN_EXPONENT: - // Accept whitespace and suffixes but not exponent separators - acceptBidi(cp, StateName.BEFORE_SUFFIX_SEEN_EXPONENT, state, item); - acceptPadding(cp, StateName.BEFORE_SUFFIX_SEEN_EXPONENT, state, item); - if (mode == ParseMode.LENIENT || mode == ParseMode.STRICT) { - acceptSuffix(cp, StateName.AFTER_SUFFIX, state, item); - } - if (mode == ParseMode.LENIENT || mode == ParseMode.FAST) { - acceptWhitespace(cp, StateName.BEFORE_SUFFIX_SEEN_EXPONENT, state, item); - // TODO(sffc): acceptMinusOrPlusSign(cp, StateName.BEFORE_SUFFIX_SEEN_EXPONENT, state, item, false); - if (parseCurrency) { - acceptCurrency(cp, StateName.BEFORE_SUFFIX_SEEN_EXPONENT, state, item); - } - } - break; - - case AFTER_SUFFIX: - if ((mode == ParseMode.LENIENT || mode == ParseMode.FAST) && parseCurrency) { - // Continue traversing in case there is a currency symbol to consume - acceptBidi(cp, StateName.AFTER_SUFFIX, state, item); - acceptPadding(cp, StateName.AFTER_SUFFIX, state, item); - acceptWhitespace(cp, StateName.AFTER_SUFFIX, state, item); - // TODO(sffc): acceptMinusOrPlusSign(cp, StateName.AFTER_SUFFIX, state, item, false); - if (parseCurrency) { - acceptCurrency(cp, StateName.AFTER_SUFFIX, state, item); - } - } - // Otherwise, do not accept any more characters. - break; - - case INSIDE_CURRENCY: - acceptCurrencyOffset(cp, state, item); - break; - - case INSIDE_DIGIT: - acceptDigitTrieOffset(cp, state, item); - break; - - case INSIDE_STRING: - acceptStringOffset(cp, state, item); - break; - - case INSIDE_AFFIX_PATTERN: - acceptAffixPatternOffset(cp, state, item); - break; - } - } - - if (state.length == 0) { - // No parse paths continue past this point. We have found the longest parsable string - // from the input. Restore previous state without the offset and break. - state.swapBack(); - break; - } - - offset += Character.charCount(cp); - } - - // Post-processing - if (state.length == 0) { - if (DEBUGGING) { - System.out.println("No matches found"); - System.out.println("- - - - - - - - - -"); - } - return null; - } else { - - // Loop through the candidates. "continue" skips a candidate as invalid. - StateItem best = null; - outer: - for (int i = 0; i < state.length; i++) { - StateItem item = state.items[i]; - - if (DEBUGGING) { - System.out.println(":end " + item); - } - - // Check that at least one digit was read. - if (!item.hasNumber()) { - if (DEBUGGING) System.out.println("-> rejected due to no number value"); - continue; - } - - if (mode == ParseMode.STRICT) { - // Perform extra checks for strict mode. - // We require that the affixes match. - boolean sawPrefix = item.sawPrefix || (item.affix != null && item.affix.p.isEmpty()); - boolean sawSuffix = item.sawSuffix || (item.affix != null && item.affix.s.isEmpty()); - boolean hasEmptyAffix = - state.affixHolders.contains(AffixHolder.EMPTY_POSITIVE) - || state.affixHolders.contains(AffixHolder.EMPTY_NEGATIVE); - if (sawPrefix && sawSuffix) { - // OK - } else if (!sawPrefix && !sawSuffix && hasEmptyAffix) { - // OK - } else { - // Has a prefix or suffix that doesn't match - if (DEBUGGING) System.out.println("-> rejected due to mismatched prefix/suffix"); - continue; - } - - // Check for scientific notation. - if (properties.getMinimumExponentDigits() > 0 && !item.sawExponentDigit) { - if (DEBUGGING) System.out.println("-> reject due to lack of exponent"); - continue; - } - - // Check that grouping sizes are valid. - int grouping1 = properties.getGroupingSize(); - int grouping2 = properties.getSecondaryGroupingSize(); - grouping1 = grouping1 > 0 ? grouping1 : grouping2; - grouping2 = grouping2 > 0 ? grouping2 : grouping1; - long groupingWidths = item.groupingWidths; - int numGroupingRegions = 16 - Long.numberOfLeadingZeros(groupingWidths) / 4; - // If the last grouping is zero, accept strings like "1," but reject string like "1,.23" - // Strip off multiple last-groupings to handle cases like "123,," or "123 " - while (numGroupingRegions > 1 && (groupingWidths & 0xf) == 0) { - if (item.sawDecimalPoint) { - if (DEBUGGING) System.out.println("-> rejected due to decimal point after grouping"); - continue outer; - } else { - groupingWidths >>>= 4; - numGroupingRegions--; - } - } - if (grouping1 <= 0) { - // OK (no grouping data available) - } else if (numGroupingRegions <= 1) { - // OK (no grouping digits) - } else if ((groupingWidths & 0xf) != grouping1) { - // First grouping size is invalid - if (DEBUGGING) System.out.println("-> rejected due to first grouping violation"); - continue; - } else if (((groupingWidths >>> ((numGroupingRegions - 1) * 4)) & 0xf) > grouping2) { - // String like "1234,567" where the highest grouping is too large - if (DEBUGGING) System.out.println("-> rejected due to final grouping violation"); - continue; - } else { - for (int j = 1; j < numGroupingRegions - 1; j++) { - if (((groupingWidths >>> (j * 4)) & 0xf) != grouping2) { - // A grouping size somewhere in the middle is invalid - if (DEBUGGING) System.out.println("-> rejected due to inner grouping violation"); - continue outer; - } - } - } - } - - // Optionally require that the presence of a decimal point matches the pattern. - if (properties.getDecimalPatternMatchRequired() - && item.sawDecimalPoint - != (properties.getDecimalSeparatorAlwaysShown() - || properties.getMaximumFractionDigits() != 0)) { - if (DEBUGGING) System.out.println("-> rejected due to decimal point violation"); - continue; - } - - // When parsing currencies, require that a currency symbol was found. - if (parseCurrency && !item.sawCurrency) { - if (DEBUGGING) System.out.println("-> rejected due to lack of currency"); - continue; - } - - // If we get here, then this candidate is acceptable. - // Use the earliest candidate in the list, or the one with the highest score, or the - // one with the fewest trailing digits. - if (best == null) { - best = item; - } else if (item.score > best.score) { - best = item; - } else if (item.trailingCount < best.trailingCount) { - best = item; - } - } - - if (DEBUGGING) { - System.out.println("- - - - - - - - - -"); - } - - if (best != null) { - ppos.setIndex(offset - best.trailingCount); - return best; - } else { - ppos.setErrorIndex(offset); - return null; - } - } - } - - /** - * If cp is whitespace (as determined by the unicode set {@link #UNISET_WHITESPACE}), - * copies item to the new list in state and sets its state name to - * nextName. - * - * @param cp The code point to check. - * @param nextName The new state name if the check passes. - * @param state The state object to update. - * @param item The old state leading into the code point. - */ - private static void acceptWhitespace( - int cp, StateName nextName, ParserState state, StateItem item) { - if (UNISET_WHITESPACE.contains(cp)) { - state.getNext().copyFrom(item, nextName, cp); - } - } - - /** - * If cp is a bidi control character (as determined by the unicode set {@link - * #UNISET_BIDI}), copies item to the new list in state and sets its - * state name to nextName. - * - * @param cp The code point to check. - * @param nextName The new state name if the check passes. - * @param state The state object to update. - * @param item The old state leading into the code point. - */ - private static void acceptBidi(int cp, StateName nextName, ParserState state, StateItem item) { - if (UNISET_BIDI.contains(cp)) { - state.getNext().copyFrom(item, nextName, cp); - } - } - - /** - * If cp is a padding character (as determined by {@link ParserState#paddingCp}), - * copies item to the new list in state and sets its state name to - * nextName. - * - * @param cp The code point to check. - * @param nextName The new state name if the check passes. - * @param state The state object to update. - * @param item The old state leading into the code point. - */ - private static void acceptPadding(int cp, StateName nextName, ParserState state, StateItem item) { - CharSequence padding = state.properties.getPadString(); - if (padding == null || padding.length() == 0) return; - int referenceCp = Character.codePointAt(padding, 0); - if (cp == referenceCp) { - state.getNext().copyFrom(item, nextName, cp); - } - } - - private static void acceptIntegerDigit( - int cp, StateName nextName, ParserState state, StateItem item) { - acceptDigitHelper(cp, nextName, state, item, DigitType.INTEGER); - } - - private static void acceptFractionDigit( - int cp, StateName nextName, ParserState state, StateItem item) { - acceptDigitHelper(cp, nextName, state, item, DigitType.FRACTION); - } - - private static void acceptExponentDigit( - int cp, StateName nextName, ParserState state, StateItem item) { - acceptDigitHelper(cp, nextName, state, item, DigitType.EXPONENT); - } - - /** - * If cp is a digit character (as determined by either {@link UCharacter#digit} or - * {@link ParserState#digitCps}), copies item to the new list in state - * and sets its state name to one determined by type. Also copies the digit into a - * field in the new item determined by type. - * - * @param cp The code point to check. - * @param nextName The state to set if a digit is accepted. - * @param state The state object to update. - * @param item The old state leading into the code point. - * @param type The digit type, which determines the next state and the field into which to insert - * the digit. - */ - private static void acceptDigitHelper( - int cp, StateName nextName, ParserState state, StateItem item, DigitType type) { - // Check the Unicode digit character property - byte digit = (byte) UCharacter.digit(cp, 10); - StateItem next = null; - - // Look for the digit: - if (digit >= 0) { - // Code point is a number - next = state.getNext().copyFrom(item, nextName, -1); - } - - // Do not perform the expensive string manipulations in fast mode. - if (digit < 0 && (state.mode == ParseMode.LENIENT || state.mode == ParseMode.STRICT)) { - if (state.digitTrie == null) { - // Check custom digits, all of which are at most one code point - for (byte d = 0; d < 10; d++) { - int referenceCp = Character.codePointAt(state.symbols.getDigitStringsLocal()[d], 0); - if (cp == referenceCp) { - digit = d; - next = state.getNext().copyFrom(item, nextName, -1); - } - } - } else { - // Custom digits have more than one code point - acceptDigitTrie(cp, nextName, state, item, type); - } - } - - // Save state - recordDigit(next, digit, type); - } - - /** - * Helper function for {@link acceptDigit} and {@link acceptDigitTrie} to save a complete digit in - * a state item and update grouping widths. - * - * @param next The new StateItem - * @param digit The digit to record - * @param type The type of the digit to record (INTEGER, FRACTION, or EXPONENT) - */ - private static void recordDigit(StateItem next, byte digit, DigitType type) { - if (next == null) return; - next.appendDigit(digit, type); - if (type == DigitType.INTEGER && (next.groupingWidths & 0xf) < 15) { - next.groupingWidths++; - } - } - - /** - * If cp is a sign (as determined by the unicode sets {@link #UNISET_PLUS} and {@link - * #UNISET_MINUS}), copies item to the new list in state. Loops back to - * the same state name. - * - * @param cp The code point to check. - * @param state The state object to update. - * @param item The old state leading into the code point. - */ - private static void acceptMinusOrPlusSign( - int cp, StateName nextName, ParserState state, StateItem item, boolean exponent) { - acceptMinusSign(cp, nextName, null, state, item, exponent); - acceptPlusSign(cp, nextName, null, state, item, exponent); - } - - private static long acceptMinusSign( - int cp, - StateName returnTo1, - StateName returnTo2, - ParserState state, - StateItem item, - boolean exponent) { - if (UNISET_MINUS.contains(cp)) { - StateItem next = state.getNext().copyFrom(item, returnTo1, -1); - next.returnTo1 = returnTo2; - if (exponent) { - next.sawNegativeExponent = true; - } else { - next.sawNegative = true; - } - return 1L << state.lastInsertedIndex(); - } else { - return 0L; - } - } - - private static long acceptPlusSign( - int cp, - StateName returnTo1, - StateName returnTo2, - ParserState state, - StateItem item, - boolean exponent) { - if (UNISET_PLUS.contains(cp)) { - StateItem next = state.getNext().copyFrom(item, returnTo1, -1); - next.returnTo1 = returnTo2; - return 1L << state.lastInsertedIndex(); - } else { - return 0L; - } - } - - /** - * If cp is a grouping separator (as determined by the unicode set {@link - * #UNISET_GROUPING}), copies item to the new list in state and loops - * back to the same state. Also accepts if cp is the locale-specific grouping - * separator in {@link ParserState#groupingCp}, in which case the {@link - * StateItem#usesLocaleSymbols} flag is also set. - * - * @param cp The code point to check. - * @param state The state object to update. - * @param item The old state leading into the code point. - */ - private static void acceptGrouping( - int cp, StateName nextName, ParserState state, StateItem item) { - // Do not accept mixed grouping separators in the same string. - if (item.groupingCp == -1) { - // First time seeing a grouping separator. - SeparatorType cpType = SeparatorType.fromCp(cp, state.mode); - - // Always accept if exactly the same as the locale grouping separator. - if (cp != state.groupingCp1 && cp != state.groupingCp2) { - // Reject if not in one of the three primary equivalence classes. - if (cpType == SeparatorType.UNKNOWN) { - return; - } - if (state.groupingMode == GroupingMode.RESTRICTED) { - // Reject if not in the same class as the locale grouping separator. - if (cpType != state.groupingType1 || cpType != state.groupingType2) { - return; - } - } else { - // Reject if in the same class as the decimal separator. - if (cpType == SeparatorType.COMMA_LIKE - && (state.decimalType1 == SeparatorType.COMMA_LIKE - || state.decimalType2 == SeparatorType.COMMA_LIKE)) { - return; - } - if (cpType == SeparatorType.PERIOD_LIKE - && (state.decimalType1 == SeparatorType.PERIOD_LIKE - || state.decimalType2 == SeparatorType.PERIOD_LIKE)) { - return; - } - } - } - - // A match was found. - StateItem next = state.getNext().copyFrom(item, nextName, cp); - next.groupingCp = cp; - next.groupingWidths <<= 4; - } else { - // Have already seen a grouping separator. - if (cp == item.groupingCp) { - StateItem next = state.getNext().copyFrom(item, nextName, cp); - next.groupingWidths <<= 4; - } - } - } - - /** - * If cp is a decimal (as determined by the unicode set {@link #UNISET_DECIMAL}), - * copies item to the new list in state and goes to {@link - * StateName#AFTER_FRACTION_DIGIT}. Also accepts if cp is the locale-specific decimal - * point in {@link ParserState#decimalCp}, in which case the {@link StateItem#usesLocaleSymbols} - * flag is also set. - * - * @param cp The code point to check. - * @param state The state object to update. - * @param item The old state leading into the code point. - */ - private static void acceptDecimalPoint( - int cp, StateName nextName, ParserState state, StateItem item) { - if (cp == item.groupingCp) { - // Don't accept a decimal point that is the same as the grouping separator - return; - } - - SeparatorType cpType = SeparatorType.fromCp(cp, state.mode); - - // We require that the decimal separator be in the same class as the locale. - if (cpType != state.decimalType1 && cpType != state.decimalType2) { - return; - } - - // If in UNKNOWN or OTHER, require an exact match. - if (cpType == SeparatorType.OTHER_GROUPING || cpType == SeparatorType.UNKNOWN) { - if (cp != state.decimalCp1 && cp != state.decimalCp2) { - return; - } - } - - // A match was found. - StateItem next = state.getNext().copyFrom(item, nextName, -1); - next.sawDecimalPoint = true; - } - - private static void acceptNan(int cp, StateName nextName, ParserState state, StateItem item) { - CharSequence nan = state.symbols.getNaN(); - long added = acceptString(cp, nextName, null, state, item, nan, 0, false); - - // Set state in the items that were added by the function call - for (int i = Long.numberOfTrailingZeros(added); (1L << i) <= added; i++) { - if (((1L << i) & added) != 0) { - state.getItem(i).sawNaN = true; - } - } - } - - private static void acceptInfinity( - int cp, StateName nextName, ParserState state, StateItem item) { - CharSequence inf = state.symbols.getInfinity(); - long added = acceptString(cp, nextName, null, state, item, inf, 0, false); - - // Set state in the items that were added by the function call - for (int i = Long.numberOfTrailingZeros(added); (1L << i) <= added; i++) { - if (((1L << i) & added) != 0) { - state.getItem(i).sawInfinity = true; - } - } - } - - private static void acceptExponentSeparator( - int cp, StateName nextName, ParserState state, StateItem item) { - CharSequence exp = state.symbols.getExponentSeparator(); - acceptString(cp, nextName, null, state, item, exp, 0, true); - } - - private static void acceptPrefix(int cp, StateName nextName, ParserState state, StateItem item) { - for (AffixHolder holder : state.affixHolders) { - acceptAffixHolder(cp, nextName, state, item, holder, true); - } - } - - private static void acceptSuffix(int cp, StateName nextName, ParserState state, StateItem item) { - if (item.affix != null) { - acceptAffixHolder(cp, nextName, state, item, item.affix, false); - } else { - for (AffixHolder holder : state.affixHolders) { - acceptAffixHolder(cp, nextName, state, item, holder, false); - } - } - } - - private static void acceptAffixHolder( - int cp, - StateName nextName, - ParserState state, - StateItem item, - AffixHolder holder, - boolean prefix) { - if (holder == null) return; - String str = prefix ? holder.p : holder.s; - long added; - if (holder.strings) { - added = acceptString(cp, nextName, null, state, item, str, 0, false); - } else { - added = - acceptAffixPattern(cp, nextName, state, item, str, AffixUtils.nextToken(0, str)); - } - // Record state in the added entries - for (int i = Long.numberOfTrailingZeros(added); (1L << i) <= added; i++) { - if (((1L << i) & added) != 0) { - StateItem next = state.getItem(i); - next.affix = holder; - if (prefix) next.sawPrefix = true; - if (!prefix) next.sawSuffix = true; - if (holder.negative) next.sawNegative = true; - // 10 point reward for consuming a prefix/suffix: - next.score += 10; - // 1 point reward for positive holders (if there is ambiguity, we want to favor positive): - if (!holder.negative) next.score += 1; - // 5 point reward for affix holders that have an empty prefix or suffix (we won't see them again): - if (!next.sawPrefix && holder.p.isEmpty()) next.score += 5; - if (!next.sawSuffix && holder.s.isEmpty()) next.score += 5; - } - } - } - - private static long acceptStringOffset(int cp, ParserState state, StateItem item) { - return acceptString( - cp, - item.returnTo1, - item.returnTo2, - state, - item, - item.currentString, - item.currentOffset, - item.currentTrailing); - } - - /** - * Accepts a code point if the code point is compatible with the string at the given offset. - * Handles runs of ignorable characters. - * - *

This method will add either one or two {@link StateItem} to the {@link ParserState}. - * - * @param cp The current code point, which will be checked for a match to the string. - * @param ret1 The state to return to after reaching the end of the string. - * @param ret2 The state to save in returnTo1 after reaching the end of the string. - * Set to null if returning to the main state loop. - * @param trailing true if this string should be ignored for the purposes of recording trailing - * code points; false if it trailing count should be reset after reading the string. - * @param state The current {@link ParserState} - * @param item The current {@link StateItem} - * @param str The string against which to check for a match. - * @param offset The number of chars into the string. Initial value should be 0. - * @param trailing false if this string is strong and should reset trailing count to zero when it - * is fully consumed. - * @return A bitmask where the bits correspond to the items that were added. Set to 0L if no items - * were added. - */ - private static long acceptString( - int cp, - StateName ret1, - StateName ret2, - ParserState state, - StateItem item, - CharSequence str, - int offset, - boolean trailing) { - if (str == null || str.length() == 0) return 0L; - return acceptStringOrAffixPatternWithIgnorables( - cp, ret1, ret2, state, item, str, offset, trailing, true); - } - - private static long acceptStringNonIgnorable( - int cp, - StateName ret1, - StateName ret2, - ParserState state, - StateItem item, - CharSequence str, - boolean trailing, - int referenceCp, - long firstOffsetOrTag, - long nextOffsetOrTag) { - long added = 0L; - int firstOffset = (int) firstOffsetOrTag; - int nextOffset = (int) nextOffsetOrTag; - if (codePointEquals(referenceCp, cp, state)) { - if (firstOffset < str.length()) { - added |= acceptStringHelper(cp, ret1, ret2, state, item, str, firstOffset, trailing); - } - if (nextOffset >= str.length()) { - added |= acceptStringHelper(cp, ret1, ret2, state, item, str, nextOffset, trailing); - } - return added; - } else { - return 0L; - } - } - - /** - * Internal method that is used to step to the next code point of a string or exit the string if - * at the end. - * - * @param cp See {@link #acceptString} - * @param returnTo1 See {@link #acceptString} - * @param returnTo2 See {@link #acceptString} - * @param state See {@link #acceptString} - * @param item See {@link #acceptString} - * @param str See {@link #acceptString} - * @param newOffset The offset at which the next step should start. If past the end of the string, - * exit the string and return to the outer loop. - * @param trailing See {@link #acceptString} - * @return Bitmask containing one entry, the one that was added. - */ - private static long acceptStringHelper( - int cp, - StateName returnTo1, - StateName returnTo2, - ParserState state, - StateItem item, - CharSequence str, - int newOffset, - boolean trailing) { - StateItem next = state.getNext().copyFrom(item, null, cp); - next.score += 1; // reward for consuming a cp from string - if (newOffset < str.length()) { - // String has more code points. - next.name = StateName.INSIDE_STRING; - next.returnTo1 = returnTo1; - next.returnTo2 = returnTo2; - next.currentString = str; - next.currentOffset = newOffset; - next.currentTrailing = trailing; - } else { - // We've reached the end of the string. - next.name = returnTo1; - if (!trailing) next.trailingCount = 0; - next.returnTo1 = returnTo2; - next.returnTo2 = null; - } - return 1L << state.lastInsertedIndex(); - } - - private static long acceptAffixPatternOffset(int cp, ParserState state, StateItem item) { - return acceptAffixPattern( - cp, item.returnTo1, state, item, item.currentAffixPattern, item.currentStepwiseParserTag); - } - - /** - * Accepts a code point if the code point is compatible with the affix pattern at the offset - * encoded in the tag argument. - * - * @param cp The current code point, which will be checked for a match to the string. - * @param returnTo The state to return to after reaching the end of the string. - * @param state The current {@link ParserState} - * @param item The current {@link StateItem} - * @param str The string containing the affix pattern. - * @param tag The current state of the stepwise parser. Initial value should be 0L. - * @return A bitmask where the bits correspond to the items that were added. Set to 0L if no items - * were added. - */ - private static long acceptAffixPattern( - int cp, StateName ret1, ParserState state, StateItem item, CharSequence str, long tag) { - if (str == null || str.length() == 0) return 0L; - return acceptStringOrAffixPatternWithIgnorables( - cp, ret1, null, state, item, str, tag, false, false); - } - - private static long acceptAffixPatternNonIgnorable( - int cp, - StateName returnTo, - ParserState state, - StateItem item, - CharSequence str, - int typeOrCp, - long firstTag, - long nextTag) { - - // Convert from the returned tag to a code point, string, or currency to check - int resolvedCp = -1; - CharSequence resolvedStr = null; - boolean resolvedMinusSign = false; - boolean resolvedPlusSign = false; - boolean resolvedCurrency = false; - if (typeOrCp < 0) { - // Symbol - switch (typeOrCp) { - case AffixUtils.TYPE_MINUS_SIGN: - resolvedMinusSign = true; - break; - case AffixUtils.TYPE_PLUS_SIGN: - resolvedPlusSign = true; - break; - case AffixUtils.TYPE_PERCENT: - resolvedStr = state.symbols.getPercentString(); - if (resolvedStr.length() != 1 || resolvedStr.charAt(0) != '%') { - resolvedCp = '%'; // accept ASCII percent as well as locale percent - } - break; - case AffixUtils.TYPE_PERMILLE: - resolvedStr = state.symbols.getPerMillString(); - if (resolvedStr.length() != 1 || resolvedStr.charAt(0) != '‰') { - resolvedCp = '‰'; // accept ASCII permille as well as locale permille - } - break; - case AffixUtils.TYPE_CURRENCY_SINGLE: - case AffixUtils.TYPE_CURRENCY_DOUBLE: - case AffixUtils.TYPE_CURRENCY_TRIPLE: - case AffixUtils.TYPE_CURRENCY_QUAD: - case AffixUtils.TYPE_CURRENCY_QUINT: - case AffixUtils.TYPE_CURRENCY_OVERFLOW: - resolvedCurrency = true; - break; - default: - throw new AssertionError(); - } - } else { - resolvedCp = typeOrCp; - } - - long added = 0L; - if (resolvedCp >= 0 && codePointEquals(cp, resolvedCp, state)) { - if (firstTag >= 0) { - added |= acceptAffixPatternHelper(cp, returnTo, state, item, str, firstTag); - } - if (nextTag < 0) { - added |= acceptAffixPatternHelper(cp, returnTo, state, item, str, nextTag); - } - } - if (resolvedMinusSign) { - if (firstTag >= 0) { - added |= acceptMinusSign(cp, StateName.INSIDE_AFFIX_PATTERN, returnTo, state, item, false); - } - if (nextTag < 0) { - added |= acceptMinusSign(cp, returnTo, null, state, item, false); - } - if (added == 0L) { - // Also attempt to accept custom minus sign string - String mss = state.symbols.getMinusSignString(); - int mssCp = Character.codePointAt(mss, 0); - if (mss.length() != Character.charCount(mssCp) || !UNISET_MINUS.contains(mssCp)) { - resolvedStr = mss; - } - } - } - if (resolvedPlusSign) { - if (firstTag >= 0) { - added |= acceptPlusSign(cp, StateName.INSIDE_AFFIX_PATTERN, returnTo, state, item, false); - } - if (nextTag < 0) { - added |= acceptPlusSign(cp, returnTo, null, state, item, false); - } - if (added == 0L) { - // Also attempt to accept custom plus sign string - String pss = state.symbols.getPlusSignString(); - int pssCp = Character.codePointAt(pss, 0); - if (pss.length() != Character.charCount(pssCp) || !UNISET_MINUS.contains(pssCp)) { - resolvedStr = pss; - } - } - } - if (resolvedStr != null) { - if (firstTag >= 0) { - added |= - acceptString( - cp, StateName.INSIDE_AFFIX_PATTERN, returnTo, state, item, resolvedStr, 0, false); - } - if (nextTag < 0) { - added |= acceptString(cp, returnTo, null, state, item, resolvedStr, 0, false); - } - } - if (resolvedCurrency) { - if (firstTag >= 0) { - added |= acceptCurrency(cp, StateName.INSIDE_AFFIX_PATTERN, returnTo, state, item); - } - if (nextTag < 0) { - added |= acceptCurrency(cp, returnTo, null, state, item); - } - } - - // Set state in the items that were added by the function calls - for (int i = Long.numberOfTrailingZeros(added); (1L << i) <= added; i++) { - if (((1L << i) & added) != 0) { - state.getItem(i).currentAffixPattern = str; - state.getItem(i).currentStepwiseParserTag = firstTag; - } - } - return added; - } - - /** - * Internal method that is used to step to the next token of a affix pattern or exit the affix - * pattern if at the end. - * - * @param cp See {@link #acceptAffixPattern} - * @param returnTo1 See {@link #acceptAffixPattern} - * @param state See {@link #acceptAffixPattern} - * @param item See {@link #acceptAffixPattern} - * @param str See {@link #acceptAffixPattern} - * @param newOffset The tag corresponding to the next token in the affix pattern that should be - * recorded and consumed in a future call to {@link #acceptAffixPatternOffset}. - * @return Bitmask containing one entry, the one that was added. - */ - private static long acceptAffixPatternHelper( - int cp, - StateName returnTo, - ParserState state, - StateItem item, - CharSequence str, - long newTag) { - StateItem next = state.getNext().copyFrom(item, null, cp); - next.score += 1; // reward for consuming a cp from pattern - if (newTag >= 0) { - // Additional tokens in affix string. - next.name = StateName.INSIDE_AFFIX_PATTERN; - next.returnTo1 = returnTo; - next.currentAffixPattern = str; - next.currentStepwiseParserTag = newTag; - } else { - // Reached last token in affix string. - next.name = returnTo; - next.trailingCount = 0; - next.returnTo1 = null; - } - return 1L << state.lastInsertedIndex(); - } - - /** - * Consumes tokens from a string or affix pattern following ICU's rules for handling of whitespace - * and bidi control characters (collectively called "ignorables"). The methods {@link - * #acceptStringHelper}, {@link #acceptAffixPatternHelper}, {@link #acceptStringNonIgnorable}, and - * {@link #acceptAffixPatternNonIgnorable} will be called by this method to actually add parse - * paths. - * - *

In the "NonIgnorable" functions, two arguments are passed: firstOffsetOrTag and - * nextOffsetOrTag. These two arguments should add parse paths according to the following rules: - * - *

-   * if (firstOffsetOrTag is valid or inside string boundary) {
-   *   // Add parse path going to firstOffsetOrTag
-   * }
-   * if (nextOffsetOrTag is invalid or beyond string boundary) {
-   *   // Add parse path leaving the string
-   * }
-   * 
- * - *

Note that there may be multiple parse paths added by these lines. This is important in order - * to properly handle runs of ignorables. - * - * @param cp See {@link #acceptString} and {@link #acceptAffixPattern} - * @param ret1 See {@link #acceptString} and {@link #acceptAffixPattern} - * @param ret2 See {@link #acceptString} (affix pattern can pass null) - * @param state See {@link #acceptString} and {@link #acceptAffixPattern} - * @param item See {@link #acceptString} and {@link #acceptAffixPattern} - * @param str See {@link #acceptString} and {@link #acceptAffixPattern} - * @param offsetOrTag The current int offset for strings, or the current tag for affix patterns. - * @param trailing See {@link #acceptString} (affix patterns can pass false) - * @param isString true if the parameters correspond to a string; false if they correspond to an - * affix pattern. - * @return A bitmask containing the entries that were added. - */ - private static long acceptStringOrAffixPatternWithIgnorables( - int cp, - StateName ret1, - StateName ret2 /* String only */, - ParserState state, - StateItem item, - CharSequence str, - long offsetOrTag /* offset for string; tag for affix pattern */, - boolean trailing /* String only */, - boolean isString) { - - // Runs of ignorables (whitespace and bidi control marks) can occur at the beginning, middle, - // or end of the reference string, or a run across the entire string. - // - // - A run at the beginning or in the middle corresponds to a run of length *zero or more* - // in the input. - // - A run at the end need to be matched exactly. - // - A string that contains only ignorable characters also needs to be matched exactly. - // - // Because the behavior differs, we need logic here to determine which case we have. - - int typeOrCp = - isString - ? Character.codePointAt(str, (int) offsetOrTag) - : AffixUtils.getTypeOrCp(offsetOrTag); - - if (isIgnorable(typeOrCp, state)) { - // Look for the next nonignorable code point - int nextTypeOrCp = typeOrCp; - long prevOffsetOrTag; - long nextOffsetOrTag = offsetOrTag; - long firstOffsetOrTag = 0L; - while (true) { - prevOffsetOrTag = nextOffsetOrTag; - nextOffsetOrTag = - isString - ? nextOffsetOrTag + Character.charCount(nextTypeOrCp) - : AffixUtils.nextToken(nextOffsetOrTag, str); - if (firstOffsetOrTag == 0L) firstOffsetOrTag = nextOffsetOrTag; - if (isString ? nextOffsetOrTag >= str.length() : nextOffsetOrTag < 0) { - // Integer.MIN_VALUE is an invalid value for either a type or a cp; - // use it to indicate the end of the string. - nextTypeOrCp = Integer.MIN_VALUE; - break; - } - nextTypeOrCp = - isString - ? Character.codePointAt(str, (int) nextOffsetOrTag) - : AffixUtils.getTypeOrCp(nextOffsetOrTag); - if (!isIgnorable(nextTypeOrCp, state)) break; - } - - if (nextTypeOrCp == Integer.MIN_VALUE) { - // Run at end or string that contains only ignorable characters. - if (codePointEquals(cp, typeOrCp, state)) { - // Step forward and also exit the string if not at very end. - // RETURN - long added = 0L; - added |= - isString - ? acceptStringHelper( - cp, ret1, ret2, state, item, str, (int) firstOffsetOrTag, trailing) - : acceptAffixPatternHelper(cp, ret1, state, item, str, firstOffsetOrTag); - if (firstOffsetOrTag != nextOffsetOrTag) { - added |= - isString - ? acceptStringHelper( - cp, ret1, ret2, state, item, str, (int) nextOffsetOrTag, trailing) - : acceptAffixPatternHelper(cp, ret1, state, item, str, nextOffsetOrTag); - } - return added; - } else { - // Code point does not exactly match the run at end. - // RETURN - return 0L; - } - } else { - // Run at beginning or in middle. - if (isIgnorable(cp, state)) { - // Consume the ignorable. - // RETURN - return isString - ? acceptStringHelper( - cp, ret1, ret2, state, item, str, (int) prevOffsetOrTag, trailing) - : acceptAffixPatternHelper(cp, ret1, state, item, str, prevOffsetOrTag); - } else { - // Go to nonignorable cp. - // FALL THROUGH - } - } - - // Fall through to the nonignorable code point found above. - assert nextTypeOrCp != Integer.MIN_VALUE; - typeOrCp = nextTypeOrCp; - offsetOrTag = nextOffsetOrTag; - } - assert !isIgnorable(typeOrCp, state); - - // Look for the next nonignorable code point after this nonignorable code point - // to determine if we are at the end of the string. - int nextTypeOrCp = typeOrCp; - long nextOffsetOrTag = offsetOrTag; - long firstOffsetOrTag = 0L; - while (true) { - nextOffsetOrTag = - isString - ? nextOffsetOrTag + Character.charCount(nextTypeOrCp) - : AffixUtils.nextToken(nextOffsetOrTag, str); - if (firstOffsetOrTag == 0L) firstOffsetOrTag = nextOffsetOrTag; - if (isString ? nextOffsetOrTag >= str.length() : nextOffsetOrTag < 0) { - nextTypeOrCp = -1; - break; - } - nextTypeOrCp = - isString - ? Character.codePointAt(str, (int) nextOffsetOrTag) - : AffixUtils.getTypeOrCp(nextOffsetOrTag); - if (!isIgnorable(nextTypeOrCp, state)) break; - } - - // Nonignorable logic. - return isString - ? acceptStringNonIgnorable( - cp, ret1, ret2, state, item, str, trailing, typeOrCp, firstOffsetOrTag, nextOffsetOrTag) - : acceptAffixPatternNonIgnorable( - cp, ret1, state, item, str, typeOrCp, firstOffsetOrTag, nextOffsetOrTag); - } - - /** - * This method can add up to four items to the new list in state. - * - *

If cp is equal to any known ISO code or long name, copies item to - * the new list in state and sets its ISO code to the corresponding currency. - * - *

If cp is the first code point of any ISO code or long name having more them one - * code point in length, copies item to the new list in state along with - * an instance of {@link TextTrieMap.ParseState} for tracking the following code points. - * - * @param cp The code point to check. - * @param state The state object to update. - * @param item The old state leading into the code point. - */ - private static void acceptCurrency( - int cp, StateName nextName, ParserState state, StateItem item) { - acceptCurrency(cp, nextName, null, state, item); - } - - private static long acceptCurrency( - int cp, StateName returnTo1, StateName returnTo2, ParserState state, StateItem item) { - if (item.sawCurrency) return 0L; - long added = 0L; - - // Accept from local currency information - String str1, str2; - Currency currency = state.properties.getCurrency(); - if (currency != null) { - str1 = currency.getName(state.symbols.getULocale(), Currency.SYMBOL_NAME, null); - str2 = currency.getCurrencyCode(); - // TODO: Should we also accept long names? In currency mode, they are in the CLDR data. - } else { - currency = state.symbols.getCurrency(); - str1 = state.symbols.getCurrencySymbol(); - str2 = state.symbols.getInternationalCurrencySymbol(); - } - added |= acceptString(cp, returnTo1, returnTo2, state, item, str1, 0, false); - added |= acceptString(cp, returnTo1, returnTo2, state, item, str2, 0, false); - for (int i = Long.numberOfTrailingZeros(added); (1L << i) <= added; i++) { - if (((1L << i) & added) != 0) { - state.getItem(i).sawCurrency = true; - state.getItem(i).isoCode = str2; - } - } - - // Accept from CLDR data - if (state.parseCurrency) { - ULocale uloc = state.symbols.getULocale(); - TextTrieMap.ParseState trie1 = - Currency.openParseState(uloc, cp, Currency.LONG_NAME); - TextTrieMap.ParseState trie2 = - Currency.openParseState(uloc, cp, Currency.SYMBOL_NAME); - added |= acceptCurrencyHelper(cp, returnTo1, returnTo2, state, item, trie1); - added |= acceptCurrencyHelper(cp, returnTo1, returnTo2, state, item, trie2); - } - - return added; - } - - /** - * If cp is the next code point of any currency, copies item to the new - * list in state along with an instance of {@link TextTrieMap.ParseState} for - * tracking the following code points. - * - *

This method should only be called in a state following {@link #acceptCurrency}. - * - * @param cp The code point to check. - * @param state The state object to update. - * @param item The old state leading into the code point. - */ - private static void acceptCurrencyOffset(int cp, ParserState state, StateItem item) { - acceptCurrencyHelper( - cp, item.returnTo1, item.returnTo2, state, item, item.currentCurrencyTrieState); - } - - private static long acceptCurrencyHelper( - int cp, - StateName returnTo1, - StateName returnTo2, - ParserState state, - StateItem item, - TextTrieMap.ParseState trieState) { - if (trieState == null) return 0L; - trieState.accept(cp); - long added = 0L; - Iterator currentMatches = trieState.getCurrentMatches(); - if (currentMatches != null) { - // Match on current code point - // TODO: What should happen with multiple currency matches? - StateItem next = state.getNext().copyFrom(item, returnTo1, -1); - next.returnTo1 = returnTo2; - next.returnTo2 = null; - next.sawCurrency = true; - next.isoCode = currentMatches.next().getISOCode(); - added |= 1L << state.lastInsertedIndex(); - } - if (!trieState.atEnd()) { - // Prepare for matches on future code points - StateItem next = state.getNext().copyFrom(item, StateName.INSIDE_CURRENCY, -1); - next.returnTo1 = returnTo1; - next.returnTo2 = returnTo2; - next.currentCurrencyTrieState = trieState; - added |= 1L << state.lastInsertedIndex(); - } - return added; - } - - private static long acceptDigitTrie( - int cp, StateName nextName, ParserState state, StateItem item, DigitType type) { - assert state.digitTrie != null; - TextTrieMap.ParseState trieState = state.digitTrie.openParseState(cp); - if (trieState == null) return 0L; - return acceptDigitTrieHelper(cp, nextName, state, item, type, trieState); - } - - private static void acceptDigitTrieOffset(int cp, ParserState state, StateItem item) { - acceptDigitTrieHelper( - cp, item.returnTo1, state, item, item.currentDigitType, item.currentDigitTrieState); - } - - private static long acceptDigitTrieHelper( - int cp, - StateName returnTo1, - ParserState state, - StateItem item, - DigitType type, - TextTrieMap.ParseState trieState) { - if (trieState == null) return 0L; - trieState.accept(cp); - long added = 0L; - Iterator currentMatches = trieState.getCurrentMatches(); - if (currentMatches != null) { - // Match on current code point - byte digit = currentMatches.next(); - StateItem next = state.getNext().copyFrom(item, returnTo1, -1); - next.returnTo1 = null; - recordDigit(next, digit, type); - added |= 1L << state.lastInsertedIndex(); - } - if (!trieState.atEnd()) { - // Prepare for matches on future code points - StateItem next = state.getNext().copyFrom(item, StateName.INSIDE_DIGIT, -1); - next.returnTo1 = returnTo1; - next.currentDigitTrieState = trieState; - next.currentDigitType = type; - added |= 1L << state.lastInsertedIndex(); - } - return added; - } - - /** - * Checks whether the two given code points are equal after applying case mapping as requested in - * the ParserState. - * - * @see #acceptString - * @see #acceptAffixPattern - */ - private static boolean codePointEquals(int cp1, int cp2, ParserState state) { - if (!state.caseSensitive) { - cp1 = UCharacter.foldCase(cp1, true); - cp2 = UCharacter.foldCase(cp2, true); - } - return cp1 == cp2; - } - - /** - * Checks whether the given code point is "ignorable" and should be skipped. BiDi control marks - * are always ignorable, and whitespace is ignorable in lenient mode. - * - *

Returns false if cp is negative. - * - * @param cp The code point to test. - * @param state The current {@link ParserState}, used for determining strict mode. - * @return true if cp is ignorable; false otherwise. - */ - private static boolean isIgnorable(int cp, ParserState state) { - if (cp < 0) return false; - if (UNISET_BIDI.contains(cp)) return true; - return state.mode == ParseMode.LENIENT && UNISET_WHITESPACE.contains(cp); - } -} diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java index b808c6ec8ef..005ba8e8ea5 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java @@ -12,7 +12,6 @@ import com.ibm.icu.impl.number.AffixPatternProvider; import com.ibm.icu.impl.number.AffixUtils; import com.ibm.icu.impl.number.CustomSymbolCurrency; import com.ibm.icu.impl.number.DecimalFormatProperties; -import com.ibm.icu.impl.number.Parse.ParseMode; import com.ibm.icu.impl.number.PatternStringParser; import com.ibm.icu.impl.number.PatternStringParser.ParsedPatternInfo; import com.ibm.icu.impl.number.PropertiesAffixPatternProvider; @@ -31,6 +30,43 @@ import com.ibm.icu.util.ULocale; * */ public class NumberParserImpl { + + // TODO: Find a better place for this enum. + /** Controls the set of rules for parsing a string. */ + public static enum ParseMode { + /** + * Lenient mode should be used if you want to accept malformed user input. It will use + * heuristics to attempt to parse through typographical errors in the string. + */ + LENIENT, + + /** + * Strict mode should be used if you want to require that the input is well-formed. More + * specifically, it differs from lenient mode in the following ways: + * + *

    + *
  • Grouping widths must match the grouping settings. For example, "12,3,45" will fail if + * the grouping width is 3, as in the pattern "#,##0". + *
  • The string must contain a complete prefix and suffix. For example, if the pattern is + * "{#};(#)", then "{123}" or "(123)" would match, but "{123", "123}", and "123" would all + * fail. (The latter strings would be accepted in lenient mode.) + *
  • Whitespace may not appear at arbitrary places in the string. In lenient mode, + * whitespace is allowed to occur arbitrarily before and after prefixes and exponent + * separators. + *
  • Leading grouping separators are not allowed, as in ",123". + *
  • Minus and plus signs can only appear if specified in the pattern. In lenient mode, a + * plus or minus sign can always precede a number. + *
  • The set of characters that can be interpreted as a decimal or grouping separator is + * smaller. + *
  • If currency parsing is enabled, currencies must only appear where + * specified in either the current pattern string or in a valid pattern string for the + * current locale. For example, if the pattern is "¤0.00", then "$1.23" would match, but + * "1.23$" would fail to match. + *
+ */ + STRICT, + } + @Deprecated public static NumberParserImpl createParserFromPattern( ULocale locale, diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/DecimalFormat.java b/icu4j/main/classes/core/src/com/ibm/icu/text/DecimalFormat.java index 72ada185950..5f68fe60469 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/DecimalFormat.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/DecimalFormat.java @@ -15,10 +15,10 @@ import java.text.ParsePosition; import com.ibm.icu.impl.number.AffixUtils; import com.ibm.icu.impl.number.DecimalFormatProperties; import com.ibm.icu.impl.number.Padder.PadPosition; -import com.ibm.icu.impl.number.Parse; import com.ibm.icu.impl.number.PatternStringParser; import com.ibm.icu.impl.number.PatternStringUtils; import com.ibm.icu.impl.number.parse.NumberParserImpl; +import com.ibm.icu.impl.number.parse.NumberParserImpl.ParseMode; import com.ibm.icu.impl.number.parse.ParsedNumber; import com.ibm.icu.lang.UCharacter; import com.ibm.icu.math.BigDecimal; @@ -2162,7 +2162,7 @@ public class DecimalFormat extends NumberFormat { */ @Override public synchronized boolean isParseStrict() { - return properties.getParseMode() == Parse.ParseMode.STRICT; + return properties.getParseMode() == ParseMode.STRICT; } /** @@ -2173,7 +2173,7 @@ public class DecimalFormat extends NumberFormat { */ @Override public synchronized void setParseStrict(boolean parseStrict) { - Parse.ParseMode mode = parseStrict ? Parse.ParseMode.STRICT : Parse.ParseMode.LENIENT; + ParseMode mode = parseStrict ? ParseMode.STRICT : ParseMode.LENIENT; properties.setParseMode(mode); refreshFormatter(); } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/ScientificNumberFormatter.java b/icu4j/main/classes/core/src/com/ibm/icu/text/ScientificNumberFormatter.java index 93bb25f8cea..e501d0182b5 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/ScientificNumberFormatter.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/ScientificNumberFormatter.java @@ -13,7 +13,7 @@ import java.text.AttributedCharacterIterator.Attribute; import java.text.CharacterIterator; import java.util.Map; -import com.ibm.icu.impl.number.Parse; +import com.ibm.icu.impl.number.parse.UnicodeSetStaticCache; import com.ibm.icu.lang.UCharacter; import com.ibm.icu.util.ULocale; @@ -230,14 +230,14 @@ public final class ScientificNumberFormatter { int start = iterator.getRunStart(NumberFormat.Field.EXPONENT_SIGN); int limit = iterator.getRunLimit(NumberFormat.Field.EXPONENT_SIGN); int aChar = char32AtAndAdvance(iterator); - if (Parse.UNISET_MINUS.contains(aChar)) { + if (UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.MINUS_SIGN).contains(aChar)) { append( iterator, copyFromOffset, start, result); result.append(SUPERSCRIPT_MINUS_SIGN); - } else if (Parse.UNISET_PLUS.contains(aChar)) { + } else if (UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.PLUS_SIGN).contains(aChar)) { append( iterator, copyFromOffset, diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/Currency.java b/icu4j/main/classes/core/src/com/ibm/icu/util/Currency.java index d4b75fcd276..069608f6592 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/util/Currency.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/util/Currency.java @@ -756,21 +756,6 @@ public class Currency extends MeasureUnit { } } - /** - * @internal - * @deprecated This API is ICU internal only. - */ - @Deprecated - public static TextTrieMap.ParseState openParseState( - ULocale locale, int startingCp, int type) { - List> currencyTrieVec = getCurrencyTrieVec(locale); - if (type == Currency.LONG_NAME) { - return currencyTrieVec.get(1).openParseState(startingCp); - } else { - return currencyTrieVec.get(0).openParseState(startingCp); - } - } - private static List> getCurrencyTrieVec(ULocale locale) { List> currencyTrieVec = CURRENCY_NAME_CACHE.get(locale); if (currencyTrieVec == null) { diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/data/numberformattestspecification.txt b/icu4j/main/tests/core/src/com/ibm/icu/dev/data/numberformattestspecification.txt index 3aa732a70d8..a7e30d29da1 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/data/numberformattestspecification.txt +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/data/numberformattestspecification.txt @@ -754,9 +754,9 @@ parse output breaks +1,234,567.8901 1234567.8901 +1,23,4567.8901 1234567.8901 // P supports grouping separators in the fraction; none of the others do. -+1,23,4567.89,01 1234567.8901 CJKS ++1,23,4567.89,01 1234567.8901 CJK +1,23,456.78.9 123456.78 -+12.34,56 12.3456 CJKS ++12.34,56 12.3456 CJK +79,,20,3 79203 +79 20 3 79203 K // Parsing stops at comma as it is different from other separators @@ -862,7 +862,7 @@ parse output breaks +1,234.5 1234.5 // Comma after decimal means a fractional grouping separator // P fails since it finds an invalid grouping size -+1,23,456.78,9 123456.789 JKPS ++1,23,456.78,9 123456.789 JKP // C and J fail upon seeing the second decimal point +1,23,456.78.9 123456.78 CJ +79 79 @@ -997,8 +997,8 @@ parse output breaks 123.456 123456 123,456 123.456 // The separator after the comma can be inrepreted as a fractional grouping -987,654.321 987.654321 CJKS -987,654 321 987.654321 CJKS +987,654.321 987.654321 CJK +987,654 321 987.654321 CJK 987.654,321 987654.321 test select @@ -1188,17 +1188,17 @@ $53.45 fail USD J USD 53.45 53.45 USD J 53.45USD 53.45 USD CJ USD53.45 53.45 USD -// S fails these because '(' is an incomplete prefix. -(7.92) USD -7.92 USD CJSP -(7.92) GBP -7.92 GBP CJSP -(7.926) USD -7.926 USD CJSP -(7.926 USD) -7.926 USD CJSP +// P fails these because '(' is an incomplete prefix. +(7.92) USD -7.92 USD CJP +(7.92) GBP -7.92 GBP CJP +(7.926) USD -7.926 USD CJP +(7.926 USD) -7.926 USD CJP (USD 7.926) -7.926 USD J -USD (7.926) -7.926 USD CJSP -USD (7.92) -7.92 USD CJSP -(7.92)USD -7.92 USD CJSP -USD(7.92) -7.92 USD CJSP -(8) USD -8 USD CJSP +USD (7.926) -7.926 USD CJP +USD (7.92) -7.92 USD CJP +(7.92)USD -7.92 USD CJP +USD(7.92) -7.92 USD CJP +(8) USD -8 USD CJP -8 USD -8 USD C 67 USD 67 USD C 53.45$ fail USD @@ -1429,8 +1429,8 @@ NaN NaN K 1E2147483646 1E2147483646 1E-2147483649 0 1E-2147483648 0 -// S and P return zero here -1E-2147483647 1E-2147483647 SP +// P returns zero here +1E-2147483647 1E-2147483647 P 1E-2147483646 1E-2147483646 test format push limits @@ -1460,7 +1460,7 @@ set pattern #,##0 begin parse output breaks // K and J return null; S, C, and P return 99 - 9 9 9 CJKSP + 9 9 9 CJKP // K returns null 9 999 9999 K @@ -1504,7 +1504,7 @@ y g h56 -56 JK 56i j‎k -56 CJK 56‎i jk -56 CJK // S and C get 56 (accepts ' ' gs grouping); J and K get null -5 6 fail CSP +5 6 fail CP 5‎6 5 JK test parse spaces in grouping @@ -1515,8 +1515,8 @@ set pattern #,##0 begin parse output breaks // C, J, S, and P get "12" here -1 2 1 CJSP -1 23 1 CJSP +1 2 1 CJP +1 23 1 CJP // K gets 1 here; doesn't pick up the grouping separator 1 234 1234 K diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatDataDrivenTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatDataDrivenTest.java index 3622fa69165..d0a5879a319 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatDataDrivenTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatDataDrivenTest.java @@ -4,7 +4,6 @@ package com.ibm.icu.dev.test.format; import java.math.BigDecimal; import java.math.RoundingMode; -import java.text.ParseException; import java.text.ParsePosition; import org.junit.Test; @@ -12,11 +11,10 @@ import org.junit.Test; import com.ibm.icu.dev.test.TestUtil; import com.ibm.icu.impl.number.DecimalFormatProperties; import com.ibm.icu.impl.number.Padder.PadPosition; -import com.ibm.icu.impl.number.Parse; -import com.ibm.icu.impl.number.Parse.ParseMode; import com.ibm.icu.impl.number.PatternStringParser; import com.ibm.icu.impl.number.PatternStringUtils; import com.ibm.icu.impl.number.parse.NumberParserImpl; +import com.ibm.icu.impl.number.parse.NumberParserImpl.ParseMode; import com.ibm.icu.number.LocalizedNumberFormatter; import com.ibm.icu.number.NumberFormatter; import com.ibm.icu.text.DecimalFormat; @@ -727,98 +725,6 @@ public class NumberFormatDataDrivenTest { return null; } - /** - * Runs a single parse test. On success, returns null. On failure, returns the error. This implementation just - * returns null. Subclasses should override. - * - * @param tuple - * contains the parameters of the format test. - */ - @Override - public String parse(DataDrivenNumberFormatTestData tuple) { - String pattern = (tuple.pattern == null) ? "0" : tuple.pattern; - DecimalFormatProperties properties; - ParsePosition ppos = new ParsePosition(0); - Number actual; - try { - properties = PatternStringParser.parseToProperties( - pattern, - tuple.currency != null ? PatternStringParser.IGNORE_ROUNDING_ALWAYS - : PatternStringParser.IGNORE_ROUNDING_NEVER); - propertiesFromTuple(tuple, properties); - actual = Parse.parse(tuple.parse, ppos, properties, DecimalFormatSymbols.getInstance(tuple.locale)); - } catch (IllegalArgumentException e) { - return "parse exception: " + e.getMessage(); - } - if (actual == null && ppos.getIndex() != 0) { - throw new AssertionError("Error: value is null but parse position is not zero"); - } - if (ppos.getIndex() == 0) { - return "Parse failed; got " + actual + ", but expected " + tuple.output; - } - if (tuple.output.equals("NaN")) { - if (!Double.isNaN(actual.doubleValue())) { - return "Expected NaN, but got: " + actual; - } - return null; - } else if (tuple.output.equals("Inf")) { - if (!Double.isInfinite(actual.doubleValue()) || Double.compare(actual.doubleValue(), 0.0) < 0) { - return "Expected Inf, but got: " + actual; - } - return null; - } else if (tuple.output.equals("-Inf")) { - if (!Double.isInfinite(actual.doubleValue()) || Double.compare(actual.doubleValue(), 0.0) > 0) { - return "Expected -Inf, but got: " + actual; - } - return null; - } else if (tuple.output.equals("fail")) { - return null; - } else if (new BigDecimal(tuple.output).compareTo(new BigDecimal(actual.toString())) != 0) { - return "Expected: " + tuple.output + ", got: " + actual; - } else { - return null; - } - } - - /** - * Runs a single parse currency test. On success, returns null. On failure, returns the error. This - * implementation just returns null. Subclasses should override. - * - * @param tuple - * contains the parameters of the format test. - */ - @Override - public String parseCurrency(DataDrivenNumberFormatTestData tuple) { - String pattern = (tuple.pattern == null) ? "0" : tuple.pattern; - DecimalFormatProperties properties; - ParsePosition ppos = new ParsePosition(0); - CurrencyAmount actual; - try { - properties = PatternStringParser.parseToProperties( - pattern, - tuple.currency != null ? PatternStringParser.IGNORE_ROUNDING_ALWAYS - : PatternStringParser.IGNORE_ROUNDING_NEVER); - propertiesFromTuple(tuple, properties); - actual = Parse - .parseCurrency(tuple.parse, ppos, properties, DecimalFormatSymbols.getInstance(tuple.locale)); - } catch (ParseException e) { - e.printStackTrace(); - return "parse exception: " + e.getMessage(); - } - if (ppos.getIndex() == 0 || actual.getCurrency().getCurrencyCode().equals("XXX")) { - return "Parse failed; got " + actual + ", but expected " + tuple.output; - } - BigDecimal expectedNumber = new BigDecimal(tuple.output); - if (expectedNumber.compareTo(new BigDecimal(actual.getNumber().toString())) != 0) { - return "Wrong number: Expected: " + expectedNumber + ", got: " + actual; - } - String expectedCurrency = tuple.outputCurrency; - if (!expectedCurrency.equals(actual.getCurrency().toString())) { - return "Wrong currency: Expected: " + expectedCurrency + ", got: " + actual; - } - return null; - } - /** * Runs a single select test. On success, returns null. On failure, returns the error. This implementation just * returns null. Subclasses should override. diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/PropertiesTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/PropertiesTest.java index dc4611ae3d3..bb557cc970c 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/PropertiesTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/PropertiesTest.java @@ -31,9 +31,8 @@ import org.junit.Test; import com.ibm.icu.dev.test.serializable.SerializableTestUtility; import com.ibm.icu.impl.number.DecimalFormatProperties; import com.ibm.icu.impl.number.Padder.PadPosition; -import com.ibm.icu.impl.number.Parse.GroupingMode; -import com.ibm.icu.impl.number.Parse.ParseMode; import com.ibm.icu.impl.number.PatternStringParser; +import com.ibm.icu.impl.number.parse.NumberParserImpl.ParseMode; import com.ibm.icu.text.CompactDecimalFormat.CompactStyle; import com.ibm.icu.text.CurrencyPluralInfo; import com.ibm.icu.text.MeasureFormat.FormatWidth; @@ -252,12 +251,6 @@ public class PropertiesTest { CurrencyUsage[] values = CurrencyUsage.values(); return values[seed % values.length]; - } else if (type == GroupingMode.class) { - if (seed == 0) - return null; - GroupingMode[] values = GroupingMode.values(); - return values[seed % values.length]; - } else if (type == FormatWidth.class) { if (seed == 0) return null; diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/TextTrieMapTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/TextTrieMapTest.java index 291708208b7..4bd8fe1137e 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/TextTrieMapTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/TextTrieMapTest.java @@ -85,67 +85,6 @@ public class TextTrieMapTest extends TestFmwk { {"l📺", null, SUP1}, }; - private static final Object[][] TESTCASES_PARSE = { - { - "Sunday", - new Object[]{ - new Object[]{SAT,SUN}, new Object[]{SAT,SUN}, // matches on "S" - null, null, // matches on "Su" - SUN, SUN, // matches on "Sun" - null, null, // matches on "Sund" - null, null, // matches on "Sunda" - SUN, SUN, // matches on "Sunday" - } - }, - { - "sunday", - new Object[]{ - null, new Object[]{SAT,SUN}, // matches on "s" - null, null, // matches on "su" - null, SUN, // matches on "sun" - null, null, // matches on "sund" - null, null, // matches on "sunda" - null, SUN, // matches on "sunday" - } - }, - { - "MMM", - new Object[]{ - MON, MON, // matches on "M" - // no more matches in data - } - }, - { - "BBB", - new Object[]{ - // no matches in data - } - }, - { - "l📺12", - new Object[]{ - null, null, // matches on "L" - null, SUP1, // matches on "L📺" - null, SUP2, // matches on "L📺1" - // no more matches in data - } - }, - { - "L📻", - new Object[] { - null, null, // matches on "L" - SUP3, SUP3, // matches on "L📻" - } - }, - { - "L🃏", - new Object[] { - null, null, // matches on "L" - SUP4, SUP4, // matches on "L🃏" - } - } - }; - @Test public void TestCaseSensitive() { Iterator itr = null; @@ -172,13 +111,6 @@ public class TextTrieMapTest extends TestFmwk { checkResult("get(String, int) case " + i, itr, TESTCASES[i][1]); } - logln("Test for ParseState"); - for (int i = 0; i < TESTCASES_PARSE.length; i++) { - String test = (String) TESTCASES_PARSE[i][0]; - Object[] expecteds = (Object[]) TESTCASES_PARSE[i][1]; - checkParse(map, test, expecteds, true); - } - logln("Test for partial match"); for (Object[] cas : TESTDATA) { String str = (String) cas[0]; @@ -239,13 +171,6 @@ public class TextTrieMapTest extends TestFmwk { checkResult("get(String, int) case " + i, itr, TESTCASES[i][2]); } - logln("Test for ParseState"); - for (int i = 0; i < TESTCASES_PARSE.length; i++) { - String test = (String) TESTCASES_PARSE[i][0]; - Object[] expecteds = (Object[]) TESTCASES_PARSE[i][1]; - checkParse(map, test, expecteds, false); - } - logln("Test for partial match"); for (Object[] cas : TESTDATA) { String str = (String) cas[0]; @@ -279,54 +204,6 @@ public class TextTrieMapTest extends TestFmwk { checkResult("Get Sunday", itr, new Object[]{SUN, FOO, BAR}); } - private void checkParse(TextTrieMap map, String text, Object[] rawExpecteds, boolean caseSensitive) { - // rawExpecteds has even-valued indices for case sensitive and odd-valued indicies for case insensitive - // Get out only the values that we want. - Object[] expecteds = null; - for (int i=rawExpecteds.length/2-1; i>=0; i--) { - int j = i*2+(caseSensitive?0:1); - if (rawExpecteds[j] != null) { - if (expecteds == null) { - expecteds = new Object[i+1]; - } - expecteds[i] = rawExpecteds[j]; - } - } - if (expecteds == null) { - expecteds = new Object[0]; - } - - TextTrieMap.ParseState state = null; - for (int charOffset=0, cpOffset=0; charOffset < text.length(); cpOffset++) { - int cp = Character.codePointAt(text, charOffset); - if (state == null) { - state = map.openParseState(cp); - } - if (state == null) { - assertEquals("Expected matches, but no matches are available", 0, expecteds.length); - break; - } - state.accept(cp); - if (cpOffset < expecteds.length - 1) { - assertFalse( - "In middle of parse sequence, but atEnd() is true: '" + text + "' offset " + charOffset, - state.atEnd()); - } else if (cpOffset == expecteds.length) { - // Note: it possible for atEnd() to be either true or false at expecteds.length - 1; - // if true, we are at the end of the input string; if false, there is still input string - // left to be consumed, but we don't know if there are remaining matches. - assertTrue( - "At end of parse sequence, but atEnd() is false: '" + text + "' offset " + charOffset, - state.atEnd()); - break; - } - Object expected = expecteds[cpOffset]; - Iterator actual = state.getCurrentMatches(); - checkResult("ParseState '" + text + "' offset " + charOffset, actual, expected); - charOffset += Character.charCount(cp); - } - } - private boolean eql(Object o1, Object o2) { if (o1 == null || o2 == null) { if (o1 == null && o2 == null) { -- 2.40.0