CaseMap.toTitle().wholeString().noLowercase();
private static String toTitleWholeStringNoLowercase(ULocale locale, String s) {
- return TO_TITLE_WHOLE_STRING_NO_LOWERCASE.apply(
- locale.toLocale(), null, s, new StringBuilder(), null).toString();
+ return TO_TITLE_WHOLE_STRING_NO_LOWERCASE.apply(locale.toLocale(), null, s);
}
public static LocaleDisplayNames getInstance(ULocale locale, DialectHandling dialectHandling) {
return GROUPER_AUTO;
case ON_ALIGNED:
return GROUPER_ON_ALIGNED;
- case WESTERN:
+ case THOUSANDS:
return GROUPER_WESTERN;
default:
throw new AssertionError();
return GROUPER_WESTERN;
} else if (grouping1 == 3 && grouping2 == 2 && minGrouping == 1) {
return GROUPER_INDIC;
- } else if (grouping1 == 3 && grouping2 == 3 && minGrouping == 1) {
+ } else if (grouping1 == 3 && grouping2 == 3 && minGrouping == 2) {
return GROUPER_WESTERN_MIN2;
- } else if (grouping1 == 3 && grouping2 == 2 && minGrouping == 1) {
+ } else if (grouping1 == 3 && grouping2 == 2 && minGrouping == 2) {
return GROUPER_INDIC_MIN2;
} else {
return new Grouper(grouping1, grouping2, minGrouping);
* The maximum number of fraction places, integer numerals, or significant digits. TODO: This does
* not feel like the best home for this value.
*/
- public static final int MAX_INT_FRAC_SIG = 100;
+ public static final int MAX_INT_FRAC_SIG = 999;
/**
* Converts a rounding mode and metadata about the quantity being rounded to a boolean determining
return null;
}
- affixPattern = ParsingUtils.maybeFold(affixPattern, parseFlags);
AffixPatternMatcher series = new AffixPatternMatcher(affixPattern);
series.factory = factory;
series.ignorables = (0 != (parseFlags & ParsingUtils.PARSE_FLAG_EXACT_AFFIX)) ? null
@Override
public boolean match(StringSegment segment, ParsedNumber result) {
- if (segment.getCodePoint() == cp) {
- segment.adjustOffset(Character.charCount(cp));
+ if (segment.matches(cp)) {
+ segment.adjustOffsetByCodePoint();
result.setCharsConsumed(segment);
}
return false;
private final String currency1;
private final String currency2;
- public static CurrencyMatcher getInstance(Currency currency, ULocale loc, int setupFlags) {
+ public static CurrencyMatcher getInstance(Currency currency, ULocale loc) {
return new CurrencyMatcher(currency.getSubtype(),
- ParsingUtils.maybeFold(currency.getSymbol(loc), setupFlags),
- ParsingUtils.maybeFold(currency.getCurrencyCode(), setupFlags));
+ currency.getSymbol(loc),
+ currency.getCurrencyCode());
}
private CurrencyMatcher(String isoCode, String currency1, String currency2) {
DecimalFormatSymbols symbols;
IgnorablesMatcher ignorables;
ULocale locale;
- int parseFlags;
public MinusSignMatcher minusSign(boolean allowTrailing) {
return MinusSignMatcher.getInstance(symbols, allowTrailing);
public AnyMatcher currency() {
AnyMatcher any = new AnyMatcher();
- any.addMatcher(CurrencyMatcher.getInstance(currency, locale, parseFlags));
+ any.addMatcher(CurrencyMatcher.getInstance(currency, locale));
any.addMatcher(CurrencyTrieMatcher.getInstance(locale));
any.freeze();
return any;
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
-import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.DecimalFormatSymbols;
import com.ibm.icu.text.UnicodeSet;
public class NanMatcher extends SymbolMatcher {
private static final NanMatcher DEFAULT = new NanMatcher("NaN");
- private static final NanMatcher DEFAULT_FOLDED = new NanMatcher(UCharacter.foldCase("NaN", true));
public static NanMatcher getInstance(DecimalFormatSymbols symbols, int parseFlags) {
- String symbolString = ParsingUtils.maybeFold(symbols.getNaN(), parseFlags);
+ String symbolString = symbols.getNaN();
if (DEFAULT.string.equals(symbolString)) {
return DEFAULT;
- } else if (DEFAULT_FOLDED.string.equals(symbolString)) {
- return DEFAULT_FOLDED;
} else {
return new NanMatcher(symbolString);
}
*/
public class NumberParserImpl {
+ @Deprecated
+ public static NumberParserImpl removeMeWhenMerged(ULocale locale, String pattern, int parseFlags) {
+ NumberParserImpl parser = new NumberParserImpl(parseFlags);
+ DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(locale);
+ IgnorablesMatcher ignorables = IgnorablesMatcher.DEFAULT;
+
+ MatcherFactory factory = new MatcherFactory();
+ factory.currency = Currency.getInstance("USD");
+ factory.symbols = symbols;
+ factory.ignorables = ignorables;
+ factory.locale = locale;
+
+ ParsedPatternInfo patternInfo = PatternStringParser.parseToPatternInfo(pattern);
+ AffixMatcher.newGenerate(patternInfo, parser, factory, ignorables, parseFlags);
+
+ Grouper grouper = Grouper.forStrategy(GroupingStrategy.AUTO).withLocaleData(locale, patternInfo);
+ parser.addMatcher(DecimalMatcher.getInstance(symbols, grouper, parseFlags));
+ parser.addMatcher(CurrencyTrieMatcher.getInstance(locale));
+ parser.addMatcher(NanMatcher.getInstance(symbols, parseFlags));
+
+ parser.freeze();
+ return parser;
+ }
+
// TODO: Find a better place for this enum.
/** Controls the set of rules for parsing a string. */
public static enum ParseMode {
// Temporary frontend for testing.
int parseFlags = ParsingUtils.PARSE_FLAG_IGNORE_CASE
- | ParsingUtils.PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES;
+ | ParsingUtils.PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES
+ | ParsingUtils.PARSE_FLAG_OPTIMIZE;
if (strictGrouping) {
parseFlags |= ParsingUtils.PARSE_FLAG_STRICT_GROUPING_SIZE;
}
- NumberParserImpl parser = new NumberParserImpl(parseFlags, true);
+ NumberParserImpl parser = new NumberParserImpl(parseFlags);
DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(locale);
IgnorablesMatcher ignorables = IgnorablesMatcher.DEFAULT;
factory.symbols = symbols;
factory.ignorables = ignorables;
factory.locale = locale;
- factory.parseFlags = parseFlags;
ParsedPatternInfo patternInfo = PatternStringParser.parseToPatternInfo(pattern);
AffixMatcher.newGenerate(patternInfo, parser, factory, ignorables, parseFlags);
parser.addMatcher(DecimalMatcher.getInstance(symbols, grouper, parseFlags));
parser.addMatcher(MinusSignMatcher.getInstance(symbols, false));
parser.addMatcher(NanMatcher.getInstance(symbols, parseFlags));
- parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper, parseFlags));
+ parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper));
parser.addMatcher(CurrencyTrieMatcher.getInstance(locale));
parser.addMatcher(new RequireNumberMatcher());
if (parseCurrency || patternInfo.hasCurrencySign()) {
parseFlags |= ParsingUtils.PARSE_FLAG_MONETARY_SEPARATORS;
}
+ if (optimize) {
+ parseFlags |= ParsingUtils.PARSE_FLAG_OPTIMIZE;
+ }
IgnorablesMatcher ignorables = isStrict ? IgnorablesMatcher.STRICT : IgnorablesMatcher.DEFAULT;
- NumberParserImpl parser = new NumberParserImpl(parseFlags, optimize);
+ NumberParserImpl parser = new NumberParserImpl(parseFlags);
MatcherFactory factory = new MatcherFactory();
factory.currency = currency;
factory.symbols = symbols;
factory.ignorables = ignorables;
factory.locale = locale;
- factory.parseFlags = parseFlags;
//////////////////////
/// AFFIX MATCHERS ///
////////////////////////
if (parseCurrency || patternInfo.hasCurrencySign()) {
- parser.addMatcher(CurrencyMatcher.getInstance(currency, locale, parseFlags));
+ parser.addMatcher(CurrencyMatcher.getInstance(currency, locale));
parser.addMatcher(CurrencyTrieMatcher.getInstance(locale));
}
parser.addMatcher(ignorables);
parser.addMatcher(DecimalMatcher.getInstance(symbols, grouper, parseFlags));
if (!properties.getParseNoExponent()) {
- parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper, parseFlags));
+ parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper));
}
//////////////////
/**
* Creates a new, empty parser.
*
- * @param ignoreCase
- * If true, perform case-folding. This parameter needs to go into the constructor because
- * its value is used during the construction of the matcher chain.
- * @param optimize
- * If true, compute "lead chars" UnicodeSets for the matchers. This reduces parsing
- * runtime but increases construction runtime. If the parser is going to be used only once
- * or twice, set this to false; if it is going to be used hundreds of times, set it to
- * true.
+ * @param parseFlags
+ * The parser settings defined in the PARSE_FLAG_* fields.
*/
- public NumberParserImpl(int parseFlags, boolean optimize) {
+ public NumberParserImpl(int parseFlags) {
matchers = new ArrayList<NumberParseMatcher>();
- if (optimize) {
+ if (0 != (parseFlags & ParsingUtils.PARSE_FLAG_OPTIMIZE)) {
leadCodePointses = new ArrayList<UnicodeSet>();
} else {
leadCodePointses = null;
assert !frozen;
this.matchers.add(matcher);
if (leadCodePointses != null) {
- UnicodeSet leadCodePoints = matcher.getLeadCodePoints();
- assert leadCodePoints.isFrozen();
- this.leadCodePointses.add(leadCodePoints);
+ addLeadCodePointsForMatcher(matcher);
}
}
this.matchers.addAll(matchers);
if (leadCodePointses != null) {
for (NumberParseMatcher matcher : matchers) {
- UnicodeSet leadCodePoints = matcher.getLeadCodePoints();
- assert leadCodePoints.isFrozen();
- this.leadCodePointses.add(leadCodePoints);
+ addLeadCodePointsForMatcher(matcher);
}
}
}
+ /**
+  * Records the lead code points of the given matcher in {@code leadCodePointses}.
+  *
+  * <p>
+  * The set returned by the matcher is expected to be frozen. When PARSE_FLAG_IGNORE_CASE is set in
+  * {@code parseFlags}, a thawed clone closed over case mappings (and re-frozen) is stored instead
+  * of the matcher's own set.
+  */
+ private void addLeadCodePointsForMatcher(NumberParseMatcher matcher) {
+     UnicodeSet matcherLeads = matcher.getLeadCodePoints();
+     assert matcherLeads.isFrozen();
+     boolean ignoreCase = (parseFlags & ParsingUtils.PARSE_FLAG_IGNORE_CASE) != 0;
+     // TODO: Avoid the clone operation here.
+     UnicodeSet effectiveLeads = ignoreCase
+             ? matcherLeads.cloneAsThawed().closeOver(UnicodeSet.ADD_CASE_MAPPINGS).freeze()
+             : matcherLeads;
+     this.leadCodePointses.add(effectiveLeads);
+ }
+
public void setComparator(Comparator<ParsedNumber> comparator) {
assert !frozen;
this.comparator = comparator;
public void parse(String input, int start, boolean greedy, ParsedNumber result) {
assert frozen;
assert start >= 0 && start < input.length();
- StringSegment segment = new StringSegment(ParsingUtils.maybeFold(input, parseFlags));
+ StringSegment segment = new StringSegment(input, parseFlags);
segment.adjustOffset(start);
if (greedy) {
parseGreedyRecursive(segment, result);
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
-import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UnicodeSet.EntryRange;
public static final int PARSE_FLAG_EXACT_AFFIX = 0x0200;
public static final int PARSE_FLAG_PLUS_SIGN_ALLOWED = 0x0400;
public static final int PARSE_FLAG_FRACTION_GROUPING_DISABLED = 0x0800;
+ public static final int PARSE_FLAG_OPTIMIZE = 0x1000;
public static void putLeadCodePoints(UnicodeSet input, UnicodeSet output) {
for (EntryRange range : input.ranges()) {
}
}
- /**
- * Case-folds the string if IGNORE_CASE flag is set; otherwise, returns the same string.
- */
- public static String maybeFold(String input, int parseFlags) {
- UnicodeSet cwcf = UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.CWCF);
- if (0 != (parseFlags & PARSE_FLAG_IGNORE_CASE) && cwcf.containsSome(input)) {
- return UCharacter.foldCase(input, true);
- } else {
- return input;
- }
- }
-
}
private final String exponentSeparatorString;
private final DecimalMatcher exponentMatcher;
- public static ScientificMatcher getInstance(
- DecimalFormatSymbols symbols,
- Grouper grouper,
- int parseFlags) {
+ public static ScientificMatcher getInstance(DecimalFormatSymbols symbols, Grouper grouper) {
// TODO: Static-initialize most common instances?
- return new ScientificMatcher(symbols, grouper, parseFlags);
+ return new ScientificMatcher(symbols, grouper);
}
- private ScientificMatcher(DecimalFormatSymbols symbols, Grouper grouper, int parseFlags) {
- exponentSeparatorString = ParsingUtils.maybeFold(symbols.getExponentSeparator(), parseFlags);
+ private ScientificMatcher(DecimalFormatSymbols symbols, Grouper grouper) {
+ exponentSeparatorString = symbols.getExponentSeparator();
exponentMatcher = DecimalMatcher.getInstance(symbols,
grouper,
ParsingUtils.PARSE_FLAG_DECIMAL_SCIENTIFIC | ParsingUtils.PARSE_FLAG_INTEGER_ONLY);
if (segment.length() == 0) {
return true;
}
- int leadCp = segment.getCodePoint();
- if (leadCp == -1) {
- // Partial code point match
- return true;
- }
// Allow a sign, and then try to match digits.
boolean minusSign = false;
- if (UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.MINUS_SIGN).contains(leadCp)) {
+ if (segment.matches(UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.MINUS_SIGN))) {
minusSign = true;
- segment.adjustOffset(Character.charCount(leadCp));
- } else if (UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.PLUS_SIGN).contains(leadCp)) {
- segment.adjustOffset(Character.charCount(leadCp));
+ segment.adjustOffsetByCodePoint();
+ } else if (segment.matches(UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.PLUS_SIGN))) {
+ segment.adjustOffsetByCodePoint();
}
int digitsOffset = segment.getOffset();
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.text.UnicodeSet;
+
/**
* A mutable class allowing for a String with a variable offset and length. The charAt, length, and
* subSequence methods all operate relative to the fixed offset into the String.
private final String str;
private int start;
private int end;
+ private boolean foldCase;
- public StringSegment(String str) {
+ public StringSegment(String str, int parseFlags) {
this.str = str;
this.start = 0;
this.end = str.length();
+ this.foldCase = 0 != (parseFlags & ParsingUtils.PARSE_FLAG_IGNORE_CASE);
}
public int getOffset() {
start += delta;
}
+ /**
+ * Adjusts the offset by the width of the current code point, either 1 or 2 chars.
+ */
+ public void adjustOffsetByCodePoint() {
+ start += Character.charCount(getCodePoint());
+ }
+
public void setLength(int length) {
assert length >= 0;
assert start + length <= str.length();
/**
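- * Returns the first code point in the string segment, or -1 if the string starts with an invalid
- * code point.
+ * Returns the first code point in the string segment. A high surrogate that is followed by a low
+ * surrogate inside the segment is combined into one supplementary code point; any other leading
+ * char, including an unpaired surrogate, is returned as its own value. This method never returns
+ * -1.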
+ *
+ * <p>
+ * <strong>Important:</strong> Most of the time, you should use {@link #matches}, which handles case
+ * folding logic, instead of this method.
*/
public int getCodePoint() {
assert start < end;
char lead = str.charAt(start);
- if (Character.isHighSurrogate(lead) && start + 1 < end) {
- return Character.toCodePoint(lead, str.charAt(start + 1));
- } else if (Character.isSurrogate(lead)) {
- return -1;
- } else {
- return lead;
+ char trail;
+ if (Character.isHighSurrogate(lead)
+ && start + 1 < end
+ && Character.isLowSurrogate(trail = str.charAt(start + 1))) {
+ return Character.toCodePoint(lead, trail);
}
+ return lead;
+ }
+
+ /**
+ * Returns true if the first code point of this StringSegment equals the given code point.
+ *
+ * <p>
+ * This method will perform case folding if case folding is enabled for the parser.
+ */
+ public boolean matches(int otherCp) {
+ return codePointsEqual(getCodePoint(), otherCp, foldCase);
+ }
+
+ /**
+  * Returns true if the first code point of this StringSegment is in the given UnicodeSet.
+  *
+  * <p>
+  * Unlike {@link #matches(int)}, this overload does not apply case folding itself: the set is
+  * tested exactly as given.
+  *
+  * @param uniset
+  *            The set to test the leading code point against.
+  * @return true if the set contains the leading code point of this segment.
+  */
+ public boolean matches(UnicodeSet uniset) {
+     // TODO: Move UnicodeSet case-folding logic here.
+     // TODO: Handle string matches here instead of separately.
+     // getCodePoint() returns the lead char for an unpaired surrogate rather than a -1 sentinel,
+     // so its result can be handed to the set directly.
+     return uniset.contains(getCodePoint());
}
/**
* Returns the length of the prefix shared by this StringSegment and the given CharSequence. For
* example, if this string segment is "aab", and the char sequence is "aac", this method returns 2,
* since the first 2 characters are the same.
+ *
+ * <p>
+ * This method will perform case folding if case folding is enabled for the parser.
*/
public int getCommonPrefixLength(CharSequence other) {
+ return getPrefixLengthInternal(other, foldCase);
+ }
+
+ /**
+ * Like {@link #getCommonPrefixLength}, but never performs case folding, even if case folding is
+ * enabled for the parser.
+ */
+ public int getCaseSensitivePrefixLength(CharSequence other) {
+ return getPrefixLengthInternal(other, false);
+ }
+
+ private int getPrefixLengthInternal(CharSequence other, boolean foldCase) {
int offset = 0;
for (; offset < Math.min(length(), other.length());) {
- if (charAt(offset) != other.charAt(offset)) {
+ // TODO: case-fold code points, not chars
+ char c1 = charAt(offset);
+ char c2 = other.charAt(offset);
+ if (!codePointsEqual(c1, c2, foldCase)) {
break;
}
offset++;
return offset;
}
+ // /**
+ // * Case-folds the string if IGNORE_CASE flag is set; otherwise, returns the same string.
+ // */
+ // public static String maybeFold(String input, int parseFlags) {
+ // UnicodeSet cwcf = UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.CWCF);
+ // if (0 != (parseFlags & ParsingUtils.PARSE_FLAG_IGNORE_CASE) && cwcf.containsSome(input)) {
+ // return UCharacter.foldCase(input, true);
+ // } else {
+ // return input;
+ // }
+ // }
+
+ /**
+  * Compares two code points, optionally ignoring case.
+  *
+  * @param cp1
+  *            The first code point.
+  * @param cp2
+  *            The second code point.
+  * @param foldCase
+  *            If true, code points that differ are compared again after passing each through
+  *            UCharacter.foldCase(cp, true).
+  * @return true if the code points are identical, or if foldCase is set and their folded forms
+  *         are identical.
+  */
+ private static final boolean codePointsEqual(int cp1, int cp2, boolean foldCase) {
+     // Identical code points match regardless of the folding mode; without folding, nothing
+     // else can match.
+     boolean sameAsIs = (cp1 == cp2);
+     if (sameAsIs || !foldCase) {
+         return sameAsIs;
+     }
+     return UCharacter.foldCase(cp1, true) == UCharacter.foldCase(cp2, true);
+ }
+
@Override
public String toString() {
return str.substring(0, start) + "[" + str.substring(start, end) + "]" + str.substring(end);
}
}
- int cp = segment.getCodePoint();
- if (cp != -1 && uniSet.contains(cp)) {
- segment.adjustOffset(Character.charCount(cp));
+ if (segment.matches(uniSet)) {
+ segment.adjustOffsetByCodePoint();
accept(segment, result);
return false;
}
DIGITS,
NAN_LEAD,
SCIENTIFIC_LEAD,
- CWCF,
+ CWCF, // TODO: Check if this is being used and remove it if not.
// Combined Separators with Digits (for lead code points)
DIGITS_OR_ALL_SEPARATORS,
return CaseMapImpl.toTitle(getCaseLocale(locale), options, titleIter, str);
}
- /**
- * Return a string with just the first word titlecased, for menus and UI, etc. This does not affect most of the string,
- * and sometimes has no effect at all; the original string is returned whenever casing
- * would not be appropriate for the first word (such as for CJK characters or initial numbers).
- * Initial non-letters are skipped in order to find the character to change.
- * Characters past the first affected are left untouched: see also TITLECASE_NO_LOWERCASE.
- * <p>Examples:
- * <table border='1'><tr><th>Source</th><th>Result</th><th>Locale</th></tr>
- * <tr><td>anglo-American locale</td><td>Anglo-American locale</td></tr>
- * <tr><td>“contact us”</td><td>“Contact us”</td></tr>
- * <tr><td>49ers win!</td><td>49ers win!</td></tr>
- * <tr><td>丰(abc)</td><td>丰(abc)</td></tr>
- * <tr><td>«ijs»</td><td>«Ijs»</td></tr>
- * <tr><td>«ijs»</td><td>«IJs»</td><td>nl-BE</td></tr>
- * <tr><td>«ijs»</td><td>«İjs»</td><td>tr-DE</td></tr>
- * </table>
- * @param locale the locale for accessing exceptional behavior (eg for tr).
- * @param str the source string to change
- * @return the modified string, or the original if no modifications were necessary.
- * @internal
- * @deprecated ICU internal only
- */
- @Deprecated
- public static String toTitleFirst(ULocale locale, String str) {
- // TODO: Remove this function. Inline it where it is called in CLDR.
- return TO_TITLE_WHOLE_STRING_NO_LOWERCASE.apply(locale.toLocale(), null, str);
- }
-
- private static final com.ibm.icu.text.CaseMap.Title TO_TITLE_WHOLE_STRING_NO_LOWERCASE =
- com.ibm.icu.text.CaseMap.toTitle().wholeString().noLowercase();
-
/**
* {@icu} <p>Returns the titlecase version of the argument string.
* <p>Position for titlecasing is determined by the argument break
import com.ibm.icu.util.ICUUncheckedIOException;
/**
- * The result of a number formatting operation. This class allows the result to be exported in several data types,
- * including a String, an AttributedCharacterIterator, and a BigDecimal.
+ * The result of a number formatting operation. This class allows the result to be exported in several
+ * data types, including a String, an AttributedCharacterIterator, and a BigDecimal.
*
* @draft ICU 60
* @provisional This API might change or be removed in a future release.
}
/**
- * Append the formatted number to an Appendable, such as a StringBuilder. This may be slightly more efficient than
- * creating a String.
+ * Append the formatted number to an Appendable, such as a StringBuilder. This may be slightly more
+ * efficient than creating a String.
*
* <p>
- * If an IOException occurs when appending to the Appendable, an unchecked {@link ICUUncheckedIOException} is thrown
- * instead.
+ * If an IOException occurs when appending to the Appendable, an unchecked
+ * {@link ICUUncheckedIOException} is thrown instead.
*
* @param appendable
* The Appendable to which to append the formatted number string.
}
/**
- * Determine the start and end indices of the first occurrence of the given <em>field</em> in the output string.
- * This allows you to determine the locations of the integer part, fraction part, and sign.
+ * Determine the start and end indices of the first occurrence of the given <em>field</em> in the
+ * output string. This allows you to determine the locations of the integer part, fraction part, and
+ * sign.
*
* <p>
- * If multiple different field attributes are needed, this method can be called repeatedly, or if <em>all</em> field
- * attributes are needed, consider using getFieldIterator().
+ * If multiple different field attributes are needed, this method can be called repeatedly, or if
+ * <em>all</em> field attributes are needed, consider using getFieldIterator().
*
* <p>
- * If a field occurs multiple times in an output string, such as a grouping separator, this method will only ever
- * return the first occurrence. Use getFieldIterator() to access all occurrences of an attribute.
+ * If a field occurs multiple times in an output string, such as a grouping separator, this method
+ * will only ever return the first occurrence. Use getFieldIterator() to access all occurrences of an
+ * attribute.
*
* @param fieldPosition
* The FieldPosition to populate with the start and end indices of the desired field.
}
/**
- * Export the formatted number as an AttributedCharacterIterator. This allows you to determine which characters in
- * the output string correspond to which <em>fields</em>, such as the integer part, fraction part, and sign.
+ * Export the formatted number as an AttributedCharacterIterator. This allows you to determine which
+ * characters in the output string correspond to which <em>fields</em>, such as the integer part,
+ * fraction part, and sign.
*
* <p>
* If information on only one field is needed, consider using populateFieldPosition() instead.
*
- * @return An AttributedCharacterIterator, containing information on the field attributes of the number string.
+ * @return An AttributedCharacterIterator, containing information on the field attributes of the
+ * number string.
* @draft ICU 60
* @provisional This API might change or be removed in a future release.
* @see com.ibm.icu.text.NumberFormat.Field
}
/**
- * Export the formatted number as a BigDecimal. This endpoint is useful for obtaining the exact number being printed
- * after scaling and rounding have been applied by the number formatting pipeline.
+ * Export the formatted number as a BigDecimal. This endpoint is useful for obtaining the exact
+ * number being printed after scaling and rounding have been applied by the number formatting
+ * pipeline.
*
* @return A BigDecimal representation of the formatted number.
* @draft ICU 60
/**
* @internal
- * @deprecated This API is ICU internal only.
+ * @deprecated This API is ICU internal only. Use {@link #populateFieldPosition} or
+ * {@link #getFieldIterator} for similar functionality.
*/
@Deprecated
public String getPrefix() {
NumberStringBuilder temp = new NumberStringBuilder();
- int length = micros.modOuter.apply(temp, 0, 0);
- length += micros.modMiddle.apply(temp, 0, length);
- /* length += */ micros.modInner.apply(temp, 0, length);
- int prefixLength = micros.modOuter.getPrefixLength() + micros.modMiddle.getPrefixLength()
- + micros.modInner.getPrefixLength();
+ // #13453: DecimalFormat wants the affixes from the pattern only (modMiddle).
+ micros.modMiddle.apply(temp, 0, 0);
+ int prefixLength = micros.modMiddle.getPrefixLength();
return temp.subSequence(0, prefixLength).toString();
}
/**
* @internal
- * @deprecated This API is ICU internal only.
+ * @deprecated This API is ICU internal only. Use {@link #populateFieldPosition} or
+ * {@link #getFieldIterator} for similar functionality.
*/
@Deprecated
public String getSuffix() {
NumberStringBuilder temp = new NumberStringBuilder();
- int length = micros.modOuter.apply(temp, 0, 0);
- length += micros.modMiddle.apply(temp, 0, length);
- length += micros.modInner.apply(temp, 0, length);
- int prefixLength = micros.modOuter.getPrefixLength() + micros.modMiddle.getPrefixLength()
- + micros.modInner.getPrefixLength();
+ // #13453: DecimalFormat wants the affixes from the pattern only (modMiddle).
+ int length = micros.modMiddle.apply(temp, 0, 0);
+ int prefixLength = micros.modMiddle.getPrefixLength();
return temp.subSequence(prefixLength, length).toString();
}
public int hashCode() {
// NumberStringBuilder and BigDecimal are mutable, so we can't call
// #equals() or #hashCode() on them directly.
- return Arrays.hashCode(nsb.toCharArray()) ^ Arrays.hashCode(nsb.toFieldArray()) ^ fq.toBigDecimal().hashCode();
+ return Arrays.hashCode(nsb.toCharArray())
+ ^ Arrays.hashCode(nsb.toFieldArray())
+ ^ fq.toBigDecimal().hashCode();
}
/**
// #equals() or #hashCode() on them directly.
FormattedNumber _other = (FormattedNumber) other;
return Arrays.equals(nsb.toCharArray(), _other.nsb.toCharArray())
- ^ Arrays.equals(nsb.toFieldArray(), _other.nsb.toFieldArray())
- ^ fq.toBigDecimal().equals(_other.fq.toBigDecimal());
+ && Arrays.equals(nsb.toFieldArray(), _other.nsb.toFieldArray())
+ && fq.toBigDecimal().equals(_other.fq.toBigDecimal());
}
}
\ No newline at end of file
* <li>MIN2: 1234 and 12,34,567
* <li>AUTO: 1,234 and 12,34,567
* <li>ON_ALIGNED: 1,234 and 12,34,567
- * <li>WESTERN: 1,234 and 1,234,567
+ * <li>THOUSANDS: 1,234 and 1,234,567
* </ul>
*
* <p>
* @provisional This API might change or be removed in a future release.
* @see NumberFormatter
*/
- WESTERN
+ THOUSANDS
}
/**
case TIME_UNIT_FORMAT:
return createTimeUnitFormat();
case CURRENCY_FORMAT:
- return new CurrencyFormat(locale);
+ return MeasureFormat.getCurrencyFormat(locale);
default:
throw new InvalidObjectException("Unknown subclass: " + subClass);
}
import com.ibm.icu.impl.Trie2;
/**
-* <p>Internal class used for Rule Based Break Iterators</p>
+* <p>Internal class used for Rule Based Break Iterators.</p>
* <p>This class provides access to the compiled break rule data, as
* it is stored in a .brk file.
+* Not intended for public use; declared public for testing purposes only.
+* @internal
*/
-final class RBBIDataWrapper {
+public final class RBBIDataWrapper {
//
// These fields are the ready-to-use compiled rule data, as
// read from the file.
//
- RBBIDataHeader fHeader;
- short fFTable[];
+ public RBBIDataHeader fHeader;
+ public short fFTable[];
short fRTable[];
short fSFTable[];
short fSRTable[];
// Index offsets to the fields in a state table row.
// Corresponds to struct RBBIStateTableRow in the C version.
//
- final static int ACCEPTING = 0;
- final static int LOOKAHEAD = 1;
- final static int TAGIDX = 2;
- final static int RESERVED = 3;
- final static int NEXTSTATES = 4;
+ /** @internal */
+ public final static int ACCEPTING = 0;
+ /** @internal */
+ public final static int LOOKAHEAD = 1;
+ /** @internal */
+ public final static int TAGIDX = 2;
+ /** @internal */
+ public final static int RESERVED = 3;
+ /** @internal */
+ public final static int NEXTSTATES = 4;
// Index offsets to header fields of a state table
// struct RBBIStateTable {... in the C version.
/**
* Data Header. A struct-like class with the fields from the RBBI data file header.
+ * Not intended for public use; declared public for testing purposes only.
+ * @internal
*/
- final static class RBBIDataHeader {
+ public final static class RBBIDataHeader {
- int fMagic; // == 0xbla0
+ int fMagic; // == 0xb1a0
byte[] fFormatVersion; // For ICU 3.4 and later.
int fLength; // Total length in bytes of this RBBI Data,
// including all sections, not just the header.
- int fCatCount; // Number of character categories.
+ public int fCatCount; // Number of character categories.
//
// Offsets and sizes of each of the subsections within the RBBI data.
/**
* RBBI State Table Indexing Function. Given a state number, return the
* array index of the start of the state table row for that state.
- *
+ * @internal
*/
- int getRowIndex(int state){
+ public int getRowIndex(int state){
return ROW_DATA + state * (fHeader.fCatCount + 4);
}
return This;
}
- ///CLOVER:OFF
- // Getters for fields from the state table header
- //
- private int getStateTableNumStates(short table[]) {
+ /**
+ * Getters for fields from the state table header
+ * @internal
+ */
+ public int getStateTableNumStates(short table[]) {
if (isBigEndian) {
return (table[NUMSTATES] << 16) | (table[NUMSTATES+1] & 0xffff);
} else {
return (table[NUMSTATES+1] << 16) | (table[NUMSTATES] & 0xffff);
}
}
- ///CLOVER:ON
int getStateTableFlags(short table[]) {
// This works for up to 15 flags bits.
//
// UnicodeSet processing.
// Munge the Unicode Sets to create a set of character categories.
- // Generate the mapping tables (TRIE) from input 32-bit characters to
+ // Generate the mapping tables (TRIE) from input code points to
// the character categories.
//
- builder.fSetBuilder.build();
+ builder.fSetBuilder.buildRanges();
//
// Generate the DFA state transition table.
builder.fForwardTables.printRuleStatusTable();
}
+ builder.optimizeTables();
+ builder.fSetBuilder.buildTrie();
//
// Package up the compiled data, writing it to an output stream
// in the serialization format. This is the same as the ICU4C runtime format.
//
builder.flattenData(os);
}
+
+ static class ClassPair {
+ int left = 3;
+ int right = 0;
+ }
+
+ /**
+  * Shrinks the generated state tables.
+  *
+  * <p>
+  * First, for as long as the forward table reports a pair of duplicate character-class columns,
+  * the two categories are merged in the set builder and the right-hand column is removed from
+  * all four tables (forward, reverse, safe forward, safe reverse). Afterwards, duplicate states
+  * are removed from each of the four tables.
+  */
+ void optimizeTables() {
+     ClassPair duplPair = new ClassPair();
+
+     while (fForwardTables.findDuplCharClassFrom(duplPair)) {
+         // mergeCategories() is declared with the two category numbers as separate ints,
+         // so unpack the pair rather than passing the ClassPair object itself.
+         fSetBuilder.mergeCategories(duplPair.left, duplPair.right);
+         fForwardTables.removeColumn(duplPair.right);
+         fReverseTables.removeColumn(duplPair.right);
+         fSafeFwdTables.removeColumn(duplPair.right);
+         fSafeRevTables.removeColumn(duplPair.right);
+     }
+
+     fForwardTables.removeDuplicateStates();
+     fReverseTables.removeDuplicateStates();
+     fSafeFwdTables.removeDuplicateStates();
+     fSafeRevTables.removeDuplicateStates();
}
}
}
if (setName.equals("dictionary")) {
- this.fNum |= 0x4000;
+ this.fNum |= DICT_BIT;
break;
}
}
boolean fSawBOF;
+ static final int DICT_BIT = 0x4000;
+
//------------------------------------------------------------------------
//
// from the Unicode Sets.
//
//------------------------------------------------------------------------
- void build() {
+ void buildRanges() {
RangeDescriptor rlRange;
if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("usets")>=0) {printSets();}
if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("rgroup")>=0) {printRangeGroups();}
if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("esets")>=0) {printSets();}
+ }
+
+
+ /**
+ * Build the Trie table for mapping code points to the corresponding
+ * range group number.
+ */
+ void buildTrie() {
+ RangeDescriptor rlRange;
fTrie = new Trie2Writable(0, // Initial value for all code points.
0); // Error value for out-of-range input.
}
}
-
+ /**
+  * Folds character category {@code right} into category {@code left}.
+  *
+  * <p>
+  * Every range whose category number (ignoring DICT_BIT) equals {@code right} is renumbered to
+  * {@code left}, keeping its DICT_BIT. Every range numbered above {@code right} is shifted down
+  * by one. Finally the group count is decremented.
+  *
+  * @param left
+  *            The category to keep; must be at least 1.
+  * @param right
+  *            The category to eliminate; must be greater than {@code left}.
+  */
+ void mergeCategories(int left, int right) {
+     assert(left >= 1);
+     assert(right > left);
+     RangeDescriptor rd = fRangeList;
+     while (rd != null) {
+         final int category = rd.fNum & ~DICT_BIT;
+         if (category == right) {
+             // Re-attach whatever dictionary bit the range carried before.
+             rd.fNum = left | (rd.fNum & DICT_BIT);
+         } else if (category > right) {
+             rd.fNum--;
+         }
+         rd = rd.fNext;
+     }
+     --fGroupCount;
+ }
//-----------------------------------------------------------------------------------
//
// getTrieSize() Return the size that will be required to serialize the Trie.
if (groupNum<10) {System.out.print(" ");}
System.out.print(groupNum + " ");
- if ((rlRange.fNum & 0x4000) != 0) { System.out.print(" <DICT> ");}
+ if ((rlRange.fNum & DICT_BIT) != 0) { System.out.print(" <DICT> ");}
for (i=0; i<rlRange.fIncludesSets.size(); i++) {
RBBINode usetNode = rlRange.fIncludesSets.get(i);
// if sd.fAccepting already had a value other than 0 or -1, leave it be.
// If the end marker node is from a look-ahead rule, set
- // the fLookAhead field or this state also.
+ // the fLookAhead field for this state also.
if (endMarker.fLookAheadEnd) {
// TODO: don't change value if already set?
// TODO: allow for more than one active look-ahead rule in engine.
+/**
+ * Looks for two character categories (state-table columns) that hold the
+ * same next-state value in every state, resuming from the pair stored in
+ * {@code classPair}.
+ *
+ * @param classPair in/out. On entry {@code left} is the first column to
+ *                  examine; on a {@code true} return {@code left} and
+ *                  {@code right} identify the matching columns.
+ * @return {@code true} if a duplicate pair was found, {@code false} once
+ *         all remaining pairs have been examined.
+ */
+boolean findDuplCharClassFrom(RBBIRuleBuilder.ClassPair classPair) {
+    int numStates = fDStates.size();
+    int numCols = fRB.fSetBuilder.getNumCharCategories();
+
+    // Initialised so the comparison after the state loop is well defined
+    // (and accepted by the compiler) even when the loop body never runs.
+    // The C++ narrowing to uint16_t is dropped; entries are compared as int.
+    int tableBase = 0;
+    int tableDupl = 0;
+    for (; classPair.left < numCols - 1; ++classPair.left) {
+        for (classPair.right = classPair.left + 1; classPair.right < numCols; ++classPair.right) {
+            for (int state = 0; state < numStates; state++) {
+                // NOTE(review): assumes fDStates is a List<RBBIStateDescriptor>
+                // and fDtran an int[] - confirm against the declarations.
+                RBBIStateDescriptor sd = fDStates.get(state);
+                tableBase = sd.fDtran[classPair.left];
+                tableDupl = sd.fDtran[classPair.right];
+                if (tableBase != tableDupl) {
+                    break;
+                }
+            }
+            // Equal here means the state loop ran to completion without a mismatch.
+            if (tableBase == tableDupl) {
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+
+/**
+ * Deletes one column (character category) from the transition row of
+ * every state in this table.
+ *
+ * @param column index of the column to delete; must be a valid index into
+ *               each state's transition row
+ */
+void removeColumn(int column) {
+    int numStates = fDStates.size();
+    for (int state = 0; state < numStates; state++) {
+        // NOTE(review): assumes fDStates is a List<RBBIStateDescriptor>
+        // and fDtran an int[] - confirm against the declarations.
+        RBBIStateDescriptor sd = fDStates.get(state);
+        int[] oldRow = sd.fDtran;
+        assert column < oldRow.length;
+        // Arrays cannot shrink in place: copy the parts before and after the column.
+        int[] newRow = new int[oldRow.length - 1];
+        System.arraycopy(oldRow, 0, newRow, 0, column);
+        System.arraycopy(oldRow, column + 1, newRow, column, newRow.length - column);
+        sd.fDtran = newRow;
+    }
+}
+
+/**
+ * Looks for two states that are interchangeable: same accepting, look-ahead
+ * and tag values, and transition rows that are equal column by column, where
+ * a transition into either state of the pair counts as equal to a transition
+ * into the other.
+ *
+ * Java has no reference parameters, so the in/out state numbers travel in a
+ * ClassPair instead of the C++ {@code int &} arguments.
+ *
+ * @param states in/out. On entry {@code left} is the first state to examine;
+ *               on a {@code true} return {@code left} is the state to keep
+ *               and {@code right} its duplicate.
+ * @return {@code true} if a duplicate pair was found.
+ */
+boolean findDuplicateState(RBBIRuleBuilder.ClassPair states) {
+    int numStates = fDStates.size();
+    int numCols = fRB.fSetBuilder.getNumCharCategories();
+
+    for (; states.left < numStates - 1; ++states.left) {
+        // NOTE(review): assumes fDStates is a List<RBBIStateDescriptor>
+        // and fDtran an int[] - confirm against the declarations.
+        RBBIStateDescriptor firstSD = fDStates.get(states.left);
+        for (states.right = states.left + 1; states.right < numStates; ++states.right) {
+            RBBIStateDescriptor duplSD = fDStates.get(states.right);
+            if (firstSD.fAccepting != duplSD.fAccepting ||
+                    firstSD.fLookAhead != duplSD.fLookAhead ||
+                    firstSD.fTagsIdx != duplSD.fTagsIdx) {
+                continue;
+            }
+            boolean rowsMatch = true;
+            for (int col = 0; col < numCols; ++col) {
+                int firstVal = firstSD.fDtran[col];
+                int duplVal = duplSD.fDtran[col];
+                boolean sameTarget = firstVal == duplVal;
+                boolean bothTargetThePair =
+                        (firstVal == states.left || firstVal == states.right) &&
+                        (duplVal == states.left || duplVal == states.right);
+                if (!(sameTarget || bothTargetThePair)) {
+                    rowsMatch = false;
+                    break;
+                }
+            }
+            if (rowsMatch) {
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+/**
+ * Deletes state {@code duplState} and renumbers every reference to it:
+ * transitions, fAccepting and fLookAhead values equal to {@code duplState}
+ * become {@code keepState}, and values above it are decremented.
+ *
+ * @param keepState the surviving state; must be less than {@code duplState}
+ * @param duplState the state to delete; must be a valid state index
+ */
+void removeState(int keepState, int duplState) {
+    assert keepState < duplState;
+    assert duplState < fDStates.size();
+
+    // duplState is an int, so this selects List.remove(int index);
+    // the explicit C++ delete has no Java counterpart.
+    fDStates.remove(duplState);
+
+    int numStates = fDStates.size();
+    int numCols = fRB.fSetBuilder.getNumCharCategories();
+    for (int state = 0; state < numStates; ++state) {
+        RBBIStateDescriptor sd = fDStates.get(state);
+        for (int col = 0; col < numCols; col++) {
+            int existingVal = sd.fDtran[col];
+            int newVal = existingVal;
+            if (existingVal == duplState) {
+                newVal = keepState;
+            } else if (existingVal > duplState) {
+                newVal = existingVal - 1;
+            }
+            sd.fDtran[col] = newVal;
+        }
+        if (sd.fAccepting == duplState) {
+            sd.fAccepting = keepState;
+        } else if (sd.fAccepting > duplState) {
+            sd.fAccepting--;
+        }
+        if (sd.fLookAhead == duplState) {
+            sd.fLookAhead = keepState;
+        } else if (sd.fLookAhead > duplState) {
+            sd.fLookAhead--;
+        }
+    }
+}
+
+
+/**
+ * Repeatedly finds a pair of duplicate states and removes the second one
+ * until no duplicates remain. The search starts at state 3.
+ */
+void removeDuplicateStates() {
+    RBBIRuleBuilder.ClassPair dupls = new RBBIRuleBuilder.ClassPair();
+    // Set explicitly so the starting point does not depend on ClassPair's field defaults.
+    dupls.left = 3;
+    dupls.right = 0;
+    while (findDuplicateState(dupls)) {
+        // System.out.printf("Removing duplicate states (%d, %d)\n", dupls.left, dupls.right);
+        removeState(dupls.left, dupls.right);
+    }
+}
+
//-----------------------------------------------------------------------------
//
private CharacterIterator fText = new java.text.StringCharacterIterator("");
/**
- * The rule data for this BreakIterator instance. Package private.
+ * The rule data for this BreakIterator instance.
+ * Not intended for public use. Declared public for testing purposes only.
+ * @internal
*/
- RBBIDataWrapper fRData;
+ public RBBIDataWrapper fRData;
/**
* The iteration state - current position, rule status for the current position,
private static final long serialVersionUID = -3707773153184971529L;
- // These fields are supposed to be the same as the fields in mf. They
- // are here for serialization backward compatibility and to support parsing.
+ // Unlike MeasureFormat, this class is mutable and allows a new NumberFormat to be set after
+ // initialization. Keep a second copy of NumberFormat and use it instead of the one from the parent.
private NumberFormat format;
private ULocale locale;
private int style;
- // We use this field in lieu of the super class because the super class
- // is immutable while this class is mutable. The contents of the super class
- // is an empty shell. Every public method of the super class is overridden to
- // delegate to this field. Each time this object mutates, it replaces this field with
- // a new immutable instance.
-// private transient MeasureFormat mf;
-
private transient Map<TimeUnit, Map<String, Object[]>> timeUnitToCountToPatterns;
private transient PluralRules pluralRules;
private transient boolean isReady;
}
}
}
- }
+
+ /**
+  * Sets an islamic-civil calendar to a few Julian days around 73530874 and
+  * reports an error if the resulting month or day of month exceeds the
+  * calendar's own maximum for that field.
+  */
+ @Test
+ public void TestIslamicCalOverflow() {
+     final String localeID = "ar@calendar=islamic-civil";
+     final Calendar cal = Calendar.getInstance(new ULocale(localeID));
+     final int maxMonth = cal.getMaximum(Calendar.MONTH);
+     final int maxDayOfMonth = cal.getMaximum(Calendar.DATE);
+     // year 202002, int32_t overflow if jd >= 73530874
+     final int firstJulianDay = 73530872;
+     final int lastJulianDay = 73530876;
+     for (int jd = firstJulianDay; jd <= lastJulianDay; jd++) {
+         cal.clear();
+         cal.set(Calendar.JULIAN_DAY, jd);
+         final int year = cal.get(Calendar.YEAR);
+         final int month = cal.get(Calendar.MONTH);
+         final int dayOfMonth = cal.get(Calendar.DATE);
+         if (month <= maxMonth && dayOfMonth <= maxDayOfMonth) {
+             continue;
+         }
+         errln("Error: localeID " + localeID + ", julianDay " + jd + "; got year " + year + "; maxMonth " + maxMonth +
+                 ", got month " + month + "; maxDayOfMonth " + maxDayOfMonth + ", got dayOfMonth " + dayOfMonth);
+     }
+ }
+}
//eof
errln("ERROR: Char digits should be Latin digits");
}
+ // Check on copy
+ DecimalFormatSymbols copy = (DecimalFormatSymbols) symbols.clone();
+ if (!Arrays.equals(copy.getDigitStrings(), osmanyaDigitStrings)) {
+ errln("ERROR: Osmanya digits (supplementary) should be set");
+ }
+ if (Character.codePointAt(osmanyaDigitStrings[0], 0) != copy.getCodePointZero()) {
+ errln("ERROR: Code point zero be Osmanya code point zero");
+ }
+ if (defZero != copy.getZeroDigit()) {
+ errln("ERROR: Zero digit should be 0");
+ }
+ if (!Arrays.equals(copy.getDigits(), defDigits)) {
+ errln("ERROR: Char digits should be Latin digits");
+ }
+
+ // Check on resource bundle
+ DecimalFormatSymbols fromData = DecimalFormatSymbols.getInstance(new ULocale("en@numbers=osma"));
+ if (!Arrays.equals(fromData.getDigitStrings(), osmanyaDigitStrings)) {
+ errln("ERROR: Osmanya digits (supplementary) should be set");
+ }
+ if (Character.codePointAt(osmanyaDigitStrings[0], 0) != fromData.getCodePointZero()) {
+ errln("ERROR: Code point zero be Osmanya code point zero");
+ }
+ if (defZero != fromData.getZeroDigit()) {
+ errln("ERROR: Zero digit should be 0");
+ }
+ if (!Arrays.equals(fromData.getDigits(), defDigits)) {
+ errln("ERROR: Char digits should be Latin digits");
+ }
+
symbols.setDigitStrings(differentDigitStrings);
if (!Arrays.equals(symbols.getDigitStrings(), differentDigitStrings)) {
errln("ERROR: Different digits should be set");
import java.io.Serializable;
import java.lang.reflect.Field;
import java.text.FieldPosition;
+import java.text.ParseException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import com.ibm.icu.text.MeasureFormat.FormatWidth;
import com.ibm.icu.text.NumberFormat;
import com.ibm.icu.util.Currency;
+import com.ibm.icu.util.CurrencyAmount;
import com.ibm.icu.util.Measure;
import com.ibm.icu.util.MeasureUnit;
import com.ibm.icu.util.NoUnit;
assertEquals("getCurrencyFormat ULocale/Locale", mfu, mfj);
}
+ /**
+  * Parses "GTQ 34.56" with the English currency MeasureFormat and checks
+  * both the numeric value and the ISO code of the resulting CurrencyAmount.
+  *
+  * @throws ParseException if the input cannot be parsed
+  */
+ @Test
+ public void testCurrencyFormatParseIsoCode() throws ParseException {
+     MeasureFormat mf = MeasureFormat.getCurrencyFormat(ULocale.ENGLISH);
+     CurrencyAmount result = (CurrencyAmount) mf.parseObject("GTQ 34.56");
+     // Expected value goes first, matching the assertion below, so a failure
+     // message labels the two numbers correctly.
+     assertEquals("Parse should succeed", 34.56, result.getNumber().doubleValue(), 0.0);
+     assertEquals("Should parse ISO code GTQ even though the currency is USD",
+             "GTQ", result.getCurrency().getCurrencyCode());
+ }
+
@Test
public void testDoubleZero() {
ULocale en = new ULocale("en");
new ParseCurrencyItem( "en_GB", "euros4", "4,00\u00A0\u20AC", 6,400, "EUR" ),
new ParseCurrencyItem( "en_GB", "euros6", "6\u00A0\u20AC", 3, 6, "EUR" ),
new ParseCurrencyItem( "en_GB", "euros8", "\u20AC8", 2, 8, "EUR" ),
- new ParseCurrencyItem( "en_GB", "dollars4", "US$4", 0, 0, "USD" ),
+ new ParseCurrencyItem( "en_GB", "dollars4", "US$4", 4, 4, "USD" ),
new ParseCurrencyItem( "fr_FR", "euros4", "4,00\u00A0\u20AC", 6, 4, "EUR" ),
new ParseCurrencyItem( "fr_FR", "euros6", "6\u00A0\u20AC", 3, 6, "EUR" ),
};
@SuppressWarnings("resource") // InputStream is will be closed by the ResourceReader.
- @Ignore("TODO: http://bugs.icu-project.org/trac/ticket/13571")
@Test
public void TestCases() {
String caseFileName = "NumberFormatTestCases.txt";
assertEquals("Grouping should be off", false, df.isGroupingUsed());
}
+ /**
+  * Checks what getPositivePrefix()/getPositiveSuffix() return for four
+  * formatter flavours: scientific and compact are expected to yield an empty
+  * suffix, ISO-currency a "GBP" prefix, and plural-currency a
+  * " British pounds" suffix.
+  */
+ @Test
+ public void Test13453_AffixContent() {
+     DecimalFormat scientific = (DecimalFormat) DecimalFormat.getScientificInstance();
+     assertEquals("Scientific should NOT be included", "", scientific.getPositiveSuffix());
+
+     DecimalFormat compact =
+             CompactDecimalFormat.getInstance(ULocale.ENGLISH, CompactDecimalFormat.CompactStyle.SHORT);
+     assertEquals("Compact should NOT be included", "", compact.getPositiveSuffix());
+
+     DecimalFormat isoCurrency = (DecimalFormat) DecimalFormat.getInstance(NumberFormat.ISOCURRENCYSTYLE);
+     isoCurrency.setCurrency(Currency.getInstance("GBP"));
+     assertEquals("ISO currency SHOULD be included", "GBP", isoCurrency.getPositivePrefix());
+
+     DecimalFormat pluralCurrency = (DecimalFormat) DecimalFormat.getInstance(NumberFormat.PLURALCURRENCYSTYLE);
+     pluralCurrency.setCurrency(Currency.getInstance("GBP"));
+     assertEquals("Plural name SHOULD be included", " British pounds", pluralCurrency.getPositiveSuffix());
+ }
+
@Test
public void Test11035_FormatCurrencyAmount() {
double amount = 12345.67;
import com.ibm.icu.impl.number.Padder.PadPosition;
import com.ibm.icu.impl.number.PatternStringParser;
import com.ibm.icu.number.CompactNotation;
-import com.ibm.icu.number.FormattedNumber;
import com.ibm.icu.number.FractionRounder;
import com.ibm.icu.number.IntegerWidth;
import com.ibm.icu.number.LocalizedNumberFormatter;
"8.765",
"0");
+ assertFormatDescendingBig(
+ "Indic locale with THOUSANDS grouping",
+ "",
+ NumberFormatter.with().grouping(GroupingStrategy.THOUSANDS),
+ new ULocale("en-IN"),
+ "87,650,000",
+ "8,765,000",
+ "876,500",
+ "87,650",
+ "8,765",
+ "876.5",
+ "87.65",
+ "8.765",
+ "0");
+
// NOTE: Hungarian is interesting because it has minimumGroupingDigits=4 in locale data
// If this test breaks due to data changes, find another locale that has minimumGroupingDigits.
assertFormatDescendingBig(
assertNotEquals(NumberFormatter.with().locale(ULocale.ENGLISH), NumberFormatter.with().locale(Locale.FRENCH));
}
- @Test
- public void getPrefixSuffix() {
- Object[][] cases = {
- { NumberFormatter.withLocale(ULocale.ENGLISH).unit(GBP).unitWidth(UnitWidth.ISO_CODE), "GBP", "",
- "-GBP", "" },
- { NumberFormatter.withLocale(ULocale.ENGLISH).unit(GBP).unitWidth(UnitWidth.FULL_NAME), "",
- " British pounds", "-", " British pounds" } };
-
- for (Object[] cas : cases) {
- LocalizedNumberFormatter f = (LocalizedNumberFormatter) cas[0];
- String posPrefix = (String) cas[1];
- String posSuffix = (String) cas[2];
- String negPrefix = (String) cas[3];
- String negSuffix = (String) cas[4];
- FormattedNumber positive = f.format(1);
- FormattedNumber negative = f.format(-1);
- assertEquals(posPrefix, positive.getPrefix());
- assertEquals(posSuffix, positive.getSuffix());
- assertEquals(negPrefix, negative.getPrefix());
- assertEquals(negSuffix, negative.getSuffix());
- }
- }
-
@Test
public void plurals() {
// TODO: Expand this test.
Rounder.class.getDeclaredMethod("minMaxFraction", Integer.TYPE, Integer.TYPE),
Rounder.class.getDeclaredMethod("minMaxDigits", Integer.TYPE, Integer.TYPE), };
- final int EXPECTED_MAX_INT_FRAC_SIG = 100;
- final String expectedSubstring0 = "between 0 and 100 (inclusive)";
- final String expectedSubstring1 = "between 1 and 100 (inclusive)";
- final String expectedSubstringN1 = "between -1 and 100 (inclusive)";
+ final int EXPECTED_MAX_INT_FRAC_SIG = 999;
+ final String expectedSubstring0 = "between 0 and 999 (inclusive)";
+ final String expectedSubstring1 = "between 1 and 999 (inclusive)";
+ final String expectedSubstringN1 = "between -1 and 999 (inclusive)";
- // We require that the upper bounds all be 100 inclusive.
+ // We require that the upper bounds all be 999 inclusive.
// The lower bound may be either -1, 0, or 1.
Set<String> methodsWithLowerBound1 = new HashSet();
methodsWithLowerBound1.add("fixedDigits");
methodsWithLowerBound1.add("withMinDigits");
methodsWithLowerBound1.add("withMaxDigits");
methodsWithLowerBound1.add("withMinExponentDigits");
+ // Methods with lower bound 0:
+ // fixedFraction
+ // minFraction
+ // maxFraction
+ // minMaxFraction
+ // zeroFillTo
Set<String> methodsWithLowerBoundN1 = new HashSet();
methodsWithLowerBoundN1.add("truncateAt");
import com.ibm.icu.impl.number.parse.MinusSignMatcher;
import com.ibm.icu.impl.number.parse.NumberParserImpl;
import com.ibm.icu.impl.number.parse.ParsedNumber;
+import com.ibm.icu.impl.number.parse.ParsingUtils;
import com.ibm.icu.impl.number.parse.PercentMatcher;
import com.ibm.icu.impl.number.parse.PlusSignMatcher;
import com.ibm.icu.impl.number.parse.SeriesMatcher;
int expectedOffset = (Integer) cas[1];
boolean expectedMaybeMore = (Boolean) cas[2];
- StringSegment segment = new StringSegment(input);
+ StringSegment segment = new StringSegment(input, 0);
ParsedNumber result = new ParsedNumber();
boolean actualMaybeMore = series.match(segment, result);
int actualOffset = segment.getOffset();
result.getNumber().doubleValue(),
0.0);
}
+
+ /**
+  * For each pattern/input pair, parses the input once with only
+  * PARSE_FLAG_OPTIMIZE and once with PARSE_FLAG_IGNORE_CASE added, and
+  * compares the number of characters consumed with the expected counts.
+  */
+ @Test
+ public void testCaseFolding() {
+     // Columns: pattern, input string, case sensitive chars, case insensitive chars
+     final Object[][] cases = {
+             { "0", "JP¥3456", 7, 7 },
+             { "0", "jp¥3456", 0, 0 }, // not to be accepted, even in case insensitive mode
+             { "A0", "A5", 2, 2 },
+             { "A0", "a5", 0, 2 },
+             { "0", "NaN", 3, 3 },
+             { "0", "nan", 0, 3 } };
+     final int strictFlags = ParsingUtils.PARSE_FLAG_OPTIMIZE;
+     final int foldingFlags = ParsingUtils.PARSE_FLAG_IGNORE_CASE | ParsingUtils.PARSE_FLAG_OPTIMIZE;
+
+     for (Object[] row : cases) {
+         final String pattern = (String) row[0];
+         final String input = (String) row[1];
+         final int wantStrict = (Integer) row[2];
+         final int wantFolded = (Integer) row[3];
+         final String label = input + " on " + pattern;
+
+         NumberParserImpl strictParser =
+                 NumberParserImpl.removeMeWhenMerged(ULocale.ENGLISH, pattern, strictFlags);
+         ParsedNumber strictResult = new ParsedNumber();
+         strictParser.parse(input, true, strictResult);
+         assertEquals("Case-Sensitive: " + label, wantStrict, strictResult.charEnd);
+
+         NumberParserImpl foldingParser =
+                 NumberParserImpl.removeMeWhenMerged(ULocale.ENGLISH, pattern, foldingFlags);
+         ParsedNumber foldedResult = new ParsedNumber();
+         foldingParser.parse(input, true, foldedResult);
+         assertEquals("Folded: " + label, wantFolded, foldedResult.charEnd);
+     }
+ }
}
@Test
public void testOffset() {
- StringSegment segment = new StringSegment(SAMPLE_STRING);
+ StringSegment segment = new StringSegment(SAMPLE_STRING, 0);
assertEquals(0, segment.getOffset());
segment.adjustOffset(3);
assertEquals(3, segment.getOffset());
@Test
public void testLength() {
- StringSegment segment = new StringSegment(SAMPLE_STRING);
+ StringSegment segment = new StringSegment(SAMPLE_STRING, 0);
assertEquals(11, segment.length());
segment.adjustOffset(3);
assertEquals(8, segment.length());
@Test
public void testCharAt() {
- StringSegment segment = new StringSegment(SAMPLE_STRING);
+ StringSegment segment = new StringSegment(SAMPLE_STRING, 0);
assertCharSequenceEquals(SAMPLE_STRING, segment);
segment.adjustOffset(3);
assertCharSequenceEquals("radio 📻", segment);
@Test
public void testGetCodePoint() {
- StringSegment segment = new StringSegment(SAMPLE_STRING);
+ StringSegment segment = new StringSegment(SAMPLE_STRING, 0);
assertEquals(0x1F4FB, segment.getCodePoint());
segment.setLength(1);
- assertEquals(-1, segment.getCodePoint());
+ assertEquals(0xD83D, segment.getCodePoint());
segment.resetLength();
segment.adjustOffset(1);
- assertEquals(-1, segment.getCodePoint());
+ assertEquals(0xDCFB, segment.getCodePoint());
segment.adjustOffset(1);
assertEquals(0x20, segment.getCodePoint());
}
@Test
public void testCommonPrefixLength() {
- StringSegment segment = new StringSegment(SAMPLE_STRING);
+ StringSegment segment = new StringSegment(SAMPLE_STRING, 0);
assertEquals(11, segment.getCommonPrefixLength(SAMPLE_STRING));
assertEquals(4, segment.getCommonPrefixLength("📻 r"));
assertEquals(3, segment.getCommonPrefixLength("📻 x"));
import java.text.CharacterIterator;
import java.util.ArrayList;
import java.util.List;
+import java.util.Locale;
import org.junit.Test;
import org.junit.runner.RunWith;
import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.text.BreakIterator;
+import com.ibm.icu.text.RBBIDataWrapper;
import com.ibm.icu.text.RuleBasedBreakIterator;
import com.ibm.icu.util.ULocale;
String rtRules = bi.toString(); // getRules() in C++
assertEquals("Break Iterator rule stripping test", "!!forward; $x = [ab#]; '#' '?'; ", rtRules);
}
+
+ /**
+  * Rebuilds the English line-break iterator from its rule source and scans
+  * the resulting forward state table for identical columns (character
+  * categories) and identical rows (states). Duplicates are only printed;
+  * the sole hard check is that no accepting value is below -1.
+  */
+ @Test
+ public void TestTableRedundancies() {
+     // Build a break iterator from source rules.
+     // Want to check the rule builder in Java, not the pre-built rules that are imported from ICU4C.
+     RuleBasedBreakIterator bi = (RuleBasedBreakIterator)BreakIterator.getLineInstance(Locale.ENGLISH);
+     String rules = bi.toString();
+     bi = new RuleBasedBreakIterator(rules);
+     RBBIDataWrapper dw = bi.fRData;
+     short[] fwtbl = dw.fFTable;
+     int numCharClasses = dw.fHeader.fCatCount;
+     // The table is not modified below, so the state count is read once.
+     int numStates = dw.getStateTableNumStates(fwtbl);
+
+     // Check for duplicate columns (character categories).
+     // Each column becomes a string with one char per state, starting at state 1.
+     List<String> columns = new ArrayList<String>();
+     for (int column=0; column<numCharClasses; column++) {
+         StringBuilder s = new StringBuilder();
+         for (int r = 1; r < numStates; r++) {
+             int row = dw.getRowIndex(r);
+             short tableVal = fwtbl[row + RBBIDataWrapper.NEXTSTATES + column];
+             s.append((char)tableVal);
+         }
+         columns.add(s.toString());
+     }
+     // Ignore column (char class) 0 while checking; it's special, and may have duplicates.
+     for (int c1=1; c1<numCharClasses; c1++) {
+         for (int c2 = c1+1; c2 < numCharClasses; c2++) {
+             // TODO: replace the printout with
+             // assertFalse(String.format("Duplicate columns (%d, %d)", c1, c2), columns.get(c1).equals(columns.get(c2)));
+             if (columns.get(c1).equals(columns.get(c2))) {
+                 System.out.printf("Duplicate columns (%d, %d)\n", c1, c2);
+             }
+         }
+     }
+
+     // Check for duplicate states.
+     List<String> rows = new ArrayList<String>();
+     for (int r=0; r<numStates; r++) {
+         StringBuilder s = new StringBuilder();
+         int row = dw.getRowIndex(r);
+         assertTrue("Accepting < -1", fwtbl[row + RBBIDataWrapper.ACCEPTING] >= -1);
+         // The three header values are appended as decimal text; the ':' after
+         // each keeps e.g. (1, 12) and (11, 2) from producing the same signature.
+         s.append(fwtbl[row + RBBIDataWrapper.ACCEPTING]).append(':');
+         s.append(fwtbl[row + RBBIDataWrapper.LOOKAHEAD]).append(':');
+         s.append(fwtbl[row + RBBIDataWrapper.TAGIDX]).append(':');
+         for (int column=0; column<numCharClasses; column++) {
+             short tableVal = fwtbl[row + RBBIDataWrapper.NEXTSTATES + column];
+             s.append((char)tableVal);
+         }
+         rows.add(s.toString());
+     }
+
+     for (int r1=0; r1 < numStates; r1++) {
+         for (int r2= r1+1; r2 < numStates; r2++) {
+             // TODO: replace the printout with
+             // assertFalse(String.format("Duplicate states (%d, %d)", r1, r2), rows.get(r1).equals(rows.get(r2)));
+             if (rows.get(r1).equals(rows.get(r2))) {
+                 System.out.printf("Duplicate states (%d, %d)\n", r1, r2);
+             }
+         }
+     }
+ }
}