public int length(int flags);
/**
 * Returns the complete affix pattern string selected by {@code flags}.
 */
+ public String getString(int flags);
+
public boolean hasCurrencySign();
public boolean positiveHasPlusSign();
return new String(chars);
}
+ /**
+  * Copies {@code affixPattern} into {@code output} with every symbol token dropped, so that only literal code
+  * points remain. Like calling unescape with a symbol provider that always returns the empty string.
+  *
+  * @param affixPattern the affix pattern to scan; must not be null
+  * @param output the builder that receives the surviving code points, appended in pattern order
+  */
+ public static void removeSymbols(CharSequence affixPattern, StringBuilder output) {
+     assert affixPattern != null;
+     for (long tag = 0L; hasNext(tag, affixPattern);) {
+         tag = nextToken(tag, affixPattern);
+         int codePoint = getTypeOrCp(tag);
+         if (codePoint < 0) {
+             // A negative value is not a code point; these are the tokens being removed.
+             continue;
+         }
+         output.appendCodePoint(codePoint);
+     }
+ }
+
/**
* Returns the next token from the affix pattern.
*
return affixesByPlural[pluralOrdinal].length(flags);
}
+ /**
+  * Returns the affix string for {@code flags}, taken from the provider stored for the plural form that
+  * {@code flags} encodes.
+  */
+ @Override
+ public String getString(int flags) {
+     // Only the bits under PLURAL_MASK choose the array slot; the full flags value is forwarded unchanged.
+     return affixesByPlural[flags & Flags.PLURAL_MASK].getString(flags);
+ }
+
@Override
public boolean positiveHasPlusSign() {
return affixesByPlural[StandardPlural.OTHER.ordinal()].positiveHasPlusSign();
return right - left;
}
+ @Override
public String getString(int flags) {
long endpoints = getEndpoints(flags);
int left = (int) (endpoints & 0xffffffff);
/** Returns the character at index {@code i} of the affix string selected by {@code flags}. */
@Override
public char charAt(int flags, int i) {
- return getStringForFlags(flags).charAt(i);
+ return getString(flags).charAt(i);
}
/** Returns the length of the affix string selected by {@code flags}. */
@Override
public int length(int flags) {
- return getStringForFlags(flags).length();
+ return getString(flags).length();
}
- private String getStringForFlags(int flags) {
+ @Override
+ public String getString(int flags) {
boolean prefix = (flags & Flags.PREFIX) != 0;
boolean negative = (flags & Flags.NEGATIVE_SUBPATTERN) != 0;
if (prefix && negative) {
import java.util.TreeSet;
import com.ibm.icu.impl.StandardPlural;
+import com.ibm.icu.impl.number.AffixPatternProvider;
+import com.ibm.icu.impl.number.AffixUtils;
import com.ibm.icu.impl.number.MutablePatternModifier;
import com.ibm.icu.impl.number.NumberStringBuilder;
}
}
+ /**
+  * Builds AffixMatchers from the literal (symbol-free) text of the pattern's affixes and adds them to
+  * {@code output}.
+  *
+  * <p>One candidate is built from the positive prefix/suffix and, when the provider reports a negative
+  * subpattern, a second one from the negative prefix/suffix (tagged with {@link ParsedNumber#FLAG_NEGATIVE}). A
+  * candidate whose prefix and suffix are both empty after symbol removal is discarded. When both candidates
+  * exist they are added in the order given by {@code COMPARATOR}; if the comparator considers them equal, only
+  * the positive one is added.
+  *
+  * @param patternInfo source of the affix pattern strings
+  * @param output the parser that receives the generated matchers
+  * @param includeUnpaired forwarded to {@code appendTo}; when true, prefix-only and suffix-only variants are
+  *        also added for matchers that have both affixes
+  */
+ public static void generateFromAffixPatternProvider(AffixPatternProvider patternInfo,
+         NumberParserImpl output,
+         boolean includeUnpaired) {
+     // One scratch buffer is shared by all affix extractions.
+     StringBuilder scratch = new StringBuilder();
+
+     AffixMatcher positive = matcherForSubpattern(patternInfo, /* positive subpattern */ 0, 0, scratch);
+     AffixMatcher negative = null;
+     if (patternInfo.hasNegativeSubpattern()) {
+         negative = matcherForSubpattern(patternInfo,
+                 AffixPatternProvider.Flags.NEGATIVE_SUBPATTERN,
+                 ParsedNumber.FLAG_NEGATIVE,
+                 scratch);
+     }
+
+     if (positive != null && negative != null) {
+         int order = COMPARATOR.compare(positive, negative);
+         if (order > 0) {
+             appendTo(negative, output, includeUnpaired);
+             appendTo(positive, output, includeUnpaired);
+         } else {
+             appendTo(positive, output, includeUnpaired);
+             if (order < 0) {
+                 appendTo(negative, output, includeUnpaired);
+             }
+             // order == 0: the two candidates are equal; only the positive one is kept.
+         }
+         return;
+     }
+
+     // At most one candidate exists; if neither does, there are no affixes to append.
+     AffixMatcher only = (positive != null) ? positive : negative;
+     if (only != null) {
+         appendTo(only, output, includeUnpaired);
+     }
+ }
+
+ /**
+  * Extracts the symbol-free prefix and suffix of one subpattern and wraps them in an AffixMatcher.
+  *
+  * @param patternInfo source of the affix pattern strings
+  * @param subpatternFlags AffixPatternProvider flags selecting the subpattern (0 for the positive one)
+  * @param matcherFlags flags to store on the resulting matcher
+  * @param scratch reusable buffer; its previous contents are discarded
+  * @return the matcher, or null when both prefix and suffix are empty after symbol removal
+  */
+ private static AffixMatcher matcherForSubpattern(AffixPatternProvider patternInfo,
+         int subpatternFlags,
+         int matcherFlags,
+         StringBuilder scratch) {
+     scratch.setLength(0);
+     AffixUtils.removeSymbols(patternInfo.getString(AffixPatternProvider.Flags.PREFIX | subpatternFlags), scratch);
+     String prefix = scratch.toString();
+
+     scratch.setLength(0);
+     AffixUtils.removeSymbols(patternInfo.getString(subpatternFlags), scratch);
+     String suffix = scratch.toString();
+
+     if (prefix.isEmpty() && suffix.isEmpty()) {
+         return null;
+     }
+     return new AffixMatcher(prefix, suffix, matcherFlags);
+ }
+
+ /**
+  * Adds {@code matcher} to {@code output}. When {@code includeUnpaired} is set and the matcher has both a
+  * non-empty prefix and a non-empty suffix, a prefix-only and a suffix-only matcher with the same flags are
+  * added as well.
+  */
+ private static void appendTo(AffixMatcher matcher, NumberParserImpl output, boolean includeUnpaired) {
+     output.addMatcher(matcher);
+     if (!includeUnpaired) {
+         return;
+     }
+     if (matcher.prefix.isEmpty() || matcher.suffix.isEmpty()) {
+         // The matcher has at most one affix, so there is nothing to unpair.
+         return;
+     }
+     AffixMatcher prefixOnly = new AffixMatcher(matcher.prefix, "", matcher.flags);
+     output.addMatcher(prefixOnly);
+     AffixMatcher suffixOnly = new AffixMatcher("", matcher.suffix, matcher.flags);
+     output.addMatcher(suffixOnly);
+ }
+
/**
* Constructs one or more AffixMatchers from the given MutablePatternModifier and flags, appending them to the given
* collection. The NumberStringBuilder is used as a temporary object only.
// Set up a pattern modifier with mostly defaults to generate AffixMatchers.
MutablePatternModifier mod = new MutablePatternModifier(false);
AffixPatternProvider patternInfo = new PropertiesAffixPatternProvider(properties);
- mod.setPatternInfo(patternInfo);
- mod.setPatternAttributes(SignDisplay.AUTO, false);
- mod.setSymbols(symbols, currency, UnitWidth.SHORT, null);
-
- // Figure out which flags correspond to this pattern modifier. Note: negatives are taken care of in the
- // generateFromPatternModifier function.
- int flags = 0;
- if (patternInfo.containsSymbolType(AffixUtils.TYPE_PERCENT)) {
- flags |= ParsedNumber.FLAG_PERCENT;
- }
- if (patternInfo.containsSymbolType(AffixUtils.TYPE_PERMILLE)) {
- flags |= ParsedNumber.FLAG_PERMILLE;
- }
- if (patternInfo.hasCurrencySign()) {
- flags |= ParsedNumber.FLAG_HAS_DEFAULT_CURRENCY;
- }
-
- parseCurrency = parseCurrency || patternInfo.hasCurrencySign();
-
- AffixMatcher.generateFromPatternModifier(mod, flags, !isStrict && !parseCurrency, parser);
+// mod.setPatternInfo(patternInfo);
+// mod.setPatternAttributes(SignDisplay.AUTO, false);
+// mod.setSymbols(symbols, currency, UnitWidth.SHORT, null);
+//
+// // Figure out which flags correspond to this pattern modifier. Note: negatives are taken care of in the
+// // generateFromPatternModifier function.
+// int flags = 0;
+// if (patternInfo.containsSymbolType(AffixUtils.TYPE_PERCENT)) {
+// flags |= ParsedNumber.FLAG_PERCENT;
+// }
+// if (patternInfo.containsSymbolType(AffixUtils.TYPE_PERMILLE)) {
+// flags |= ParsedNumber.FLAG_PERMILLE;
+// }
+// if (patternInfo.hasCurrencySign()) {
+// flags |= ParsedNumber.FLAG_HAS_DEFAULT_CURRENCY;
+// }
+//
+// parseCurrency = parseCurrency || patternInfo.hasCurrencySign();
+//
+// AffixMatcher.generateFromPatternModifier(mod, flags, !isStrict && !parseCurrency, parser);
+
+ AffixMatcher.generateFromAffixPatternProvider(patternInfo, parser, !isStrict);
///////////////////////////////
/// OTHER STANDARD MATCHERS ///
if (!isStrict) {
parser.addMatcher(WhitespaceMatcher.getInstance());
+ }
+ if (!isStrict || patternInfo.containsSymbolType(AffixUtils.TYPE_PLUS_SIGN)) {
parser.addMatcher(new PlusSignMatcher());
}
parser.addMatcher(new MinusSignMatcher());
parser.setIgnoreCase(!properties.getParseCaseSensitive());
+ System.out.println(parser);
+
parser.freeze();
return parser;
}
if (charAt(offset) != other.charAt(offset)) {
break;
}
+ offset++;
}
}
return offset;
set locale en
set pattern '-'#y
begin
-parse output
--45y 45
+parse output breaks
+// FIXME: P currently breaks on this case (the pattern's prefix is a quoted literal '-').
+-45y 45 P
test parse with locale symbols
// The grouping separator in it_CH is an apostrophe
USD 53.45 53.45 USD J
53.45USD 53.45 USD CJ
USD53.45 53.45 USD
-// Right now, P will not parse the affix unless it contains the exact currency GBP.
-(7.92) USD -7.92 USD P
+(7.92) USD -7.92 USD
(7.92) GBP -7.92 GBP
-(7.926) USD -7.926 USD P
-(7.926 USD) -7.926 USD CJP
-(USD 7.926) -7.926 USD CJP
-USD (7.926) -7.926 USD CJP
-USD (7.92) -7.92 USD CJP
+(7.926) USD -7.926 USD
+(7.926 USD) -7.926 USD CJ
+(USD 7.926) -7.926 USD CJ
+USD (7.926) -7.926 USD CJ
+USD (7.92) -7.92 USD CJ
(7.92)USD -7.92 USD CJP
-USD(7.92) -7.92 USD CJP
-(8) USD -8 USD P
+USD(7.92) -7.92 USD CJ
+(8) USD -8 USD
-8 USD -8 USD C
67 USD 67 USD
53.45$ fail USD
US Dollar53.45 53.45 USD
US Dollat53.45 fail USD
53.45US Dollar 53.45 USD CJ
-US Dollars (53.45) -53.45 USD CJP
-(53.45) US Dollars -53.45 USD P
-(53.45) Euros -53.45 EUR P
-US Dollar (53.45) -53.45 USD CJP
-(53.45) US Dollar -53.45 USD P
-US Dollars(53.45) -53.45 USD CJP
+US Dollars (53.45) -53.45 USD CJ
+(53.45) US Dollars -53.45 USD
+(53.45) Euros -53.45 EUR
+US Dollar (53.45) -53.45 USD CJ
+(53.45) US Dollar -53.45 USD
+US Dollars(53.45) -53.45 USD CJ
(53.45)US Dollars -53.45 USD CJP
-US Dollar(53.45) -53.45 USD CJP
+US Dollar(53.45) -53.45 USD CJ
US Dollat(53.45) fail USD
(53.45)US Dollar -53.45 USD CJP
53.45 fail GBP
£53.45 53.45 GBP
$53.45 fail USD J
-53.45 USD 53.45 USD
-53.45 GBP 53.45 GBP
+// FIXME: Fix the failures in this section. Positive/negative mixup.
+53.45 USD 53.45 USD P
+53.45 GBP 53.45 GBP P
USD 53.45 53.45 USD J
53.45USD 53.45 USD CJ
USD53.45 53.45 USD
-// FIXME: Fix this one
--7.92 USD -7.92 USD P
+-7.92 USD -7.92 USD
-7.92 GBP -7.92 GBP
-// FIXME: Fix this one
--7.926 USD -7.926 USD P
-USD -7.926 -7.926 USD CJP
--7.92USD -7.92 USD CJP
-USD-7.92 -7.92 USD CJP
--8 USD -8 USD P
-67 USD 67 USD
+-7.926 USD -7.926 USD
+USD -7.926 -7.926 USD CJ
+-7.92USD -7.92 USD CJ
+USD-7.92 -7.92 USD CJ
+-8 USD -8 USD
+67 USD 67 USD P
53.45$ fail USD
US Dollars 53.45 53.45 USD J
-53.45 US Dollars 53.45 USD
+53.45 US Dollars 53.45 USD P
US Dollar 53.45 53.45 USD J
-53.45 US Dollar 53.45 USD
+53.45 US Dollar 53.45 USD P
US Dollars53.45 53.45 USD
53.45US Dollars 53.45 USD CJ
US Dollar53.45 53.45 USD
assertEquals("Symbol provider into middle", "abcd123efg", sb.toString());
}
+ /** Checks that AffixUtils.removeSymbols drops symbol tokens and keeps literal text, including quoted literals. */
+ @Test
+ public void testRemoveSymbols() {
+     // Each row is { affix pattern, expected text once symbols are removed }.
+     String[][] cases = {
+             { "", "" },
+             { "-", "" },
+             { "'-'", "-" },
+             { "-a+b%c‰d¤e¤¤f¤¤¤g¤¤¤¤h¤¤¤¤¤", "abcdefgh" },
+     };
+
+     for (int i = 0; i < cases.length; i++) {
+         String pattern = cases[i][0];
+         String stripped = cases[i][1];
+         StringBuilder actual = new StringBuilder();
+         AffixUtils.removeSymbols(pattern, actual);
+         assertEquals("Removing symbols from: " + pattern, stripped, actual.toString());
+     }
+ }
+
private static String unescapeWithDefaults(String input) {
NumberStringBuilder nsb = new NumberStringBuilder();
int length = AffixUtils.unescape(input, nsb, 0, DEFAULT_SYMBOL_PROVIDER);
@Test
public void testOffset() {
- StringSegment segment = new StringSegment(SAMPLE_STRING);
+ StringSegment segment = new StringSegment(SAMPLE_STRING, false);
assertEquals(0, segment.getOffset());
segment.adjustOffset(3);
assertEquals(3, segment.getOffset());
@Test
public void testLength() {
- StringSegment segment = new StringSegment(SAMPLE_STRING);
+ StringSegment segment = new StringSegment(SAMPLE_STRING, false);
assertEquals(11, segment.length());
segment.adjustOffset(3);
assertEquals(8, segment.length());
@Test
public void testCharAt() {
- StringSegment segment = new StringSegment(SAMPLE_STRING);
+ StringSegment segment = new StringSegment(SAMPLE_STRING, false);
assertCharSequenceEquals(SAMPLE_STRING, segment);
segment.adjustOffset(3);
assertCharSequenceEquals("radio 📻", segment);
@Test
public void testGetCodePoint() {
- StringSegment segment = new StringSegment(SAMPLE_STRING);
+ StringSegment segment = new StringSegment(SAMPLE_STRING, false);
assertEquals(0x1F4FB, segment.getCodePoint());
segment.setLength(1);
assertEquals(-1, segment.getCodePoint());
@Test
public void testIsLeadingSurrogate() {
- StringSegment segment = new StringSegment(SAMPLE_STRING);
+ StringSegment segment = new StringSegment(SAMPLE_STRING, false);
assertFalse(segment.isLeadingSurrogate());
segment.setLength(1);
assertTrue(segment.isLeadingSurrogate());
/** Exercises getCommonPrefixLength on the full sample string and again after the offset is advanced by 3. */
@Test
public void testCommonPrefixLength() {
- StringSegment segment = new StringSegment(SAMPLE_STRING);
+ StringSegment segment = new StringSegment(SAMPLE_STRING, false);
assertEquals(11, segment.getCommonPrefixLength(SAMPLE_STRING));
assertEquals(4, segment.getCommonPrefixLength("📻 r"));
assertEquals(3, segment.getCommonPrefixLength("📻 x"));
assertEquals(0, segment.getCommonPrefixLength("x"));
assertEquals(0, segment.getCommonPrefixLength(""));
// From here on the segment starts 3 chars in; the remaining assertions compare against "radio"-like inputs.
segment.adjustOffset(3);
// An input that differs from the segment only by case yields no common prefix here.
+ assertEquals(0, segment.getCommonPrefixLength("RADiO"));
assertEquals(5, segment.getCommonPrefixLength("radio"));
assertEquals(2, segment.getCommonPrefixLength("rafio"));
assertEquals(0, segment.getCommonPrefixLength("fadio"));
assertEquals(0, segment.getCommonPrefixLength("foo"));
}
+ @Test
+ public void testIgnoreCase() {
+ StringSegment segment = new StringSegment(SAMPLE_STRING, true);
+ assertEquals(11, segment.getCommonPrefixLength(SAMPLE_STRING));
+ assertEquals(0, segment.getCommonPrefixLength("x"));
+ segment.setOffset(3);
+ assertEquals(5, segment.getCommonPrefixLength("RAdiO"));
+ }
+
private static void assertCharSequenceEquals(CharSequence a, CharSequence b) {
assertEquals(a.length(), b.length());
for (int i = 0; i < a.length(); i++) {