From 7cfbb489bdabe466aba61bb93f4999e4549e1100 Mon Sep 17 00:00:00 2001 From: Shane Carr Date: Sat, 25 Mar 2017 07:41:42 +0000 Subject: [PATCH] ICU-13060 Fixing hash code iteration order issues (#13065) and signAlwaysShown parsing. X-SVN-Rev: 39937 --- .../icu/impl/number/AffixPatternUtils.java | 103 ++++++++----- .../ibm/icu/impl/number/PNAffixGenerator.java | 16 +- .../src/com/ibm/icu/impl/number/Parse.java | 144 ++++++++++++------ .../icu/dev/test/format/NumberFormatTest.java | 49 +++++- .../test/number/AffixPatternUtilsTest.java | 28 ++++ 5 files changed, 242 insertions(+), 98 deletions(-) diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/AffixPatternUtils.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/AffixPatternUtils.java index 992028dbe87..7b4126ae7d2 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/AffixPatternUtils.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/AffixPatternUtils.java @@ -95,45 +95,6 @@ public class AffixPatternUtils { /** Represents a sequence of four or more currency symbols. */ public static final int TYPE_CURRENCY_OVERFLOW = -15; - /** - * Checks whether the specified affix pattern has any unquoted currency symbols ("¤"). - * - * @param patternString The string to check for currency symbols. - * @return true if the literal has at least one unquoted currency symbol; false otherwise. - */ - public static boolean hasCurrencySymbols(CharSequence patternString) { - if (patternString == null) return false; - int offset = 0; - int state = STATE_BASE; - boolean result = false; - for (; offset < patternString.length(); ) { - int cp = Character.codePointAt(patternString, offset); - switch (state) { - case STATE_BASE: - if (cp == '¤') { - result = true; - } else if (cp == '\'') { - state = STATE_INSIDE_QUOTE; - } - break; - case STATE_INSIDE_QUOTE: - if (cp == '\'') { - state = STATE_BASE; - } - break; - default: - throw new AssertionError(); - } - offset += Character.charCount(cp); - } - - if (state == STATE_INSIDE_QUOTE) { - throw new IllegalArgumentException("Unterminated quote: \"" + patternString + "\""); - } else { - return result; - } - } - /** * Estimates the number of code points present in an unescaped version of the affix pattern string * (one that would be returned by {@link #unescape}), assuming that all interpolated symbols @@ -327,6 +288,70 @@ public class AffixPatternUtils { } } + /** + * Checks whether the given affix pattern contains at least one token of the given type, which is + * one of the constants "TYPE_" in {@link AffixPatternUtils}. + * + * @param affixPattern The affix pattern to check. + * @param type The token type. + * @return true if the affix pattern contains the given token type; false otherwise. + */ + public static boolean containsType(CharSequence affixPattern, int type) { + if (affixPattern == null || affixPattern.length() == 0) return false; + long tag = 0L; + while (hasNext(tag, affixPattern)) { + tag = nextToken(tag, affixPattern); + if (getTypeOrCp(tag) == type) { + return true; + } + } + return false; + } + + /** + * Checks whether the specified affix pattern has any unquoted currency symbols ("¤"). + * + * @param affixPattern The string to check for currency symbols. + * @return true if the literal has at least one unquoted currency symbol; false otherwise. + */ + public static boolean hasCurrencySymbols(CharSequence affixPattern) { + if (affixPattern == null || affixPattern.length() == 0) return false; + long tag = 0L; + while (hasNext(tag, affixPattern)) { + tag = nextToken(tag, affixPattern); + int typeOrCp = getTypeOrCp(tag); + if (typeOrCp == AffixPatternUtils.TYPE_CURRENCY_SINGLE + || typeOrCp == AffixPatternUtils.TYPE_CURRENCY_DOUBLE + || typeOrCp == AffixPatternUtils.TYPE_CURRENCY_TRIPLE + || typeOrCp == AffixPatternUtils.TYPE_CURRENCY_OVERFLOW) { + return true; + } + } + return false; + } + + /** + * Replaces all occurrences of tokens with the given type with the given replacement char. + * + * @param affixPattern The source affix pattern (does not get modified). + * @param type The token type. + * @param replacementChar The char to substitute in place of chars of the given token type. + * @return A string containing the new affix pattern. + */ + public static String replaceType(CharSequence affixPattern, int type, char replacementChar) { + if (affixPattern == null || affixPattern.length() == 0) return ""; + char[] chars = affixPattern.toString().toCharArray(); + long tag = 0L; + while (hasNext(tag, affixPattern)) { + tag = nextToken(tag, affixPattern); + if (getTypeOrCp(tag) == type) { + int offset = getOffset(tag); + chars[offset - 1] = replacementChar; + } + } + return new String(chars); + } + /** * Returns the next token from the affix pattern. * diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/PNAffixGenerator.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/PNAffixGenerator.java index 7bc2a7417a1..f50704b981e 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/PNAffixGenerator.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/PNAffixGenerator.java @@ -116,10 +116,10 @@ public class PNAffixGenerator { return getModifiersWithPlusSign(symbols, curr1, curr2, curr3, properties); } - CharSequence ppp = properties.getPositivePrefixPattern(); - CharSequence psp = properties.getPositiveSuffixPattern(); - CharSequence npp = properties.getNegativePrefixPattern(); - CharSequence nsp = properties.getNegativeSuffixPattern(); + String ppp = properties.getPositivePrefixPattern(); + String psp = properties.getPositiveSuffixPattern(); + String npp = properties.getNegativePrefixPattern(); + String nsp = properties.getNegativeSuffixPattern(); // Set sb1/sb2 to the positive prefix/suffix. sb1.clear(); @@ -151,10 +151,10 @@ public class PNAffixGenerator { String curr3, IProperties properties) { - CharSequence ppp = properties.getPositivePrefixPattern(); - CharSequence psp = properties.getPositiveSuffixPattern(); - CharSequence npp = properties.getNegativePrefixPattern(); - CharSequence nsp = properties.getNegativeSuffixPattern(); + String ppp = properties.getPositivePrefixPattern(); + String psp = properties.getPositiveSuffixPattern(); + String npp = properties.getNegativePrefixPattern(); + String nsp = properties.getNegativeSuffixPattern(); // There are three cases, listed below with their expected outcomes. // TODO: Should we handle the cases when the positive subpattern has a '+' already? diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/Parse.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/Parse.java index 0640e990a34..92f34e23a56 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/Parse.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/Parse.java @@ -717,41 +717,53 @@ public class Parse { static final AffixHolder EMPTY_POSITIVE = new AffixHolder("", "", true, false); static final AffixHolder EMPTY_NEGATIVE = new AffixHolder("", "", true, true); - static final AffixHolder DEFAULT_POSITIVE = new AffixHolder("+", "", false, false); - static final AffixHolder DEFAULT_NEGATIVE = new AffixHolder("-", "", false, true); static void addToState(ParserState state, IProperties properties) { AffixHolder pp = fromPropertiesPositivePattern(properties); AffixHolder np = fromPropertiesNegativePattern(properties); AffixHolder ps = fromPropertiesPositiveString(properties); AffixHolder ns = fromPropertiesNegativeString(properties); - if (pp == null && ps == null) { - if (properties.getSignAlwaysShown()) { - state.affixHolders.add(DEFAULT_POSITIVE); - } else { - state.affixHolders.add(EMPTY_POSITIVE); - } - } else { - if (pp != null) state.affixHolders.add(pp); - if (ps != null) state.affixHolders.add(ps); - } - if (np == null && ns == null) { - state.affixHolders.add(DEFAULT_NEGATIVE); - } else { - if (np != null) state.affixHolders.add(np); - if (ns != null) state.affixHolders.add(ns); - } + if (pp != null) state.affixHolders.add(pp); + if (ps != null) state.affixHolders.add(ps); + if (np != null) state.affixHolders.add(np); + if (ns != null) state.affixHolders.add(ns); } static AffixHolder fromPropertiesPositivePattern(IProperties properties) { String ppp = properties.getPositivePrefixPattern(); String psp = properties.getPositiveSuffixPattern(); + if (properties.getSignAlwaysShown()) { + // TODO: This logic is somewhat duplicated from PNAffixGenerator. + boolean foundSign = false; + String npp = properties.getNegativePrefixPattern(); + String nsp = properties.getNegativeSuffixPattern(); + if (AffixPatternUtils.containsType(npp, AffixPatternUtils.TYPE_MINUS_SIGN)) { + foundSign = true; + ppp = AffixPatternUtils.replaceType(npp, AffixPatternUtils.TYPE_MINUS_SIGN, '+'); + } + if (AffixPatternUtils.containsType(nsp, AffixPatternUtils.TYPE_MINUS_SIGN)) { + foundSign = true; + psp = AffixPatternUtils.replaceType(nsp, AffixPatternUtils.TYPE_MINUS_SIGN, '+'); + } + if (!foundSign) { + ppp = "+" + ppp; + } + } return getInstance(ppp, psp, false, false); } static AffixHolder fromPropertiesNegativePattern(IProperties properties) { String npp = properties.getNegativePrefixPattern(); String nsp = properties.getNegativeSuffixPattern(); + if (npp == null && nsp == null) { + npp = properties.getPositivePrefixPattern(); + nsp = properties.getPositiveSuffixPattern(); + if (npp == null) { + npp = "-"; + } else { + npp = "-" + npp; + } + } return getInstance(npp, nsp, false, true); } @@ -768,7 +780,7 @@ public class Parse { } static AffixHolder getInstance(String p, String s, boolean strings, boolean negative) { - if (p == null && s == null) return null; + if (p == null && s == null) return negative ? EMPTY_NEGATIVE : EMPTY_POSITIVE; if (p == null) p = ""; if (s == null) s = ""; if (p.length() == 0 && s.length() == 0) return negative ? EMPTY_NEGATIVE : EMPTY_POSITIVE; @@ -1251,9 +1263,9 @@ public class Parse { break; case INSIDE_AFFIX_PATTERN: - acceptAffixPatternOffset(cp, state, item); + long added = acceptAffixPatternOffset(cp, state, item); // Accept arbitrary bidi and whitespace (if lenient) in the middle of affixes. - if (state.length == 0 && isIgnorable(cp, state)) { + if (added == 0L && isIgnorable(cp, state)) { state.getNext().copyFrom(item, item.name, cp); } break; @@ -1529,20 +1541,18 @@ public class Parse { */ private static void acceptMinusOrPlusSign( int cp, StateName nextName, ParserState state, StateItem item, boolean exponent) { - acceptMinusOrPlusSign(cp, nextName, null, state, item, exponent); + acceptMinusSign(cp, nextName, null, state, item, exponent); + acceptPlusSign(cp, nextName, null, state, item, exponent); } - private static void acceptMinusOrPlusSign( + private static long acceptMinusSign( int cp, StateName returnTo1, StateName returnTo2, ParserState state, StateItem item, boolean exponent) { - if (UNISET_PLUS.contains(cp)) { - StateItem next = state.getNext().copyFrom(item, returnTo1, -1); - next.returnTo1 = returnTo2; - } else if (UNISET_MINUS.contains(cp)) { + if (UNISET_MINUS.contains(cp)) { StateItem next = state.getNext().copyFrom(item, returnTo1, -1); next.returnTo1 = returnTo2; if (exponent) { @@ -1550,6 +1560,25 @@ public class Parse { } else { next.sawNegative = true; } + return 1L << state.lastInsertedIndex(); + } else { + return 0L; + } + } + + private static long acceptPlusSign( + int cp, + StateName returnTo1, + StateName returnTo2, + ParserState state, + StateItem item, + boolean exponent) { + if (UNISET_PLUS.contains(cp)) { + StateItem next = state.getNext().copyFrom(item, returnTo1, -1); + next.returnTo1 = returnTo2; + return 1L << state.lastInsertedIndex(); + } else { + return 0L; } } @@ -1700,29 +1729,33 @@ public class Parse { // At most one item can be added upon consuming a string. if (added != 0) { int i = state.lastInsertedIndex(); - // The following six lines are duplicated below; not enough for their own function. - state.getItem(i).affix = holder; - if (prefix) state.getItem(i).sawPrefix = true; - if (!prefix) state.getItem(i).sawSuffix = true; - if (holder.negative) state.getItem(i).sawNegative = true; - state.getItem(i).score++; // reward for consuming a prefix/suffix. + recordAffixHolder(holder, state.getItem(i), prefix); } } else { long added = acceptAffixPattern(cp, nextName, state, item, str, 0); // Multiple items can be added upon consuming an affix pattern. for (int i = Long.numberOfTrailingZeros(added); (1L << i) <= added; i++) { if (((1L << i) & added) != 0) { - // The following six lines are duplicated above; not enough for their own function. - state.getItem(i).affix = holder; - if (prefix) state.getItem(i).sawPrefix = true; - if (!prefix) state.getItem(i).sawSuffix = true; - if (holder.negative) state.getItem(i).sawNegative = true; - state.getItem(i).score++; // reward for consuming a prefix/suffix. + recordAffixHolder(holder, state.getItem(i), prefix); } } } } + private static void recordAffixHolder(AffixHolder holder, StateItem next, boolean prefix) { + next.affix = holder; + if (prefix) next.sawPrefix = true; + if (!prefix) next.sawSuffix = true; + if (holder.negative) next.sawNegative = true; + // 10 point reward for consuming a prefix/suffix: + next.score += 10; + // 1 point reward for positive holders (if there is ambiguity, we want to favor positive): + if (!holder.negative) next.score += 1; + // 5 point reward for affix holders that have an empty prefix or suffix (we won't see them again): + if (!next.sawPrefix && holder.p.isEmpty()) next.score += 5; + if (!next.sawSuffix && holder.s.isEmpty()) next.score += 5; + } + private static void acceptStringOffset(int cp, ParserState state, StateItem item) { acceptString( cp, @@ -1814,8 +1847,8 @@ public class Parse { return 0L; } - private static void acceptAffixPatternOffset(int cp, ParserState state, StateItem item) { - acceptAffixPattern( + private static long acceptAffixPatternOffset(int cp, ParserState state, StateItem item) { + return acceptAffixPattern( cp, item.returnTo1, state, item, item.currentAffixPattern, item.currentStepwiseParserTag); } @@ -1863,12 +1896,16 @@ public class Parse { resolvedPlusSign = true; break; case AffixPatternUtils.TYPE_PERCENT: - resolvedCp = '%'; // accept ASCII percent as well as locale percent resolvedStr = state.symbols.getPercentString(); + if (resolvedStr.length() != 1 || resolvedStr.charAt(0) != '%') { + resolvedCp = '%'; // accept ASCII percent as well as locale percent + } break; case AffixPatternUtils.TYPE_PERMILLE: - resolvedCp = '‰'; // accept ASCII permille as well as locale permille resolvedStr = state.symbols.getPerMillString(); + if (resolvedStr.length() != 1 || resolvedStr.charAt(0) != '‰') { + resolvedCp = '‰'; // accept ASCII permille as well as locale permille + } break; case AffixPatternUtils.TYPE_CURRENCY_SINGLE: case AffixPatternUtils.TYPE_CURRENCY_DOUBLE: @@ -1911,22 +1948,29 @@ public class Parse { } added |= 1L << state.lastInsertedIndex(); } - if (resolvedMinusSign || resolvedPlusSign) { - // Sign + if (resolvedMinusSign) { if (hasNext) { - acceptMinusOrPlusSign(cp, StateName.INSIDE_AFFIX_PATTERN, returnTo, state, item, false); + added |= acceptMinusSign(cp, StateName.INSIDE_AFFIX_PATTERN, returnTo, state, item, false); } else { - acceptMinusOrPlusSign(cp, returnTo, null, state, item, false); + added |= acceptMinusSign(cp, returnTo, null, state, item, false); } - // Decide whether to accept a custom string - if (resolvedMinusSign) { + if (added == 0L) { + // Attempt to accept custom minus sign string String mss = state.symbols.getMinusSignString(); int mssCp = Character.codePointAt(mss, 0); if (mss.length() != Character.charCount(mssCp) || !UNISET_MINUS.contains(mssCp)) { resolvedStr = mss; } } - if (resolvedPlusSign) { + } + if (resolvedPlusSign) { + if (hasNext) { + added |= acceptPlusSign(cp, StateName.INSIDE_AFFIX_PATTERN, returnTo, state, item, false); + } else { + added |= acceptPlusSign(cp, returnTo, null, state, item, false); + } + if (added == 0L) { + // Attempt to accept custom plus sign string String pss = state.symbols.getPlusSignString(); int pssCp = Character.codePointAt(pss, 0); if (pss.length() != Character.charCount(pssCp) || !UNISET_MINUS.contains(pssCp)) { diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTest.java index 2a3f9849be9..cef0e73e0e5 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTest.java @@ -4302,6 +4302,10 @@ public class NumberFormatTest extends TestFmwk { int end = iterator.getRunLimit(); Iterator it = iterator.getAttributes().keySet().iterator(); AttributedCharacterIterator.Attribute attribute = (AttributedCharacterIterator.Attribute) it.next(); + // For positions with both INTEGER and GROUPING attributes, we want the GROUPING attribute. + if (it.hasNext() && attribute.equals(NumberFormat.Field.INTEGER)) { + attribute = (AttributedCharacterIterator.Attribute) it.next(); + } Object value = iterator.getAttribute(attribute); result.add(new FieldContainer(start, end, attribute, value)); iterator.setIndex(end); @@ -5130,6 +5134,36 @@ public class NumberFormatTest extends TestFmwk { assertEquals("Quote should be escapable in padding syntax", "a''12b", result); } + @Test + public void testParseAmbiguousAffixes() { + BigDecimal positive = new BigDecimal("0.0567"); + BigDecimal negative = new BigDecimal("-0.0567"); + DecimalFormat df = new DecimalFormat(); + df.setParseBigDecimal(true); + + String[] patterns = { "+0.00%;-0.00%", "+0.00%;0.00%", "0.00%;-0.00%" }; + String[] inputs = { "+5.67%", "-5.67%", "5.67%" }; + boolean[][] expectedPositive = { + { true, false, true }, + { true, false, false }, + { true, false, true } + }; + + for (int i=0; i " + parsedAct, + 0, parsedExp.compareTo(parsedAct)); } } } diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/AffixPatternUtilsTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/AffixPatternUtilsTest.java index 3930741eb5a..72c0a55d265 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/AffixPatternUtilsTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/AffixPatternUtilsTest.java @@ -96,6 +96,34 @@ public class AffixPatternUtilsTest { } } + @Test + public void testContainsReplaceType() { + Object[][] cases = { + {"", false, ""}, + {"-", true, "+"}, + {"-a", true, "+a"}, + {"a-", true, "a+"}, + {"a-b", true, "a+b"}, + {"--", true, "++"}, + {"x", false, "x"} + }; + + for (Object[] cas : cases) { + String input = (String) cas[0]; + boolean hasMinusSign = (Boolean) cas[1]; + String output = (String) cas[2]; + + assertEquals( + "Contains on input " + input, + hasMinusSign, + AffixPatternUtils.containsType(input, AffixPatternUtils.TYPE_MINUS_SIGN)); + assertEquals( + "Replace on input" + input, + output, + AffixPatternUtils.replaceType(input, AffixPatternUtils.TYPE_MINUS_SIGN, '+')); + } + } + @Test public void testInvalid() { String[] invalidExamples = {"'", "x'", "'x", "'x''", "''x'"}; -- 2.40.0