// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
-package com.ibm.icu.impl.number.parse;
+package com.ibm.icu.impl;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.UnicodeSet;
/**
- * A mutable class allowing for a String with a variable offset and length. The charAt, length, and
- * subSequence methods all operate relative to the fixed offset into the String.
+ * A mutable String wrapper with a variable offset and length and support for case folding.
+ * <p>
+ * The charAt, length, and subSequence methods all operate relative to the fixed offset into the String.
+ * <p>
+ * CAUTION: Since this class is mutable, it must not be used anywhere that an immutable object is
+ * required, like in a cache or as the key of a hash map.
*
* @author sffc
*/
private int end;
private boolean foldCase;
- public StringSegment(String str, int parseFlags) {
+ public StringSegment(String str, boolean foldCase) {
this.str = str;
this.start = 0;
this.end = str.length();
- this.foldCase = 0 != (parseFlags & ParsingUtils.PARSE_FLAG_IGNORE_CASE);
+ this.foldCase = foldCase;
}
public int getOffset() {
* Equivalent to <code>setOffset(getOffset()+delta)</code>.
*
* <p>
- * This method is usually called by a Matcher to register that a char was consumed. If the char is
- * strong (it usually is, except for things like whitespace), follow this with a call to
- * {@link ParsedNumber#setCharsConsumed}. For more information on strong chars, see that method.
+ * Number parsing note: This method is usually called by a Matcher to register that a char was
+ * consumed. If the char is strong (it usually is, except for things like whitespace), follow this
+ * with a call to ParsedNumber#setCharsConsumed(). For more information on strong chars, see that
+ * method.
*/
public void adjustOffset(int delta) {
assert start + delta >= 0;
}
/**
- * Adjusts the offset by the width of the current code point, either 1 or 2 chars.
+ * Adjusts the offset by the width of the current lead code point, either 1 or 2 chars.
*/
public void adjustOffsetByCodePoint() {
start += Character.charCount(getCodePoint());
* code point.
*
* <p>
- * <strong>Important:</strong> Most of the time, you should use {@link #matches}, which handles case
- * folding logic, instead of this method.
+ * <strong>Important:</strong> Most of the time, you should use {@link #startsWith}, which handles
+ * case folding logic, instead of this method.
*/
public int getCodePoint() {
assert start < end;
* <p>
- * This method will perform case folding if case folding is enabled for the parser.
+ * This method will perform case folding if case folding was enabled in the constructor.
*/
- public boolean matches(int otherCp) {
+ public boolean startsWith(int otherCp) {
return codePointsEqual(getCodePoint(), otherCp, foldCase);
}
/**
* Returns true if the first code point of this StringSegment is in the given UnicodeSet.
*/
- public boolean matches(UnicodeSet uniset) {
+ public boolean startsWith(UnicodeSet uniset) {
// TODO: Move UnicodeSet case-folding logic here.
// TODO: Handle string matches here instead of separately.
int cp = getCodePoint();
* since the first 2 characters are the same.
*
* <p>
- * This method will perform case folding if case folding is enabled for the parser.
+ * This method only returns offsets along code point boundaries.
+ *
+ * <p>
+ * This method will perform case folding if case folding was enabled in the constructor.
*/
public int getCommonPrefixLength(CharSequence other) {
return getPrefixLengthInternal(other, foldCase);
}
/**
- * Like {@link #getCommonPrefixLength}, but never performs case folding, even if case folding is
- * enabled for the parser.
+ * Like {@link #getCommonPrefixLength}, but never performs case folding, even if case folding was
+ * enabled in the constructor.
*/
public int getCaseSensitivePrefixLength(CharSequence other) {
return getPrefixLengthInternal(other, false);
private int getPrefixLengthInternal(CharSequence other, boolean foldCase) {
int offset = 0;
for (; offset < Math.min(length(), other.length());) {
- // TODO: case-fold code points, not chars
- char c1 = charAt(offset);
- char c2 = other.charAt(offset);
- if (!codePointsEqual(c1, c2, foldCase)) {
+ int cp1 = Character.codePointAt(this, offset);
+ int cp2 = Character.codePointAt(other, offset);
+ if (!codePointsEqual(cp1, cp2, foldCase)) {
break;
}
- offset++;
+ offset += Character.charCount(cp1);
}
return offset;
}
- // /**
- // * Case-folds the string if IGNORE_CASE flag is set; otherwise, returns the same string.
- // */
- // public static String maybeFold(String input, int parseFlags) {
- // UnicodeSet cwcf = UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.CWCF);
- // if (0 != (parseFlags & ParsingUtils.PARSE_FLAG_IGNORE_CASE) && cwcf.containsSome(input)) {
- // return UCharacter.foldCase(input, true);
- // } else {
- // return input;
- // }
- // }
-
private static final boolean codePointsEqual(int cp1, int cp2, boolean foldCase) {
if (cp1 == cp2) {
return true;
return cp1 == cp2;
}
+ /**
+  * Compares this segment, char by char, against any other CharSequence.
+  *
+  * <p>
+  * An object that is not a CharSequence (including null) is never equal to this segment. The
+  * comparison is delegated to Utility.charSequenceEquals() and never applies case folding; for a
+  * case-insensitive comparison, use {@link #getCommonPrefixLength} instead.
+  *
+  * <p>
+  * Note: the relation is not symmetric with String, since String#equals() only returns true for
+  * another String.
+  */
+ @Override
+ public boolean equals(Object other) {
+     return other instanceof CharSequence
+             && Utility.charSequenceEquals(this, (CharSequence) other);
+ }
+
+ /**
+ * Returns a hash code computed from the chars of this segment, i.e. the chars visible through
+ * charAt() and length(), as calculated by Utility.charSequenceHashCode().
+ *
+ * <p>
+ * Note: this is NOT the same as calling .toString().hashCode() on this object, because toString()
+ * returns the whole underlying string with brackets marking the segment.
+ */
+ @Override
+ public int hashCode() {
+ return Utility.charSequenceHashCode(this);
+ }
+
@Override
public String toString() {
return str.substring(0, start) + "[" + str.substring(start, end) + "]" + str.substring(end);
}
return r;
}
+
+ /**
+  * Tests whether two CharSequences hold exactly the same chars in the same order.
+  *
+  * <p>
+  * Two references to the same object (including two nulls) are equal; a null argument is never
+  * equal to a non-null one. Chars are compared directly, with no case folding.
+  *
+  * @param a the first sequence; may be null
+  * @param b the second sequence; may be null
+  * @return true if both are the same reference or have equal length and equal chars
+  */
+ public static boolean charSequenceEquals(CharSequence a, CharSequence b) {
+     if (a == b) {
+         return true;
+     }
+     if (a == null || b == null || a.length() != b.length()) {
+         return false;
+     }
+     // Lengths match from here on, so a single index walks both sequences.
+     int index = 0;
+     while (index < a.length()) {
+         if (a.charAt(index) != b.charAt(index)) {
+             return false;
+         }
+         index++;
+     }
+     return true;
+ }
+
+ /**
+  * Computes a hash code for a CharSequence with the same 31-based polynomial that
+  * String#hashCode() uses, so the result matches the hash code of a String holding the same chars.
+  *
+  * <p>
+  * This equals value.toString().hashCode() whenever toString() returns exactly the chars of the
+  * sequence.
+  *
+  * @param value the sequence to hash; must not be null
+  * @return the hash code of the chars in value, or 0 for an empty sequence
+  */
+ public static int charSequenceHashCode(CharSequence value) {
+     int result = 0;
+     int index = 0;
+     while (index < value.length()) {
+         result = 31 * result + value.charAt(index);
+         index++;
+     }
+     return result;
+ }
}
import java.util.Comparator;
import com.ibm.icu.impl.StandardPlural;
+import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.impl.number.AffixPatternProvider;
import com.ibm.icu.impl.number.AffixUtils;
import java.util.ArrayList;
import java.util.List;
+import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.text.UnicodeSet;
/**
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
+import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.text.UnicodeSet;
/**
@Override
public boolean match(StringSegment segment, ParsedNumber result) {
- if (segment.matches(cp)) {
+ if (segment.startsWith(cp)) {
segment.adjustOffsetByCodePoint();
result.setCharsConsumed(segment);
}
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
+import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.Currency;
import com.ibm.icu.util.ULocale;
import java.util.Iterator;
+import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.impl.TextTrieMap;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.Currency;
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
+import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.impl.number.DecimalQuantity_DualStorageBCD;
import com.ibm.icu.impl.number.Grouper;
import com.ibm.icu.impl.number.parse.UnicodeSetStaticCache.Key;
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
+import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.text.UnicodeSet;
/**
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
+import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.text.DecimalFormatSymbols;
/**
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
+import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.text.DecimalFormatSymbols;
/**
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
+import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.text.DecimalFormatSymbols;
import com.ibm.icu.text.UnicodeSet;
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
+import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.text.UnicodeSet;
/**
import java.util.Comparator;
import java.util.List;
+import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.impl.number.AffixPatternProvider;
import com.ibm.icu.impl.number.CustomSymbolCurrency;
import com.ibm.icu.impl.number.DecimalFormatProperties;
public void parse(String input, int start, boolean greedy, ParsedNumber result) {
assert frozen;
assert start >= 0 && start < input.length();
- StringSegment segment = new StringSegment(input, parseFlags);
+ StringSegment segment = new StringSegment(input,
+ 0 != (parseFlags & ParsingUtils.PARSE_FLAG_IGNORE_CASE));
segment.adjustOffset(start);
if (greedy) {
parseGreedyRecursive(segment, result);
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
+import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.text.UnicodeSet;
/**
import java.math.BigDecimal;
import java.util.Comparator;
+import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.impl.number.DecimalQuantity_DualStorageBCD;
/**
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
+import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.text.DecimalFormatSymbols;
/**
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
+import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.text.DecimalFormatSymbols;
/**
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
+import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.text.DecimalFormatSymbols;
/**
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
+import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.impl.number.Grouper;
import com.ibm.icu.text.DecimalFormatSymbols;
import com.ibm.icu.text.UnicodeSet;
// Allow a sign, and then try to match digits.
boolean minusSign = false;
- if (segment.matches(UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.MINUS_SIGN))) {
+ if (segment.startsWith(UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.MINUS_SIGN))) {
minusSign = true;
segment.adjustOffsetByCodePoint();
- } else if (segment.matches(UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.PLUS_SIGN))) {
+ } else if (segment.startsWith(UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.PLUS_SIGN))) {
segment.adjustOffsetByCodePoint();
}
import java.util.ArrayList;
import java.util.List;
+import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.text.UnicodeSet;
/**
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
+import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.text.UnicodeSet;
/**
}
}
- if (segment.matches(uniSet)) {
+ if (segment.startsWith(uniSet)) {
segment.adjustOffsetByCodePoint();
accept(segment, result);
return false;
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
+import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.text.UnicodeSet;
/**
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
-package com.ibm.icu.dev.test.number;
+package com.ibm.icu.dev.test.impl;
import static org.junit.Assert.assertEquals;
import org.junit.Test;
-import com.ibm.icu.impl.number.parse.StringSegment;
+import com.ibm.icu.impl.StringSegment;
/**
* @author sffc
@Test
public void testOffset() {
- StringSegment segment = new StringSegment(SAMPLE_STRING, 0);
+ StringSegment segment = new StringSegment(SAMPLE_STRING, false);
assertEquals(0, segment.getOffset());
- segment.adjustOffset(3);
+ segment.adjustOffsetByCodePoint();
+ assertEquals(2, segment.getOffset());
+ segment.adjustOffset(1);
assertEquals(3, segment.getOffset());
segment.adjustOffset(2);
assertEquals(5, segment.getOffset());
@Test
public void testLength() {
- StringSegment segment = new StringSegment(SAMPLE_STRING, 0);
+ StringSegment segment = new StringSegment(SAMPLE_STRING, false);
assertEquals(11, segment.length());
segment.adjustOffset(3);
assertEquals(8, segment.length());
@Test
public void testCharAt() {
- StringSegment segment = new StringSegment(SAMPLE_STRING, 0);
+ StringSegment segment = new StringSegment(SAMPLE_STRING, false);
assertCharSequenceEquals(SAMPLE_STRING, segment);
segment.adjustOffset(3);
assertCharSequenceEquals("radio 📻", segment);
@Test
public void testGetCodePoint() {
- StringSegment segment = new StringSegment(SAMPLE_STRING, 0);
+ StringSegment segment = new StringSegment(SAMPLE_STRING, false);
assertEquals(0x1F4FB, segment.getCodePoint());
segment.setLength(1);
assertEquals(0xD83D, segment.getCodePoint());
@Test
public void testCommonPrefixLength() {
- StringSegment segment = new StringSegment(SAMPLE_STRING, 0);
+ StringSegment segment = new StringSegment(SAMPLE_STRING, true);
assertEquals(11, segment.getCommonPrefixLength(SAMPLE_STRING));
assertEquals(4, segment.getCommonPrefixLength("📻 r"));
assertEquals(3, segment.getCommonPrefixLength("📻 x"));
assertEquals(0, segment.getCommonPrefixLength("x"));
assertEquals(0, segment.getCommonPrefixLength(""));
segment.adjustOffset(3);
- assertEquals(0, segment.getCommonPrefixLength("RADiO"));
+ assertEquals(5, segment.getCommonPrefixLength("raDio"));
assertEquals(5, segment.getCommonPrefixLength("radio"));
assertEquals(2, segment.getCommonPrefixLength("rafio"));
assertEquals(0, segment.getCommonPrefixLength("fadio"));
assertEquals(0, segment.getCommonPrefixLength(""));
+ assertEquals(5, segment.getCaseSensitivePrefixLength("radio"));
+ assertEquals(2, segment.getCaseSensitivePrefixLength("raDio"));
segment.setLength(3);
assertEquals(3, segment.getCommonPrefixLength("radio"));
assertEquals(2, segment.getCommonPrefixLength("rafio"));
import org.junit.Test;
+import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.impl.number.DecimalFormatProperties;
import com.ibm.icu.impl.number.parse.IgnorablesMatcher;
import com.ibm.icu.impl.number.parse.MinusSignMatcher;
import com.ibm.icu.impl.number.parse.PercentMatcher;
import com.ibm.icu.impl.number.parse.PlusSignMatcher;
import com.ibm.icu.impl.number.parse.SeriesMatcher;
-import com.ibm.icu.impl.number.parse.StringSegment;
import com.ibm.icu.impl.number.parse.UnicodeSetStaticCache;
import com.ibm.icu.impl.number.parse.UnicodeSetStaticCache.Key;
import com.ibm.icu.text.DecimalFormatSymbols;
int expectedOffset = (Integer) cas[1];
boolean expectedMaybeMore = (Boolean) cas[2];
- StringSegment segment = new StringSegment(input, 0);
+ StringSegment segment = new StringSegment(input, false);
ParsedNumber result = new ParsedNumber();
boolean actualMaybeMore = series.match(segment, result);
int actualOffset = segment.getOffset();
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
+import java.util.Random;
import java.util.Set;
import org.junit.Test;
public String CheckSourceLocale() {
return TestFmwk.sourceLocation();
}
+
+ static final String RANDOM_CHARS = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
+ static final Random RANDOM = new Random(2018);
+
+ /**
+  * Checks Utility.charSequenceEquals() and Utility.charSequenceHashCode() over 1000 rounds of
+  * pseudo-random input. In each round, a and b share the same random length (0 to 4) and c is
+  * three chars longer, so c can never be equal to a or b.
+  */
+ @Test
+ public void TestCharSequenceEqualsAndHashCode() {
+     for (int t=0; t<1000; t++) {
+         int length = RANDOM.nextInt(5);
+         CharSequence a = randomCharSequence(length);
+         CharSequence b = randomCharSequence(length);
+         CharSequence c = randomCharSequence(length + 3);
+         String message = "a=" + a + "; b=" + b + "; c=" + c;
+
+         // Reflexive comparisons, and comparisons against the longer sequence c.
+         assertTrue(message, Utility.charSequenceEquals(a, a));
+         assertFalse(message, Utility.charSequenceEquals(a, c));
+         assertTrue(message, Utility.charSequenceEquals(b, b));
+         assertFalse(message, Utility.charSequenceEquals(b, c));
+         assertFalse(message, Utility.charSequenceEquals(c, a));
+         assertFalse(message, Utility.charSequenceEquals(c, b));
+         assertTrue(message, Utility.charSequenceEquals(c, c));
+
+         // a and b have the same length, so they are equal exactly when their chars match. This
+         // also covers the zero-length case, where both are empty.
+         if (a.toString().equals(b.toString())) {
+             assertTrue(message, Utility.charSequenceEquals(a, b));
+             assertTrue(message, Utility.charSequenceEquals(b, a));
+         } else {
+             assertFalse(message, Utility.charSequenceEquals(a, b));
+             assertFalse(message, Utility.charSequenceEquals(b, a));
+         }
+
+         // The reference value (String#hashCode) is passed as "expected" and the value under test
+         // as "actual", so that a failure message reports them the right way round.
+         assertEquals(message, a.toString().hashCode(), Utility.charSequenceHashCode(a));
+         assertEquals(message, b.toString().hashCode(), Utility.charSequenceHashCode(b));
+         assertEquals(message, c.toString().hashCode(), Utility.charSequenceHashCode(c));
+     }
+ }
+
+ /**
+  * Builds a StringBuilder holding the requested number of chars, each drawn from RANDOM_CHARS at
+  * an index supplied by the shared RANDOM generator.
+  */
+ private CharSequence randomCharSequence(int length) {
+     StringBuilder builder = new StringBuilder();
+     int remaining = length;
+     while (remaining > 0) {
+         int index = RANDOM.nextInt(RANDOM_CHARS.length());
+         builder.append(RANDOM_CHARS.charAt(index));
+         remaining--;
+     }
+     return builder;
+ }
}