}
void Edits::reset() {
- length = 0;
+ length = delta = 0;  // Zero delta together with length, so no length delta from earlier edits survives a reset.
}
void Edits::addUnchanged(int32_t unchangedLength) {
}
int32_t newDelta = newLength - oldLength;
if (newDelta != 0) {
- if (newDelta > 0 ? newDelta > (INT32_MAX - delta) : newDelta < (INT32_MIN - delta)) {
+ if ((newDelta > 0 && delta >= 0 && newDelta > (INT32_MAX - delta)) ||
+ (newDelta < 0 && delta < 0 && newDelta < (INT32_MIN - delta))) {
// Integer overflow or underflow.
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
return;
Edits::Iterator::Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs) :
array(a), index(0), length(len), remaining(0),
- onlyChanges(oc), coarse(crs),
+ onlyChanges_(oc), coarse(crs),  // Member is now onlyChanges_; next(UBool onlyChanges, ...) uses the old name as a parameter.
changed(FALSE), oldLength_(0), newLength_(0),
srcIndex(0), replIndex(0), destIndex(0) {}
} else if (head < LENGTH_IN_2TRAIL) {
U_ASSERT(index < length);
U_ASSERT(array[index] >= 0x8000);
- return array[index++];
+ return array[index++] & 0x7fff;
} else {
U_ASSERT((index + 2) <= length);
U_ASSERT(array[index] >= 0x8000);
}
UBool Edits::Iterator::noNext() {
- // Empty span beyond the string.
+ // No change beyond the string.
+ changed = FALSE;  // Clear the flag so the value from the previous span does not linger.
oldLength_ = newLength_ = 0;
return FALSE;
}
-UBool Edits::Iterator::next(UErrorCode &errorCode) {
+UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) { return FALSE; }
// We have an errorCode in case we need to start guarding against integer overflows.
// It is also convenient for caller loops if we bail out when an error was set elsewhere.
if (U_FAILURE(errorCode) || i < 0) { return FALSE; }
if (i < srcIndex) {
// Reset the iterator to the start.
- index = remaining = srcIndex = replIndex = destIndex = 0;
+ index = remaining = oldLength_ = newLength_ = srcIndex = replIndex = destIndex = 0;
} else if (i < (srcIndex + oldLength_)) {
// The index is in the current span.
return TRUE;
}
- while (next(errorCode)) {
+ while (next(FALSE, errorCode)) {
if (i < (srcIndex + oldLength_)) {
// The index is in the current span.
return TRUE;
* @return TRUE if there is another edit
* @draft ICU 59
*/
- UBool next(UErrorCode &errorCode);
+ UBool next(UErrorCode &errorCode) { return next(onlyChanges_, errorCode); }
/**
* Finds the edit that contains the source index.
int32_t readLength(int32_t head);
void updateIndexes();
UBool noNext();
+ UBool next(UBool onlyChanges, UErrorCode &errorCode);
const uint16_t *array;
int32_t index, length;
int32_t remaining;
- UBool onlyChanges, coarse;
+ UBool onlyChanges_, coarse;
UBool changed;
int32_t oldLength_, newLength_;
*/
#include "unicode/std_string.h"
+#include "unicode/edits.h"
#include "unicode/uchar.h"
#include "unicode/ures.h"
#include "unicode/uloc.h"
#include "unicode/tstdtmod.h"
#include "cmemory.h"
+struct EditChange {  // One expected edit, as compared by checkEditsIter().
+ UBool change;  // Expected hasChange() value.
+ int32_t oldLength, newLength;  // Expected oldLength() and newLength() values.
+};
+
+class StringCaseTest: public IntlTest {  // Test class declared in this file; instances come from createStringCaseTest().
+public:
+ StringCaseTest();
+ virtual ~StringCaseTest();
+
+ void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=0);
+
+ void TestCaseConversion();
+
+ void TestCasingImpl(const UnicodeString &input,
+ const UnicodeString &output,
+ int32_t whichCase,
+ void *iter, const char *localeID, uint32_t options);
+ void TestCasing();
+ void TestFullCaseFoldingIterator();
+ void TestGreekUpper();
+ void TestLongUpper();
+ void TestMalformedUTF8();
+ void TestBufferOverflow();
+ void TestEdits();  // Exercises Edits and its coarse/fine iterators.
+
+private:
+ void assertGreekUpper(const char *s, const char *expected);
+ void checkEditsIter(  // Helper for TestEdits(): checks a pair of iterators against an expected edit list.
+ const char *name, Edits::Iterator ei1, Edits::Iterator ei2, // two equal iterators
+ EditChange expected[], int32_t expLength, UBool withUnchanged,
+ UErrorCode &errorCode);
+
+ Locale GREEK_LOCALE_;  // Initialized from the locale ID "el" in the constructor.
+};
+
StringCaseTest::StringCaseTest() : GREEK_LOCALE_("el") {}  // Builds GREEK_LOCALE_ from the locale ID "el".
StringCaseTest::~StringCaseTest() {}  // Empty body; no explicit cleanup is performed here.
+extern IntlTest *createStringCaseTest() {  // Factory: lets code that only sees IntlTest obtain a new StringCaseTest.
+ return new StringCaseTest();
+}
+
void
StringCaseTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
if(exec) {
TESTCASE_AUTO(TestLongUpper);
TESTCASE_AUTO(TestMalformedUTF8);
TESTCASE_AUTO(TestBufferOverflow);
+ TESTCASE_AUTO(TestEdits);
TESTCASE_AUTO_END;
}
errorCode.reset();
#endif // U_HAVE_STD_STRING
}
+
+void StringCaseTest::checkEditsIter(  // Checks ei1 via next() and ei2 via findSourceIndex() against expected[0..expLength).
+ const char *name, Edits::Iterator ei1, Edits::Iterator ei2, // two equal iterators
+ EditChange expected[], int32_t expLength, UBool withUnchanged,
+ UErrorCode &errorCode) {
+ assertFalse(name, ei2.findSourceIndex(-1, errorCode));  // A negative source index must not be found.
+
+ char msg[100];  // NOTE(review): fixed-size buffer filled by sprintf below; assumes name stays short.
+ int32_t expSrcIndex = 0;  // Running sum of expected oldLength values.
+ int32_t expDestIndex = 0;  // Running sum of expected newLength values.
+ int32_t expReplIndex = 0;  // Running sum of newLength over changed entries only.
+ for (int32_t expIndex = 0; expIndex < expLength; ++expIndex) {
+ sprintf(msg, "%s %d", name, (int)expIndex);
+ if (withUnchanged || expected[expIndex].change) {  // When withUnchanged is FALSE, ei1 is only advanced for changed entries.
+ assertTrue(msg, ei1.next(errorCode));
+ assertEquals(msg, expected[expIndex].change, ei1.hasChange());
+ assertEquals(msg, expected[expIndex].oldLength, ei1.oldLength());
+ assertEquals(msg, expected[expIndex].newLength, ei1.newLength());
+ assertEquals(msg, expSrcIndex, ei1.sourceIndex());
+ assertEquals(msg, expDestIndex, ei1.destinationIndex());
+ assertEquals(msg, expReplIndex, ei1.replacementIndex());
+ }
+
+ if (expected[expIndex].oldLength > 0) {  // Entries with an empty source span are not looked up by source index.
+ assertTrue(msg, ei2.findSourceIndex(expSrcIndex, errorCode));
+ assertEquals(msg, expected[expIndex].change, ei2.hasChange());
+ assertEquals(msg, expected[expIndex].oldLength, ei2.oldLength());
+ assertEquals(msg, expected[expIndex].newLength, ei2.newLength());
+ assertEquals(msg, expSrcIndex, ei2.sourceIndex());
+ assertEquals(msg, expDestIndex, ei2.destinationIndex());
+ assertEquals(msg, expReplIndex, ei2.replacementIndex());
+ if (!withUnchanged) {
+ // For some iterators, move past the current range
+ // so that findSourceIndex() has to look before the current index.
+ ei2.next(errorCode);
+ ei2.next(errorCode);
+ }
+ }
+
+ expSrcIndex += expected[expIndex].oldLength;
+ expDestIndex += expected[expIndex].newLength;
+ if (expected[expIndex].change) {  // Unchanged entries do not advance the expected replacement index.
+ expReplIndex += expected[expIndex].newLength;
+ }
+ }
+ sprintf(msg, "%s end", name);  // Past the last entry: next() must fail and report an empty, unchanged span.
+ assertFalse(msg, ei1.next(errorCode));
+ assertFalse(msg, ei1.hasChange());
+ assertEquals(msg, 0, ei1.oldLength());
+ assertEquals(msg, 0, ei1.newLength());
+ assertEquals(msg, expSrcIndex, ei1.sourceIndex());
+ assertEquals(msg, expDestIndex, ei1.destinationIndex());
+ assertEquals(msg, expReplIndex, ei1.replacementIndex());
+
+ assertFalse(name, ei2.findSourceIndex(expSrcIndex, errorCode));  // The index equal to the total source length must not be found.
+}
+
+void StringCaseTest::TestEdits() {  // Records a series of edits, then checks all four iterator kinds and reset().
+ IcuTestErrorCode errorCode(*this, "TestEdits");
+ Edits edits;
+ assertFalse("new Edits", edits.hasChanges());
+ assertEquals("new Edits", 0, edits.lengthDelta());
+ edits.addUnchanged(1); // multiple unchanged ranges are combined
+ edits.addUnchanged(10000); // too long, and they are split
+ edits.addReplace(0, 0);  // Empty replacement; hasChanges() is still expected to be false below.
+ edits.addUnchanged(2);
+ assertFalse("unchanged 10003", edits.hasChanges());
+ assertEquals("unchanged 10003", 0, edits.lengthDelta());
+ edits.addReplace(1, 1); // multiple short equal-length edits are compressed
+ edits.addUnchanged(0);
+ edits.addReplace(1, 1);
+ edits.addReplace(1, 1);
+ edits.addReplace(0, 10);
+ edits.addReplace(100, 0);
+ edits.addReplace(3000, 4000); // variable-length encoding
+ edits.addReplace(100000, 100000);
+ assertTrue("some edits", edits.hasChanges());
+ assertEquals("some edits", 10 - 100 + 1000, edits.lengthDelta());  // (10-0) + (0-100) + (4000-3000)
+ UErrorCode outErrorCode = U_ZERO_ERROR;
+ assertFalse("edits done: copyErrorTo", edits.copyErrorTo(outErrorCode));
+
+ EditChange coarseExpectedChanges[] = {
+ { FALSE, 10003, 10003 },  // 1 + 10000 + 2 unchanged units
+ { TRUE, 103103, 104013 }  // All replacements merged: old 1+1+1+0+100+3000+100000, new 1+1+1+10+0+4000+100000
+ };
+ checkEditsIter("coarse",
+ edits.getCoarseIterator(), edits.getCoarseIterator(),
+ coarseExpectedChanges, UPRV_LENGTHOF(coarseExpectedChanges), TRUE, errorCode);
+ checkEditsIter("coarse changes",
+ edits.getCoarseChangesIterator(), edits.getCoarseChangesIterator(),
+ coarseExpectedChanges, UPRV_LENGTHOF(coarseExpectedChanges), FALSE, errorCode);
+
+ EditChange fineExpectedChanges[] = {  // One entry per addReplace() call with a nonzero length, after the combined unchanged span.
+ { FALSE, 10003, 10003 },
+ { TRUE, 1, 1 },
+ { TRUE, 1, 1 },
+ { TRUE, 1, 1 },
+ { TRUE, 0, 10 },
+ { TRUE, 100, 0 },
+ { TRUE, 3000, 4000 },
+ { TRUE, 100000, 100000 }
+ };
+ checkEditsIter("fine",
+ edits.getFineIterator(), edits.getFineIterator(),
+ fineExpectedChanges, UPRV_LENGTHOF(fineExpectedChanges), TRUE, errorCode);
+ checkEditsIter("fine changes",
+ edits.getFineChangesIterator(), edits.getFineChangesIterator(),
+ fineExpectedChanges, UPRV_LENGTHOF(fineExpectedChanges), FALSE, errorCode);
+
+ edits.reset();  // After reset, the object must report no changes, zero delta, and nothing to iterate.
+ assertFalse("reset", edits.hasChanges());
+ assertEquals("reset", 0, edits.lengthDelta());
+ Edits::Iterator ei = edits.getCoarseChangesIterator();
+ assertFalse("reset then iterator", ei.next(errorCode));
+}
UnicodeStringTest::~UnicodeStringTest() {}
+extern IntlTest *createStringCaseTest();
+
void UnicodeStringTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char *par)
{
if (exec) logln("TestSuite UnicodeStringTest: ");
TESTCASE_AUTO_BEGIN;
- TESTCASE_AUTO_CLASS(StringCaseTest);
+ TESTCASE_AUTO_CREATE_CLASS(StringCaseTest);
TESTCASE_AUTO(TestBasicManipulation);
TESTCASE_AUTO(TestCompare);
TESTCASE_AUTO(TestExtract);
void TestMoveSwap();
};
-class StringCaseTest: public IntlTest {
-public:
- StringCaseTest();
- virtual ~StringCaseTest();
-
- void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=0);
-
- void TestCaseConversion();
-
- void TestCasingImpl(const UnicodeString &input,
- const UnicodeString &output,
- int32_t whichCase,
- void *iter, const char *localeID, uint32_t options);
- void TestCasing();
- void TestFullCaseFoldingIterator();
- void TestGreekUpper();
- void TestLongUpper();
- void TestMalformedUTF8();
- void TestBufferOverflow();
-
-private:
- void assertGreekUpper(const char *s, const char *expected);
-
- Locale GREEK_LOCALE_;
-};
-
#endif
* @provisional This API might change or be removed in a future release.
*/
public void reset() {
- length = 0;
+ length = delta = 0;  // Zero delta together with length, so no length delta from earlier edits survives a reset.
}
private void setLastUnit(int last) {
}
int newDelta = newLength - oldLength;
if (newDelta != 0) {
- if (newDelta > 0 ? newDelta > (Integer.MAX_VALUE - delta) :
- newDelta < (Integer.MIN_VALUE - delta)) {
+ if ((newDelta > 0 && delta >= 0 && newDelta > (Integer.MAX_VALUE - delta)) ||
+ (newDelta < 0 && delta < 0 && newDelta < (Integer.MIN_VALUE - delta))) {
// Integer overflow or underflow.
throw new IndexOutOfBoundsException();
}
private int index;
private final int length;
private int remaining;
- private final boolean onlyChanges, coarse;
+ private final boolean onlyChanges_, coarse;
private boolean changed;
private int oldLength_, newLength_;
private Iterator(char[] a, int len, boolean oc, boolean crs) {
array = a;
length = len;
- onlyChanges = oc;
+ onlyChanges_ = oc;  // Field is now onlyChanges_; the private next(boolean onlyChanges) uses the old name as a parameter.
coarse = crs;
}
} else if (head < LENGTH_IN_2TRAIL) {
assert(index < length);
assert(array[index] >= 0x8000);
- return array[index++];
+ return array[index++] & 0x7fff;
} else {
assert((index + 2) <= length);
assert(array[index] >= 0x8000);
}
private boolean noNext() {
- // Empty span beyond the string.
+ // No change beyond the string.
+ changed = false;  // Clear the flag so the value from the previous span does not linger.
oldLength_ = newLength_ = 0;
return false;
}
* @provisional This API might change or be removed in a future release.
*/
public boolean next() {
+ return next(onlyChanges_);  // Delegate to the private overload, passing this iterator's own onlyChanges_ setting.
+ }
+
+ private boolean next(boolean onlyChanges) {
// We have an errorCode in case we need to start guarding against integer overflows.
// It is also convenient for caller loops if we bail out when an error was set elsewhere.
updateIndexes();
* even if normal iteration would skip non-changes.
* Normal iteration can continue from a found edit.
*
- * The iterator state before this search logically does not matter.
+ * <p>The iterator state before this search logically does not matter.
* (It may affect the performance of the search.)
*
- * The iterator state after this search is undefined
+ * <p>The iterator state after this search is undefined
* if the source index is out of bounds for the source string.
*
* @param i source index
if (i < 0) { return false; }
if (i < srcIndex) {
// Reset the iterator to the start.
- index = remaining = srcIndex = replIndex = destIndex = 0;
+ index = remaining = oldLength_ = newLength_ = srcIndex = replIndex = destIndex = 0;
} else if (i < (srcIndex + oldLength_)) {
// The index is in the current span.
return true;
}
- while (next()) {
+ while (next(false)) {
if (i < (srcIndex + oldLength_)) {
// The index is in the current span.
return true;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UProperty;
import com.ibm.icu.text.BreakIterator;
+import com.ibm.icu.text.Edits;
import com.ibm.icu.text.RuleBasedBreakIterator;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.util.ULocale;
assertGreekUpper("ρωμέικα", "ΡΩΜΕΪΚΑ");
}
+ private static final class EditChange {  // One expected edit, as compared by checkEditsIter().
+ private boolean change;  // Expected hasChange() value.
+ private int oldLength, newLength;  // Expected oldLength() and newLength() values.
+ EditChange(boolean change, int oldLength, int newLength) {
+ this.change = change;
+ this.oldLength = oldLength;
+ this.newLength = newLength;
+ }
+ }
+
+ private static void checkEditsIter(  // Checks ei1 via next() and ei2 via findSourceIndex() against the expected edits.
+ String name, Edits.Iterator ei1, Edits.Iterator ei2, // two equal iterators
+ EditChange[] expected, boolean withUnchanged) {
+ assertFalse(name, ei2.findSourceIndex(-1));  // A negative source index must not be found.
+
+ int expSrcIndex = 0;  // Running sum of expected oldLength values.
+ int expDestIndex = 0;  // Running sum of expected newLength values.
+ int expReplIndex = 0;  // Running sum of newLength over changed entries only.
+ for (int expIndex = 0; expIndex < expected.length; ++expIndex) {
+ String msg = name + ' ' + expIndex;
+ if (withUnchanged || expected[expIndex].change) {  // When withUnchanged is false, ei1 is only advanced for changed entries.
+ assertTrue(msg, ei1.next());
+ assertEquals(msg, expected[expIndex].change, ei1.hasChange());
+ assertEquals(msg, expected[expIndex].oldLength, ei1.oldLength());
+ assertEquals(msg, expected[expIndex].newLength, ei1.newLength());
+ assertEquals(msg, expSrcIndex, ei1.sourceIndex());
+ assertEquals(msg, expDestIndex, ei1.destinationIndex());
+ assertEquals(msg, expReplIndex, ei1.replacementIndex());
+ }
+
+ if (expected[expIndex].oldLength > 0) {  // Entries with an empty source span are not looked up by source index.
+ assertTrue(msg, ei2.findSourceIndex(expSrcIndex));
+ assertEquals(msg, expected[expIndex].change, ei2.hasChange());
+ assertEquals(msg, expected[expIndex].oldLength, ei2.oldLength());
+ assertEquals(msg, expected[expIndex].newLength, ei2.newLength());
+ assertEquals(msg, expSrcIndex, ei2.sourceIndex());
+ assertEquals(msg, expDestIndex, ei2.destinationIndex());
+ assertEquals(msg, expReplIndex, ei2.replacementIndex());
+ if (!withUnchanged) {
+ // For some iterators, move past the current range
+ // so that findSourceIndex() has to look before the current index.
+ ei2.next();
+ ei2.next();
+ }
+ }
+
+ expSrcIndex += expected[expIndex].oldLength;
+ expDestIndex += expected[expIndex].newLength;
+ if (expected[expIndex].change) {  // Unchanged entries do not advance the expected replacement index.
+ expReplIndex += expected[expIndex].newLength;
+ }
+ }
+ String msg = name + " end";  // Past the last entry: next() must fail and report an empty, unchanged span.
+ assertFalse(msg, ei1.next());
+ assertFalse(msg, ei1.hasChange());
+ assertEquals(msg, 0, ei1.oldLength());
+ assertEquals(msg, 0, ei1.newLength());
+ assertEquals(msg, expSrcIndex, ei1.sourceIndex());
+ assertEquals(msg, expDestIndex, ei1.destinationIndex());
+ assertEquals(msg, expReplIndex, ei1.replacementIndex());
+
+ assertFalse(name, ei2.findSourceIndex(expSrcIndex));  // The index equal to the total source length must not be found.
+ }
+
+ @Test
+ public void TestEdits() {  // Records a series of edits, then checks all four iterator kinds and reset().
+ Edits edits = new Edits();
+ assertFalse("new Edits", edits.hasChanges());
+ assertEquals("new Edits", 0, edits.lengthDelta());
+ edits.addUnchanged(1); // multiple unchanged ranges are combined
+ edits.addUnchanged(10000); // too long, and they are split
+ edits.addReplace(0, 0);  // Empty replacement; hasChanges() is still expected to be false below.
+ edits.addUnchanged(2);
+ assertFalse("unchanged 10003", edits.hasChanges());
+ assertEquals("unchanged 10003", 0, edits.lengthDelta());
+ edits.addReplace(1, 1); // multiple short equal-length edits are compressed
+ edits.addUnchanged(0);
+ edits.addReplace(1, 1);
+ edits.addReplace(1, 1);
+ edits.addReplace(0, 10);
+ edits.addReplace(100, 0);
+ edits.addReplace(3000, 4000); // variable-length encoding
+ edits.addReplace(100000, 100000);
+ assertTrue("some edits", edits.hasChanges());
+ assertEquals("some edits", 10 - 100 + 1000, edits.lengthDelta());  // (10-0) + (0-100) + (4000-3000)
+
+ EditChange[] coarseExpectedChanges = new EditChange[] {
+ new EditChange(false, 10003, 10003),  // 1 + 10000 + 2 unchanged units
+ new EditChange(true, 103103, 104013)  // All replacements merged: old 1+1+1+0+100+3000+100000, new 1+1+1+10+0+4000+100000
+ };
+ checkEditsIter("coarse",
+ edits.getCoarseIterator(), edits.getCoarseIterator(),
+ coarseExpectedChanges, true);
+ checkEditsIter("coarse changes",
+ edits.getCoarseChangesIterator(), edits.getCoarseChangesIterator(),
+ coarseExpectedChanges, false);
+
+ EditChange[] fineExpectedChanges = new EditChange[] {  // One entry per addReplace() call with a nonzero length, after the combined unchanged span.
+ new EditChange(false, 10003, 10003),
+ new EditChange(true, 1, 1),
+ new EditChange(true, 1, 1),
+ new EditChange(true, 1, 1),
+ new EditChange(true, 0, 10),
+ new EditChange(true, 100, 0),
+ new EditChange(true, 3000, 4000),
+ new EditChange(true, 100000, 100000)
+ };
+ checkEditsIter("fine",
+ edits.getFineIterator(), edits.getFineIterator(),
+ fineExpectedChanges, true);
+ checkEditsIter("fine changes",
+ edits.getFineChangesIterator(), edits.getFineChangesIterator(),
+ fineExpectedChanges, false);
+
+ edits.reset();  // After reset, the object must report no changes, zero delta, and nothing to iterate.
+ assertFalse("reset", edits.hasChanges());
+ assertEquals("reset", 0, edits.lengthDelta());
+ Edits.Iterator ei = edits.getCoarseChangesIterator();
+ assertFalse("reset then iterator", ei.next());
+ }
+
// private data members - test data --------------------------------------
private static final Locale TURKISH_LOCALE_ = new Locale("tr", "TR");
// private methods -------------------------------------------------------
/**
- * Converting the hex numbers represented betwee n ';' to Unicode strings
+ * Converting the hex numbers represented between ';' to Unicode strings
* @param str string to break up into Unicode strings
* @return array of Unicode strings ending with a null
*/