From: Markus Scherer Date: Fri, 10 Feb 2017 04:44:37 +0000 (+0000) Subject: ICU-12410 test & fix Edits X-Git-Tag: release-59-rc~145^2~4 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=4207d7fc263dc90e29f6fb4b07bf84f39585dc21;p=icu ICU-12410 test & fix Edits X-SVN-Rev: 39660 --- diff --git a/icu4c/source/common/edits.cpp b/icu4c/source/common/edits.cpp index 7d216b5acb8..2bc9c220b8b 100644 --- a/icu4c/source/common/edits.cpp +++ b/icu4c/source/common/edits.cpp @@ -40,7 +40,7 @@ Edits::~Edits() { } void Edits::reset() { - length = 0; + length = delta = 0; } void Edits::addUnchanged(int32_t unchangedLength) { @@ -95,7 +95,8 @@ void Edits::addReplace(int32_t oldLength, int32_t newLength) { } int32_t newDelta = newLength - oldLength; if (newDelta != 0) { - if (newDelta > 0 ? newDelta > (INT32_MAX - delta) : newDelta < (INT32_MIN - delta)) { + if ((newDelta > 0 && delta >= 0 && newDelta > (INT32_MAX - delta)) || + (newDelta < 0 && delta < 0 && newDelta < (INT32_MIN - delta))) { // Integer overflow or underflow. errorCode = U_INDEX_OUTOFBOUNDS_ERROR; return; @@ -193,7 +194,7 @@ UBool Edits::hasChanges() const { Edits::Iterator::Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs) : array(a), index(0), length(len), remaining(0), - onlyChanges(oc), coarse(crs), + onlyChanges_(oc), coarse(crs), changed(FALSE), oldLength_(0), newLength_(0), srcIndex(0), replIndex(0), destIndex(0) {} @@ -203,7 +204,7 @@ int32_t Edits::Iterator::readLength(int32_t head) { } else if (head < LENGTH_IN_2TRAIL) { U_ASSERT(index < length); U_ASSERT(array[index] >= 0x8000); - return array[index++]; + return array[index++] & 0x7fff; } else { U_ASSERT((index + 2) <= length); U_ASSERT(array[index] >= 0x8000); @@ -225,12 +226,13 @@ void Edits::Iterator::updateIndexes() { } UBool Edits::Iterator::noNext() { - // Empty span beyond the string. + // No change beyond the string. + changed = FALSE; oldLength_ = newLength_ = 0; return FALSE; } -UBool Edits::Iterator::next(UErrorCode &errorCode) { +UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) { if (U_FAILURE(errorCode)) { return FALSE; } // We have an errorCode in case we need to start guarding against integer overflows. // It is also convenient for caller loops if we bail out when an error was set elsewhere. @@ -308,12 +310,12 @@ UBool Edits::Iterator::findSourceIndex(int32_t i, UErrorCode &errorCode) { if (U_FAILURE(errorCode) || i < 0) { return FALSE; } if (i < srcIndex) { // Reset the iterator to the start. - index = remaining = srcIndex = replIndex = destIndex = 0; + index = remaining = oldLength_ = newLength_ = srcIndex = replIndex = destIndex = 0; } else if (i < (srcIndex + oldLength_)) { // The index is in the current span. return TRUE; } - while (next(errorCode)) { + while (next(FALSE, errorCode)) { if (i < (srcIndex + oldLength_)) { // The index is in the current span. return TRUE; diff --git a/icu4c/source/common/unicode/edits.h b/icu4c/source/common/unicode/edits.h index 3a7acb1c72a..01d770632b6 100644 --- a/icu4c/source/common/unicode/edits.h +++ b/icu4c/source/common/unicode/edits.h @@ -106,7 +106,7 @@ public: * @return TRUE if there is another edit * @draft ICU 59 */ - UBool next(UErrorCode &errorCode); + UBool next(UErrorCode &errorCode) { return next(onlyChanges_, errorCode); } /** * Finds the edit that contains the source index. @@ -169,11 +169,12 @@ public: int32_t readLength(int32_t head); void updateIndexes(); UBool noNext(); + UBool next(UBool onlyChanges, UErrorCode &errorCode); const uint16_t *array; int32_t index, length; int32_t remaining; - UBool onlyChanges, coarse; + UBool onlyChanges_, coarse; UBool changed; int32_t oldLength_, newLength_; diff --git a/icu4c/source/test/intltest/strcase.cpp b/icu4c/source/test/intltest/strcase.cpp index 7054b7f1e7a..03e1a72168b 100644 --- a/icu4c/source/test/intltest/strcase.cpp +++ b/icu4c/source/test/intltest/strcase.cpp @@ -19,6 +19,7 @@ */ #include "unicode/std_string.h" +#include "unicode/edits.h" #include "unicode/uchar.h" #include "unicode/ures.h" #include "unicode/uloc.h" @@ -31,10 +32,50 @@ #include "unicode/tstdtmod.h" #include "cmemory.h" +struct EditChange { + UBool change; + int32_t oldLength, newLength; +}; + +class StringCaseTest: public IntlTest { +public: + StringCaseTest(); + virtual ~StringCaseTest(); + + void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=0); + + void TestCaseConversion(); + + void TestCasingImpl(const UnicodeString &input, + const UnicodeString &output, + int32_t whichCase, + void *iter, const char *localeID, uint32_t options); + void TestCasing(); + void TestFullCaseFoldingIterator(); + void TestGreekUpper(); + void TestLongUpper(); + void TestMalformedUTF8(); + void TestBufferOverflow(); + void TestEdits(); + +private: + void assertGreekUpper(const char *s, const char *expected); + void checkEditsIter( + const char *name, Edits::Iterator ei1, Edits::Iterator ei2, // two equal iterators + EditChange expected[], int32_t expLength, UBool withUnchanged, + UErrorCode &errorCode); + + Locale GREEK_LOCALE_; +}; + StringCaseTest::StringCaseTest() : GREEK_LOCALE_("el") {} StringCaseTest::~StringCaseTest() {} +extern IntlTest *createStringCaseTest() { + return new StringCaseTest(); +} + void StringCaseTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) { if(exec) { @@ -50,6 +91,7 @@ StringCaseTest::runIndexedTest(int32_t index, UBool exec, const char *&name, cha TESTCASE_AUTO(TestLongUpper); TESTCASE_AUTO(TestMalformedUTF8); TESTCASE_AUTO(TestBufferOverflow); + TESTCASE_AUTO(TestEdits); TESTCASE_AUTO_END; } @@ -848,3 +890,118 @@ void StringCaseTest::TestBufferOverflow() { errorCode.reset(); #endif // U_HAVE_STD_STRING } + +void StringCaseTest::checkEditsIter( + const char *name, Edits::Iterator ei1, Edits::Iterator ei2, // two equal iterators + EditChange expected[], int32_t expLength, UBool withUnchanged, + UErrorCode &errorCode) { + assertFalse(name, ei2.findSourceIndex(-1, errorCode)); + + char msg[100]; + int32_t expSrcIndex = 0; + int32_t expDestIndex = 0; + int32_t expReplIndex = 0; + for (int32_t expIndex = 0; expIndex < expLength; ++expIndex) { + sprintf(msg, "%s %d", name, (int)expIndex); + if (withUnchanged || expected[expIndex].change) { + assertTrue(msg, ei1.next(errorCode)); + assertEquals(msg, expected[expIndex].change, ei1.hasChange()); + assertEquals(msg, expected[expIndex].oldLength, ei1.oldLength()); + assertEquals(msg, expected[expIndex].newLength, ei1.newLength()); + assertEquals(msg, expSrcIndex, ei1.sourceIndex()); + assertEquals(msg, expDestIndex, ei1.destinationIndex()); + assertEquals(msg, expReplIndex, ei1.replacementIndex()); + } + + if (expected[expIndex].oldLength > 0) { + assertTrue(msg, ei2.findSourceIndex(expSrcIndex, errorCode)); + assertEquals(msg, expected[expIndex].change, ei2.hasChange()); + assertEquals(msg, expected[expIndex].oldLength, ei2.oldLength()); + assertEquals(msg, expected[expIndex].newLength, ei2.newLength()); + assertEquals(msg, expSrcIndex, ei2.sourceIndex()); + assertEquals(msg, expDestIndex, ei2.destinationIndex()); + assertEquals(msg, expReplIndex, ei2.replacementIndex()); + if (!withUnchanged) { + // For some iterators, move past the current range + // so that findSourceIndex() has to look before the current index. + ei2.next(errorCode); + ei2.next(errorCode); + } + } + + expSrcIndex += expected[expIndex].oldLength; + expDestIndex += expected[expIndex].newLength; + if (expected[expIndex].change) { + expReplIndex += expected[expIndex].newLength; + } + } + sprintf(msg, "%s end", name); + assertFalse(msg, ei1.next(errorCode)); + assertFalse(msg, ei1.hasChange()); + assertEquals(msg, 0, ei1.oldLength()); + assertEquals(msg, 0, ei1.newLength()); + assertEquals(msg, expSrcIndex, ei1.sourceIndex()); + assertEquals(msg, expDestIndex, ei1.destinationIndex()); + assertEquals(msg, expReplIndex, ei1.replacementIndex()); + + assertFalse(name, ei2.findSourceIndex(expSrcIndex, errorCode)); +} + +void StringCaseTest::TestEdits() { + IcuTestErrorCode errorCode(*this, "TestEdits"); + Edits edits; + assertFalse("new Edits", edits.hasChanges()); + assertEquals("new Edits", 0, edits.lengthDelta()); + edits.addUnchanged(1); // multiple unchanged ranges are combined + edits.addUnchanged(10000); // too long, and they are split + edits.addReplace(0, 0); + edits.addUnchanged(2); + assertFalse("unchanged 10003", edits.hasChanges()); + assertEquals("unchanged 10003", 0, edits.lengthDelta()); + edits.addReplace(1, 1); // multiple short equal-length edits are compressed + edits.addUnchanged(0); + edits.addReplace(1, 1); + edits.addReplace(1, 1); + edits.addReplace(0, 10); + edits.addReplace(100, 0); + edits.addReplace(3000, 4000); // variable-length encoding + edits.addReplace(100000, 100000); + assertTrue("some edits", edits.hasChanges()); + assertEquals("some edits", 10 - 100 + 1000, edits.lengthDelta()); + UErrorCode outErrorCode = U_ZERO_ERROR; + assertFalse("edits done: copyErrorTo", edits.copyErrorTo(outErrorCode)); + + EditChange coarseExpectedChanges[] = { + { FALSE, 10003, 10003 }, + { TRUE, 103103, 104013 } + }; + checkEditsIter("coarse", + edits.getCoarseIterator(), edits.getCoarseIterator(), + coarseExpectedChanges, UPRV_LENGTHOF(coarseExpectedChanges), TRUE, errorCode); + checkEditsIter("coarse changes", + edits.getCoarseChangesIterator(), edits.getCoarseChangesIterator(), + coarseExpectedChanges, UPRV_LENGTHOF(coarseExpectedChanges), FALSE, errorCode); + + EditChange fineExpectedChanges[] = { + { FALSE, 10003, 10003 }, + { TRUE, 1, 1 }, + { TRUE, 1, 1 }, + { TRUE, 1, 1 }, + { TRUE, 0, 10 }, + { TRUE, 100, 0 }, + { TRUE, 3000, 4000 }, + { TRUE, 100000, 100000 } + }; + checkEditsIter("fine", + edits.getFineIterator(), edits.getFineIterator(), + fineExpectedChanges, UPRV_LENGTHOF(fineExpectedChanges), TRUE, errorCode); + checkEditsIter("fine changes", + edits.getFineChangesIterator(), edits.getFineChangesIterator(), + fineExpectedChanges, UPRV_LENGTHOF(fineExpectedChanges), FALSE, errorCode); + + edits.reset(); + assertFalse("reset", edits.hasChanges()); + assertEquals("reset", 0, edits.lengthDelta()); + Edits::Iterator ei = edits.getCoarseChangesIterator(); + assertFalse("reset then iterator", ei.next(errorCode)); +} diff --git a/icu4c/source/test/intltest/ustrtest.cpp b/icu4c/source/test/intltest/ustrtest.cpp index 50eb1911c25..74b791c3e09 100644 --- a/icu4c/source/test/intltest/ustrtest.cpp +++ b/icu4c/source/test/intltest/ustrtest.cpp @@ -30,11 +30,13 @@ using namespace std; UnicodeStringTest::~UnicodeStringTest() {} +extern IntlTest *createStringCaseTest(); + void UnicodeStringTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char *par) { if (exec) logln("TestSuite UnicodeStringTest: "); TESTCASE_AUTO_BEGIN; - TESTCASE_AUTO_CLASS(StringCaseTest); + TESTCASE_AUTO_CREATE_CLASS(StringCaseTest); TESTCASE_AUTO(TestBasicManipulation); TESTCASE_AUTO(TestCompare); TESTCASE_AUTO(TestExtract); diff --git a/icu4c/source/test/intltest/ustrtest.h b/icu4c/source/test/intltest/ustrtest.h index 37b3a88ea95..7545d9f077e 100644 --- a/icu4c/source/test/intltest/ustrtest.h +++ b/icu4c/source/test/intltest/ustrtest.h @@ -94,30 +94,4 @@ public: void TestMoveSwap(); }; -class StringCaseTest: public IntlTest { -public: - StringCaseTest(); - virtual ~StringCaseTest(); - - void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=0); - - void TestCaseConversion(); - - void TestCasingImpl(const UnicodeString &input, - const UnicodeString &output, - int32_t whichCase, - void *iter, const char *localeID, uint32_t options); - void TestCasing(); - void TestFullCaseFoldingIterator(); - void TestGreekUpper(); - void TestLongUpper(); - void TestMalformedUTF8(); - void TestBufferOverflow(); - -private: - void assertGreekUpper(const char *s, const char *expected); - - Locale GREEK_LOCALE_; -}; - #endif diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/Edits.java b/icu4j/main/classes/core/src/com/ibm/icu/text/Edits.java index b1239527c14..f9cbf9fb4a6 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/Edits.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/Edits.java @@ -52,7 +52,7 @@ public final class Edits { * @provisional This API might change or be removed in a future release. */ public void reset() { - length = 0; + length = delta = 0; } private void setLastUnit(int last) { @@ -125,8 +125,8 @@ public final class Edits { } int newDelta = newLength - oldLength; if (newDelta != 0) { - if (newDelta > 0 ? newDelta > (Integer.MAX_VALUE - delta) : - newDelta < (Integer.MIN_VALUE - delta)) { + if ((newDelta > 0 && delta >= 0 && newDelta > (Integer.MAX_VALUE - delta)) || + (newDelta < 0 && delta < 0 && newDelta < (Integer.MIN_VALUE - delta))) { // Integer overflow or underflow. throw new IndexOutOfBoundsException(); } @@ -226,7 +226,7 @@ public final class Edits { private int index; private final int length; private int remaining; - private final boolean onlyChanges, coarse; + private final boolean onlyChanges_, coarse; private boolean changed; private int oldLength_, newLength_; @@ -235,7 +235,7 @@ public final class Edits { private Iterator(char[] a, int len, boolean oc, boolean crs) { array = a; length = len; - onlyChanges = oc; + onlyChanges_ = oc; coarse = crs; } @@ -245,7 +245,7 @@ public final class Edits { } else if (head < LENGTH_IN_2TRAIL) { assert(index < length); assert(array[index] >= 0x8000); - return array[index++]; + return array[index++] & 0x7fff; } else { assert((index + 2) <= length); assert(array[index] >= 0x8000); @@ -267,7 +267,8 @@ public final class Edits { } private boolean noNext() { - // Empty span beyond the string. + // No change beyond the string. + changed = false; oldLength_ = newLength_ = 0; return false; } @@ -279,6 +280,10 @@ public final class Edits { * @provisional This API might change or be removed in a future release. */ public boolean next() { + return next(onlyChanges_); + } + + private boolean next(boolean onlyChanges) { // We have an errorCode in case we need to start guarding against integer overflows. // It is also convenient for caller loops if we bail out when an error was set elsewhere. updateIndexes(); @@ -357,10 +362,10 @@ public final class Edits { * even if normal iteration would skip non-changes. * Normal iteration can continue from a found edit. * - * The iterator state before this search logically does not matter. + *

The iterator state before this search logically does not matter. * (It may affect the performance of the search.) * - * The iterator state after this search is undefined + *

The iterator state after this search is undefined * if the source index is out of bounds for the source string. * * @param i source index @@ -372,12 +377,12 @@ public final class Edits { if (i < 0) { return false; } if (i < srcIndex) { // Reset the iterator to the start. - index = remaining = srcIndex = replIndex = destIndex = 0; + index = remaining = oldLength_ = newLength_ = srcIndex = replIndex = destIndex = 0; } else if (i < (srcIndex + oldLength_)) { // The index is in the current span. return true; } - while (next()) { + while (next(false)) { if (i < (srcIndex + oldLength_)) { // The index is in the current span. return true; diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UCharacterCaseTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UCharacterCaseTest.java index 7ac358b51f2..d72b8ddc40c 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UCharacterCaseTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UCharacterCaseTest.java @@ -24,6 +24,7 @@ import com.ibm.icu.impl.Utility; import com.ibm.icu.lang.UCharacter; import com.ibm.icu.lang.UProperty; import com.ibm.icu.text.BreakIterator; +import com.ibm.icu.text.Edits; import com.ibm.icu.text.RuleBasedBreakIterator; import com.ibm.icu.text.UTF16; import com.ibm.icu.util.ULocale; @@ -708,6 +709,127 @@ public final class UCharacterCaseTest extends TestFmwk assertGreekUpper("ρωμέικα", "ΡΩΜΕΪΚΑ"); } + private static final class EditChange { + private boolean change; + private int oldLength, newLength; + EditChange(boolean change, int oldLength, int newLength) { + this.change = change; + this.oldLength = oldLength; + this.newLength = newLength; + } + } + + private static void checkEditsIter( + String name, Edits.Iterator ei1, Edits.Iterator ei2, // two equal iterators + EditChange[] expected, boolean withUnchanged) { + assertFalse(name, ei2.findSourceIndex(-1)); + + int expSrcIndex = 0; + int expDestIndex = 0; + int expReplIndex = 0; + for (int expIndex = 0; expIndex < expected.length; ++expIndex) { + String msg = name + ' ' + expIndex; + if (withUnchanged || expected[expIndex].change) { + assertTrue(msg, ei1.next()); + assertEquals(msg, expected[expIndex].change, ei1.hasChange()); + assertEquals(msg, expected[expIndex].oldLength, ei1.oldLength()); + assertEquals(msg, expected[expIndex].newLength, ei1.newLength()); + assertEquals(msg, expSrcIndex, ei1.sourceIndex()); + assertEquals(msg, expDestIndex, ei1.destinationIndex()); + assertEquals(msg, expReplIndex, ei1.replacementIndex()); + } + + if (expected[expIndex].oldLength > 0) { + assertTrue(msg, ei2.findSourceIndex(expSrcIndex)); + assertEquals(msg, expected[expIndex].change, ei2.hasChange()); + assertEquals(msg, expected[expIndex].oldLength, ei2.oldLength()); + assertEquals(msg, expected[expIndex].newLength, ei2.newLength()); + assertEquals(msg, expSrcIndex, ei2.sourceIndex()); + assertEquals(msg, expDestIndex, ei2.destinationIndex()); + assertEquals(msg, expReplIndex, ei2.replacementIndex()); + if (!withUnchanged) { + // For some iterators, move past the current range + // so that findSourceIndex() has to look before the current index. + ei2.next(); + ei2.next(); + } + } + + expSrcIndex += expected[expIndex].oldLength; + expDestIndex += expected[expIndex].newLength; + if (expected[expIndex].change) { + expReplIndex += expected[expIndex].newLength; + } + } + String msg = name + " end"; + assertFalse(msg, ei1.next()); + assertFalse(msg, ei1.hasChange()); + assertEquals(msg, 0, ei1.oldLength()); + assertEquals(msg, 0, ei1.newLength()); + assertEquals(msg, expSrcIndex, ei1.sourceIndex()); + assertEquals(msg, expDestIndex, ei1.destinationIndex()); + assertEquals(msg, expReplIndex, ei1.replacementIndex()); + + assertFalse(name, ei2.findSourceIndex(expSrcIndex)); + } + + @Test + public void TestEdits() { + Edits edits = new Edits(); + assertFalse("new Edits", edits.hasChanges()); + assertEquals("new Edits", 0, edits.lengthDelta()); + edits.addUnchanged(1); // multiple unchanged ranges are combined + edits.addUnchanged(10000); // too long, and they are split + edits.addReplace(0, 0); + edits.addUnchanged(2); + assertFalse("unchanged 10003", edits.hasChanges()); + assertEquals("unchanged 10003", 0, edits.lengthDelta()); + edits.addReplace(1, 1); // multiple short equal-length edits are compressed + edits.addUnchanged(0); + edits.addReplace(1, 1); + edits.addReplace(1, 1); + edits.addReplace(0, 10); + edits.addReplace(100, 0); + edits.addReplace(3000, 4000); // variable-length encoding + edits.addReplace(100000, 100000); + assertTrue("some edits", edits.hasChanges()); + assertEquals("some edits", 10 - 100 + 1000, edits.lengthDelta()); + + EditChange[] coarseExpectedChanges = new EditChange[] { + new EditChange(false, 10003, 10003), + new EditChange(true, 103103, 104013) + }; + checkEditsIter("coarse", + edits.getCoarseIterator(), edits.getCoarseIterator(), + coarseExpectedChanges, true); + checkEditsIter("coarse changes", + edits.getCoarseChangesIterator(), edits.getCoarseChangesIterator(), + coarseExpectedChanges, false); + + EditChange[] fineExpectedChanges = new EditChange[] { + new EditChange(false, 10003, 10003), + new EditChange(true, 1, 1), + new EditChange(true, 1, 1), + new EditChange(true, 1, 1), + new EditChange(true, 0, 10), + new EditChange(true, 100, 0), + new EditChange(true, 3000, 4000), + new EditChange(true, 100000, 100000) + }; + checkEditsIter("fine", + edits.getFineIterator(), edits.getFineIterator(), + fineExpectedChanges, true); + checkEditsIter("fine changes", + edits.getFineChangesIterator(), edits.getFineChangesIterator(), + fineExpectedChanges, false); + + edits.reset(); + assertFalse("reset", edits.hasChanges()); + assertEquals("reset", 0, edits.lengthDelta()); + Edits.Iterator ei = edits.getCoarseChangesIterator(); + assertFalse("reset then iterator", ei.next()); + } + // private data members - test data -------------------------------------- private static final Locale TURKISH_LOCALE_ = new Locale("tr", "TR"); @@ -945,7 +1067,7 @@ public final class UCharacterCaseTest extends TestFmwk // private methods ------------------------------------------------------- /** - * Converting the hex numbers represented betwee n ';' to Unicode strings + * Converting the hex numbers represented between ';' to Unicode strings * @param str string to break up into Unicode strings * @return array of Unicode strings ending with a null */