} // namespace
-Edits::~Edits() {
- if(array != stackArray) {
+// Frees the edits array unless it is this object's built-in stackArray.
+// The array pointer and capacity are left untouched.
+void Edits::releaseArray() U_NOEXCEPT {
+ if (array != stackArray) {
uprv_free(array);
}
}
-void Edits::reset() {
+// Fills this object's array with a copy of other's edit records.
+// The callers in this file set length, delta, numChanges and errorCode_ from other
+// before calling. If this object is in a failure state, or if a larger array
+// cannot be allocated, the edits are emptied instead; the allocation failure
+// is recorded as U_MEMORY_ALLOCATION_ERROR.
+Edits &Edits::copyArray(const Edits &other) {
+    if (U_FAILURE(errorCode_)) {
+        length = delta = numChanges = 0;
+        return *this;
+    }
+    // Array units are uint16_t, hence two bytes per unit.
+    const size_t numBytes = (size_t)length * 2;
+    if (capacity < length) {
+        uint16_t *const biggerArray = (uint16_t *)uprv_malloc(numBytes);
+        if (biggerArray == nullptr) {
+            length = delta = numChanges = 0;
+            errorCode_ = U_MEMORY_ALLOCATION_ERROR;
+            return *this;
+        }
+        // Replace the old array only after the allocation has succeeded.
+        releaseArray();
+        array = biggerArray;
+        capacity = length;
+    }
+    if (length > 0) {
+        uprv_memcpy(array, other.array, numBytes);
+    }
+    return *this;
+}
+
+// Transfers src's edit records into this object.
+// The callers in this file set length, delta, numChanges and errorCode_ from src
+// before calling. When length exceeds STACK_CAPACITY, src's array pointer is adopted
+// and src is reset to an empty state on its own stackArray; otherwise the units are
+// copied into this object's stackArray and src is left as it was.
+// In a failure state the edits are emptied and nothing is transferred.
+Edits &Edits::moveArray(Edits &src) U_NOEXCEPT {
+    if (U_FAILURE(errorCode_)) {
+        length = delta = numChanges = 0;
+        return *this;
+    }
+    releaseArray();
+    if (length <= STACK_CAPACITY) {
+        // Short contents: copy into the built-in array.
+        array = stackArray;
+        capacity = STACK_CAPACITY;
+        if (length > 0) {
+            uprv_memcpy(array, src.array, (size_t)length * 2);
+        }
+    } else {
+        // Long contents: take over src's array and detach src from it.
+        array = src.array;
+        capacity = src.capacity;
+        src.array = src.stackArray;
+        src.capacity = STACK_CAPACITY;
+        src.reset();
+    }
+    return *this;
+}
+
+// Copy assignment: makes this object hold the same edits and error state as other.
+// Returns *this. If other is in a failure state, or memory for the copy cannot be
+// allocated, this object ends up with no edits (see copyArray()).
+Edits &Edits::operator=(const Edits &other) {
+    // Self-assignment is a no-op. Without this guard, copyArray() would call
+    // uprv_memcpy() with identical source and destination, and would empty
+    // the edits of an object that is in a failure state.
+    if (this == &other) {
+        return *this;
+    }
+    length = other.length;
+    delta = other.delta;
+    numChanges = other.numChanges;
+    errorCode_ = other.errorCode_;
+    return copyArray(other);
+}
+
+// Move assignment: takes over the scalar state of src, then lets moveArray()
+// adopt or copy the edit records. Returns *this.
+// Not safe for self-assignment: moveArray() releases this object's array first.
+Edits &Edits::operator=(Edits &&src) U_NOEXCEPT {
+    errorCode_ = src.errorCode_;
+    numChanges = src.numChanges;
+    delta = src.delta;
+    length = src.length;
+    return moveArray(src);
+}
+
+// Destructor: frees the edits array unless it is the built-in stackArray.
+Edits::~Edits() {
+ releaseArray();
+}
+
+// Empties the edits and clears the error state.
+// The array pointer and its capacity are not changed.
+void Edits::reset() U_NOEXCEPT {
length = delta = numChanges = 0;
+ errorCode_ = U_ZERO_ERROR;
}
void Edits::addUnchanged(int32_t unchangedLength) {
- if(U_FAILURE(errorCode) || unchangedLength == 0) { return; }
+ if(U_FAILURE(errorCode_) || unchangedLength == 0) { return; }
if(unchangedLength < 0) {
- errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+ errorCode_ = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
// Merge into previous unchanged-text record, if any.
}
void Edits::addReplace(int32_t oldLength, int32_t newLength) {
- if(U_FAILURE(errorCode)) { return; }
+ if(U_FAILURE(errorCode_)) { return; }
if(oldLength == newLength && 0 < oldLength && oldLength <= MAX_SHORT_WIDTH) {
// Replacement of short oldLength text units by same-length new text.
// Merge into previous short-replacement record, if any.
}
if(oldLength < 0 || newLength < 0) {
- errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+ errorCode_ = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
if (oldLength == 0 && newLength == 0) {
if ((newDelta > 0 && delta >= 0 && newDelta > (INT32_MAX - delta)) ||
(newDelta < 0 && delta < 0 && newDelta < (INT32_MIN - delta))) {
// Integer overflow or underflow.
- errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+ errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR;
return;
}
delta += newDelta;
} else if (capacity == INT32_MAX) {
// Not U_BUFFER_OVERFLOW_ERROR because that could be confused on a string transform API
// with a result-string-buffer overflow.
- errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+ errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR;
return FALSE;
} else if (capacity >= (INT32_MAX / 2)) {
newCapacity = INT32_MAX;
}
// Grow by at least 5 units so that a maximal change record will fit.
if ((newCapacity - capacity) < 5) {
- errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+ errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR;
return FALSE;
}
uint16_t *newArray = (uint16_t *)uprv_malloc((size_t)newCapacity * 2);
if (newArray == NULL) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
+ errorCode_ = U_MEMORY_ALLOCATION_ERROR;
return FALSE;
}
uprv_memcpy(newArray, array, (size_t)length * 2);
- if (array != stackArray) {
- uprv_free(array);
- }
+ releaseArray();
array = newArray;
capacity = newCapacity;
return TRUE;
+// Propagates this object's error state to the caller's error code.
+// Returns TRUE if outErrorCode is a failure on return: either it already was one
+// (then it is not modified), or this object's failure code was copied into it.
+// Returns FALSE if neither outErrorCode nor this object is in a failure state.
UBool Edits::copyErrorTo(UErrorCode &outErrorCode) {
if (U_FAILURE(outErrorCode)) { return TRUE; }
- if (U_SUCCESS(errorCode)) { return FALSE; }
- outErrorCode = errorCode;
+ if (U_SUCCESS(errorCode_)) { return FALSE; }
+ outErrorCode = errorCode_;
return TRUE;
}
+// Composes ab (string a -> string b) with bc (string b -> string c) and appends the
+// resulting a -> c edits to this object via addReplace() and addUnchanged().
+// Both inputs are walked in parallel with fine-grained iterators. Changes whose
+// intermediate-string extents overlap are accumulated in the pending_* lengths
+// until both sides reach a common boundary in string b.
+// If one input runs out while the other still has intermediate-string text left,
+// errorCode is set to U_ILLEGAL_ARGUMENT_ERROR, unless copyErrorTo() already
+// reports a failure. Returns *this.
+Edits &Edits::mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &errorCode) {
+ if (copyErrorTo(errorCode)) { return *this; }
+ // Picture string a --(Edits ab)--> string b --(Edits bc)--> string c.
+ // Parallel iteration over both Edits.
+ Iterator abIter = ab.getFineIterator();
+ Iterator bcIter = bc.getFineIterator();
+ UBool abHasNext = TRUE, bcHasNext = TRUE;
+ // Copy iterator state into local variables, so that we can modify and subdivide spans.
+ // ab old & new length, bc old & new length
+ int32_t aLength = 0, ab_bLength = 0, bc_bLength = 0, cLength = 0;
+ // When we have different-intermediate-length changes, we accumulate a larger change.
+ int32_t pending_aLength = 0, pending_cLength = 0;
+ for (;;) {
+ // At this point, for each of the two iterators:
+ // Either we are done with the locally cached current edit,
+ // and its intermediate-string length has been reset,
+ // or we will continue to work with a truncated remainder of this edit.
+ //
+ // If the current edit is done, and the iterator has not yet reached the end,
+ // then we fetch the next edit. This is true for at least one of the iterators.
+ //
+ // Normally it does not matter whether we fetch from ab and then bc or vice versa.
+ // However, the result is observably different when
+ // ab deletions meet bc insertions at the same intermediate-string index.
+ // Some users expect the bc insertions to come first, so we fetch from bc first.
+ if (bc_bLength == 0) {
+ if (bcHasNext && (bcHasNext = bcIter.next(errorCode))) {
+ bc_bLength = bcIter.oldLength();
+ cLength = bcIter.newLength();
+ if (bc_bLength == 0) {
+ // insertion
+ if (ab_bLength == 0 || !abIter.hasChange()) {
+ addReplace(pending_aLength, pending_cLength + cLength);
+ pending_aLength = pending_cLength = 0;
+ } else {
+ pending_cLength += cLength;
+ }
+ continue;
+ }
+ }
+ // else see if the other iterator is done, too.
+ }
+ if (ab_bLength == 0) {
+ if (abHasNext && (abHasNext = abIter.next(errorCode))) {
+ aLength = abIter.oldLength();
+ ab_bLength = abIter.newLength();
+ if (ab_bLength == 0) {
+ // deletion
+ if (bc_bLength == bcIter.oldLength() || !bcIter.hasChange()) {
+ addReplace(pending_aLength + aLength, pending_cLength);
+ pending_aLength = pending_cLength = 0;
+ } else {
+ pending_aLength += aLength;
+ }
+ continue;
+ }
+ } else if (bc_bLength == 0) {
+ // Both iterators are done at the same time:
+ // The intermediate-string lengths match.
+ break;
+ } else {
+ // The ab output string is shorter than the bc input string.
+ if (!copyErrorTo(errorCode)) {
+ errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+ }
+ return *this;
+ }
+ }
+ if (bc_bLength == 0) {
+ // The bc input string is shorter than the ab output string.
+ if (!copyErrorTo(errorCode)) {
+ errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+ }
+ return *this;
+ }
+ // Done fetching: ab_bLength > 0 && bc_bLength > 0
+
+ // The current state has two parts:
+ // - Past: We accumulate a longer ac edit in the "pending" variables.
+ // - Current: We have copies of the current ab/bc edits in local variables.
+ // At least one side is newly fetched.
+ // One side might be a truncated remainder of an edit we fetched earlier.
+
+ if (!abIter.hasChange() && !bcIter.hasChange()) {
+ // An unchanged span all the way from string a to string c.
+ if (pending_aLength != 0 || pending_cLength != 0) {
+ addReplace(pending_aLength, pending_cLength);
+ pending_aLength = pending_cLength = 0;
+ }
+ int32_t unchangedLength = aLength <= cLength ? aLength : cLength;
+ addUnchanged(unchangedLength);
+ ab_bLength = aLength -= unchangedLength;
+ bc_bLength = cLength -= unchangedLength;
+ // At least one of the unchanged spans is now empty.
+ continue;
+ }
+ if (!abIter.hasChange() && bcIter.hasChange()) {
+ // Unchanged a->b but changed b->c.
+ if (ab_bLength >= bc_bLength) {
+ // Split the longer unchanged span into change + remainder.
+ addReplace(pending_aLength + bc_bLength, pending_cLength + cLength);
+ pending_aLength = pending_cLength = 0;
+ aLength = ab_bLength -= bc_bLength;
+ bc_bLength = 0;
+ continue;
+ }
+ // Handle the shorter unchanged span below like a change.
+ } else if (abIter.hasChange() && !bcIter.hasChange()) {
+ // Changed a->b and then unchanged b->c.
+ if (ab_bLength <= bc_bLength) {
+ // Split the longer unchanged span into change + remainder.
+ addReplace(pending_aLength + aLength, pending_cLength + ab_bLength);
+ pending_aLength = pending_cLength = 0;
+ cLength = bc_bLength -= ab_bLength;
+ ab_bLength = 0;
+ continue;
+ }
+ // Handle the shorter unchanged span below like a change.
+ } else { // both abIter.hasChange() && bcIter.hasChange()
+ if (ab_bLength == bc_bLength) {
+ // Changes on both sides up to the same position. Emit & reset.
+ addReplace(pending_aLength + aLength, pending_cLength + cLength);
+ pending_aLength = pending_cLength = 0;
+ ab_bLength = bc_bLength = 0;
+ continue;
+ }
+ }
+ // Accumulate the a->c change, reset the shorter side,
+ // keep a remainder of the longer one.
+ pending_aLength += aLength;
+ pending_cLength += cLength;
+ if (ab_bLength < bc_bLength) {
+ bc_bLength -= ab_bLength;
+ cLength = ab_bLength = 0;
+ } else { // ab_bLength > bc_bLength
+ ab_bLength -= bc_bLength;
+ aLength = bc_bLength = 0;
+ }
+ }
+ // Flush a change that was still being accumulated when both inputs ended.
+ if (pending_aLength != 0 || pending_cLength != 0) {
+ addReplace(pending_aLength, pending_cLength);
+ }
+ // Report any failure recorded in this object while appending.
+ copyErrorTo(errorCode);
+ return *this;
+}
+
Edits::Iterator::Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs) :
array(a), index(0), length(len), remaining(0),
onlyChanges_(oc), coarse(crs),
spanStart = destIndex;
spanLength = newLength_;
}
- // If we are at the start or limit of an empty span, then we search from
- // the start of the string so that we always return
- // the first of several consecutive empty spans, for consistent results.
- // We do not currently track the properties of the previous span,
- // so for now we always reset if we are at the start of the current span.
- if (i <= spanStart) {
+ if (i < spanStart) {
// Reset the iterator to the start.
index = remaining = oldLength_ = newLength_ = srcIndex = replIndex = destIndex = 0;
} else if (i < (spanStart + spanLength)) {
spanStart = destIndex;
spanLength = newLength_;
}
- if (i == spanStart || i < (spanStart + spanLength)) {
- // The index is in the current span, or at an empty one.
+ if (i < (spanStart + spanLength)) {
+ // The index is in the current span.
return 0;
}
if (remaining > 0) {
*/
Edits() :
array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0), numChanges(0),
- errorCode(U_ZERO_ERROR) {}
+ errorCode_(U_ZERO_ERROR) {}
+ /**
+ * Copy constructor.
+ * Copies the edits and the error state of the other object.
+ * If the other object is in a failure state, or if memory for the copy
+ * cannot be allocated, then this object contains no edits.
+ * @param other source edits
+ * @draft ICU 60
+ */
+ Edits(const Edits &other) :
+ array(stackArray), capacity(STACK_CAPACITY), length(other.length),
+ delta(other.delta), numChanges(other.numChanges),
+ errorCode_(other.errorCode_) {
+ // Start on the built-in array; copyArray() allocates a larger one if needed.
+ copyArray(other);
+ }
+ /**
+ * Move constructor, might leave src empty.
+ * This object will have the same contents that the source object had.
+ * @param src source edits
+ * @draft ICU 60
+ */
+ Edits(Edits &&src) U_NOEXCEPT :
+ array(stackArray), capacity(STACK_CAPACITY), length(src.length),
+ delta(src.delta), numChanges(src.numChanges),
+ errorCode_(src.errorCode_) {
+ // Start on the built-in array; moveArray() adopts src's array or copies its units.
+ moveArray(src);
+ }
+
/**
* Destructor.
* @draft ICU 59
*/
~Edits();
+ /**
+ * Assignment operator.
+ * Copies the edits and the error state of the other object.
+ * @param other source edits
+ * @return *this
+ * @draft ICU 60
+ */
+ Edits &operator=(const Edits &other);
+
+ /**
+ * Move assignment operator, might leave src empty.
+ * This object will have the same contents that the source object had.
+ * The behavior is undefined if *this and src are the same object.
+ * The array previously held by this object is released.
+ * @param src source edits
+ * @return *this
+ * @draft ICU 60
+ */
+ Edits &operator=(Edits &&src) U_NOEXCEPT;
+
/**
* Resets the data but may not release memory.
* @draft ICU 59
*/
- void reset();
+ void reset() U_NOEXCEPT;
/**
* Adds a record for an unchanged segment of text.
* @draft ICU 59
*/
struct U_COMMON_API Iterator U_FINAL : public UMemory {
+ /**
+ * Default constructor, empty iterator.
+ * The array pointer is nullptr and all lengths, indexes and flags are 0/FALSE.
+ * @draft ICU 60
+ */
+ Iterator() :
+ array(nullptr), index(0), length(0),
+ remaining(0), onlyChanges_(FALSE), coarse(FALSE),
+ changed(FALSE), oldLength_(0), newLength_(0),
+ srcIndex(0), replIndex(0), destIndex(0) {}
/**
* Copy constructor.
* @draft ICU 59
return Iterator(array, length, FALSE, FALSE);
}
+ /**
+ * Merges the two input Edits and appends the result to this object.
+ *
+ * Consider two string transformations (for example, normalization and case mapping)
+ * where each records Edits in addition to writing an output string.<br>
+ * Edits ab reflect how substrings of input string a
+ * map to substrings of intermediate string b.<br>
+ * Edits bc reflect how substrings of intermediate string b
+ * map to substrings of output string c.<br>
+ * This function merges ab and bc such that the additional edits
+ * recorded in this object reflect how substrings of input string a
+ * map to substrings of output string c.
+ *
+ * If unrelated Edits are passed in where the output string of the first
+ * has a different length than the input string of the second,
+ * then a U_ILLEGAL_ARGUMENT_ERROR is reported.
+ *
+ * If this object is already in a failure state, then that error is copied
+ * to errorCode and nothing is appended.
+ *
+ * @param ab reflects how substrings of input string a
+ * map to substrings of intermediate string b.
+ * @param bc reflects how substrings of intermediate string b
+ * map to substrings of output string c.
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return *this, with the merged edits appended
+ * @draft ICU 60
+ */
+ Edits &mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &errorCode);
+
private:
- Edits(const Edits &) = delete;
- Edits &operator=(const Edits &) = delete;
+ void releaseArray() U_NOEXCEPT;
+    // Copies the edit records of other into this object's array;
+    // helper for the copy constructor and copy assignment. Returns *this.
+    Edits &copyArray(const Edits &other);
+ Edits &moveArray(Edits &src) U_NOEXCEPT;
void setLastUnit(int32_t last) { array[length - 1] = (uint16_t)last; }
int32_t lastUnit() const { return length > 0 ? array[length - 1] : 0xffff; }
int32_t length;
int32_t delta;
int32_t numChanges;
- UErrorCode errorCode;
+ UErrorCode errorCode_;
uint16_t stackArray[STACK_CAPACITY];
};
void TestMalformedUTF8();
void TestBufferOverflow();
void TestEdits();
+ void TestCopyMoveEdits();
+ void TestMergeEdits();
void TestCaseMapWithEdits();
void TestCaseMapUTF8WithEdits();
void TestLongUnicodeString();
TESTCASE_AUTO(TestMalformedUTF8);
TESTCASE_AUTO(TestBufferOverflow);
TESTCASE_AUTO(TestEdits);
+ TESTCASE_AUTO(TestCopyMoveEdits);
+ TESTCASE_AUTO(TestMergeEdits);
TESTCASE_AUTO(TestCaseMapWithEdits);
TESTCASE_AUTO(TestCaseMapUTF8WithEdits);
TESTCASE_AUTO(TestLongUnicodeString);
assertFalse("reset then iterator", ei.next(errorCode));
}
+// Exercises the Edits copy constructor, copy assignment, move constructor and
+// move assignment with 250 recorded edits, and checks that a moved-from object
+// compares equal to an empty Edits.
+// Also covers the default-constructed Edits::Iterator.
+void StringCaseTest::TestCopyMoveEdits() {
+ IcuTestErrorCode errorCode(*this, "TestCopyMoveEdits");
+ // Exceed the stack array capacity.
+ Edits a;
+ for (int32_t i = 0; i < 250; ++i) {
+ a.addReplace(i % 10, (i % 10) + 1);
+ }
+ assertEquals("a: many edits, length delta", 250, a.lengthDelta());
+
+ // copy
+ Edits b(a);
+ assertEquals("b: copy of many edits, length delta", 250, b.lengthDelta());
+ assertEquals("a remains: many edits, length delta", 250, a.lengthDelta());
+ TestUtility::checkEqualEdits(*this, u"b copy of a", a, b, errorCode);
+
+ // assign
+ Edits c;
+ c.addUnchanged(99);
+ c.addReplace(88, 77);
+ c = b;
+ assertEquals("c: assigned many edits, length delta", 250, c.lengthDelta());
+ assertEquals("b remains: many edits, length delta", 250, b.lengthDelta());
+ TestUtility::checkEqualEdits(*this, u"c = b", b, c, errorCode);
+
+ // move constructor empties object with heap array
+ Edits d(std::move(a));
+ assertEquals("d: move-constructed many edits, length delta", 250, d.lengthDelta());
+ assertFalse("a moved away: no more hasChanges", a.hasChanges());
+ TestUtility::checkEqualEdits(*this, u"d() <- a", d, b, errorCode);
+ Edits empty;
+ TestUtility::checkEqualEdits(*this, u"a moved away", empty, a, errorCode);
+
+ // move assignment empties object with heap array
+ Edits e;
+ e.addReplace(0, 1000);
+ e = std::move(b);
+ assertEquals("e: move-assigned many edits, length delta", 250, e.lengthDelta());
+ assertFalse("b moved away: no more hasChanges", b.hasChanges());
+ TestUtility::checkEqualEdits(*this, u"e <- b", e, c, errorCode);
+ TestUtility::checkEqualEdits(*this, u"b moved away", empty, b, errorCode);
+
+ // Edits::Iterator default constructor.
+ Edits::Iterator iter;
+ assertFalse("Edits::Iterator().next()", iter.next(errorCode));
+ assertSuccess("Edits::Iterator().next()", errorCode);
+ // A default-constructed iterator can be assigned a real one afterwards.
+ iter = e.getFineChangesIterator();
+ assertTrue("iter.next()", iter.next(errorCode));
+ assertSuccess("iter.next()", errorCode);
+ assertTrue("iter.hasChange()", iter.hasChange());
+ assertEquals("iter.newLength()", 1, iter.newLength());
+}
+
+// Builds two chains of edits, ab (a -> b) and bc (b -> c), together with the expected
+// merged result, then checks Edits::mergeAndAppend() against it.
+// Afterwards appends more merged edits, appends empty edits, and verifies that
+// mismatched intermediate-string lengths yield U_ILLEGAL_ARGUMENT_ERROR.
+void StringCaseTest::TestMergeEdits() {
+ // For debugging, set -v to see matching edits up to a failure.
+ IcuTestErrorCode errorCode(*this, "TestMergeEdits");
+ Edits ab, bc, ac, expected_ac;
+
+ // Simple: Two parallel non-changes.
+ ab.addUnchanged(2);
+ bc.addUnchanged(2);
+ expected_ac.addUnchanged(2);
+
+ // Simple: Two aligned changes.
+ ab.addReplace(3, 2);
+ bc.addReplace(2, 1);
+ expected_ac.addReplace(3, 1);
+
+ // Unequal non-changes.
+ ab.addUnchanged(5);
+ bc.addUnchanged(3);
+ expected_ac.addUnchanged(3);
+ // ab ahead by 2
+
+ // Overlapping changes accumulate until they share a boundary.
+ ab.addReplace(4, 3);
+ bc.addReplace(3, 2);
+ ab.addReplace(4, 3);
+ bc.addReplace(3, 2);
+ ab.addReplace(4, 3);
+ bc.addReplace(3, 2);
+ bc.addUnchanged(4);
+ expected_ac.addReplace(14, 8);
+ // bc ahead by 2
+
+ // Balance out intermediate-string lengths.
+ ab.addUnchanged(2);
+ expected_ac.addUnchanged(2);
+
+ // Insert something and delete it: Should disappear.
+ ab.addReplace(0, 5);
+ ab.addReplace(0, 2);
+ bc.addReplace(7, 0);
+
+ // Parallel change to make a new boundary.
+ ab.addReplace(1, 2);
+ bc.addReplace(2, 3);
+ expected_ac.addReplace(1, 3);
+
+ // Multiple ab deletions should remain separate at the boundary.
+ ab.addReplace(1, 0);
+ ab.addReplace(2, 0);
+ ab.addReplace(3, 0);
+ expected_ac.addReplace(1, 0);
+ expected_ac.addReplace(2, 0);
+ expected_ac.addReplace(3, 0);
+
+ // Unequal non-changes can be split for another boundary.
+ ab.addUnchanged(2);
+ bc.addUnchanged(1);
+ expected_ac.addUnchanged(1);
+ // ab ahead by 1
+
+ // Multiple bc insertions should create a boundary and remain separate.
+ bc.addReplace(0, 4);
+ bc.addReplace(0, 5);
+ bc.addReplace(0, 6);
+ expected_ac.addReplace(0, 4);
+ expected_ac.addReplace(0, 5);
+ expected_ac.addReplace(0, 6);
+ // ab ahead by 1
+
+ // Multiple ab deletions in the middle of a bc change are merged.
+ bc.addReplace(2, 2);
+ // bc ahead by 1
+ ab.addReplace(1, 0);
+ ab.addReplace(2, 0);
+ ab.addReplace(3, 0);
+ ab.addReplace(4, 1);
+ expected_ac.addReplace(11, 2);
+
+ // Multiple bc insertions in the middle of an ab change are merged.
+ ab.addReplace(5, 6);
+ bc.addReplace(3, 3);
+ // ab ahead by 3
+ bc.addReplace(0, 4);
+ bc.addReplace(0, 5);
+ bc.addReplace(0, 6);
+ bc.addReplace(3, 7);
+ expected_ac.addReplace(5, 25);
+
+ // Delete around a deletion.
+ ab.addReplace(4, 4);
+ ab.addReplace(3, 0);
+ ab.addUnchanged(2);
+ bc.addReplace(2, 2);
+ bc.addReplace(4, 0);
+ expected_ac.addReplace(9, 2);
+
+ // Insert into an insertion.
+ ab.addReplace(0, 2);
+ bc.addReplace(1, 1);
+ bc.addReplace(0, 8);
+ bc.addUnchanged(4);
+ expected_ac.addReplace(0, 10);
+ // bc ahead by 3
+
+ // Balance out intermediate-string lengths.
+ ab.addUnchanged(3);
+ expected_ac.addUnchanged(3);
+
+ // Deletions meet insertions.
+ // Output order is arbitrary in principle, but we expect insertions first
+ // and want to keep it that way.
+ ab.addReplace(2, 0);
+ ab.addReplace(4, 0);
+ ab.addReplace(6, 0);
+ bc.addReplace(0, 1);
+ bc.addReplace(0, 3);
+ bc.addReplace(0, 5);
+ expected_ac.addReplace(0, 1);
+ expected_ac.addReplace(0, 3);
+ expected_ac.addReplace(0, 5);
+ expected_ac.addReplace(2, 0);
+ expected_ac.addReplace(4, 0);
+ expected_ac.addReplace(6, 0);
+
+ // End with a non-change, so that further edits are never reordered.
+ ab.addUnchanged(1);
+ bc.addUnchanged(1);
+ expected_ac.addUnchanged(1);
+
+ ac.mergeAndAppend(ab, bc, errorCode);
+ assertSuccess("ab+bc", errorCode);
+ if (!TestUtility::checkEqualEdits(*this, u"ab+bc", expected_ac, ac, errorCode)) {
+ return;
+ }
+
+ // Append more Edits.
+ Edits ab2, bc2;
+ ab2.addUnchanged(5);
+ bc2.addReplace(1, 2);
+ bc2.addUnchanged(4);
+ expected_ac.addReplace(1, 2);
+ expected_ac.addUnchanged(4);
+ ac.mergeAndAppend(ab2, bc2, errorCode);
+ assertSuccess("ab2+bc2", errorCode);
+ if (!TestUtility::checkEqualEdits(*this, u"ab2+bc2", expected_ac, ac, errorCode)) {
+ return;
+ }
+
+ // Append empty edits.
+ Edits empty;
+ ac.mergeAndAppend(empty, empty, errorCode);
+ assertSuccess("empty+empty", errorCode);
+ if (!TestUtility::checkEqualEdits(*this, u"empty+empty", expected_ac, ac, errorCode)) {
+ return;
+ }
+
+ // Error: Append more edits with mismatched intermediate-string lengths.
+ Edits mismatch;
+ mismatch.addReplace(1, 1);
+ ac.mergeAndAppend(ab2, mismatch, errorCode);
+ assertEquals("ab2+mismatch", U_ILLEGAL_ARGUMENT_ERROR, errorCode.get());
+ errorCode.reset();
+ ac.mergeAndAppend(mismatch, bc2, errorCode);
+ assertEquals("mismatch+bc2", U_ILLEGAL_ARGUMENT_ERROR, errorCode.get());
+ errorCode.reset();
+}
+
void StringCaseTest::TestCaseMapWithEdits() {
IcuTestErrorCode errorCode(*this, "TestEdits");
UChar dest[20];
**********************************************************************
*/
+#include <algorithm>
+#include <vector>
#include "unicode/utypes.h"
#include "unicode/edits.h"
#include "unicode/unistr.h"
return buf;
}
+namespace {
+
+// Formats the iterator's current edit as "old->new" for a change
+// or "old=new" for an unchanged span.
+UnicodeString printOneEdit(const Edits::Iterator &ei) {
+    const UnicodeString relation(ei.hasChange() ? u"->" : u"=");
+    return UnicodeString() + ei.oldLength() + relation + ei.newLength();
+}
+
+/**
+ * Simple reference mapping from a destination index to a source index,
+ * driven by the expected edits.
+ * Source deletions can make a destination index occur several times;
+ * the last occurrence wins, normally in a non-empty destination span,
+ * which is why the edits are scanned from the back.
+ */
+int32_t srcIndexFromDest(const EditChange expected[], int32_t expLength,
+                         int32_t srcLength, int32_t destLength, int32_t index) {
+    // Limits of the span under inspection, starting at the ends of both strings.
+    int32_t srcLimit = srcLength;
+    int32_t destLimit = destLength;
+    for (int32_t k = expLength; k > 0 && index < destLimit;) {
+        const EditChange &edit = expected[--k];
+        const int32_t srcStart = srcLimit - edit.oldLength;
+        const int32_t destStart = destLimit - edit.newLength;
+        if (index == destStart) {
+            return srcStart;
+        }
+        if (index > destStart) {
+            // In a change span, map to its end.
+            // In an unchanged span, keep the offset within it.
+            return edit.change ? srcLimit : srcStart + (index - destStart);
+        }
+        srcLimit = srcStart;
+        destLimit = destStart;
+    }
+    // index is outside the string.
+    return srcLimit;
+}
+
+/**
+ * Simple reference mapping from a source index to a destination index,
+ * driven by the expected edits and scanning them from the back.
+ */
+int32_t destIndexFromSrc(const EditChange expected[], int32_t expLength,
+                         int32_t srcLength, int32_t destLength, int32_t index) {
+    // Limits of the span under inspection, starting at the ends of both strings.
+    int32_t srcLimit = srcLength;
+    int32_t destLimit = destLength;
+    for (int32_t k = expLength; k > 0 && index < srcLimit;) {
+        const EditChange &edit = expected[--k];
+        const int32_t srcStart = srcLimit - edit.oldLength;
+        const int32_t destStart = destLimit - edit.newLength;
+        if (index == srcStart) {
+            return destStart;
+        }
+        if (index > srcStart) {
+            // In a change span, map to its end.
+            // In an unchanged span, keep the offset within it.
+            return edit.change ? destLimit : destStart + (index - srcStart);
+        }
+        srcLimit = srcStart;
+        destLimit = destStart;
+    }
+    // index is outside the string.
+    return destLimit;
+}
+
+} // namespace
+
+// For debugging, set -v to see matching edits up to a failure.
+// Steps fine-grained iterators over e1 and e2 in lockstep and asserts, for each step,
+// that both report the same next() result, that errorCode is a success,
+// and that the current edits print identically.
+// Stops after the first failed step or when either iterator is exhausted.
+// Returns the accumulated result of the assertions.
+UBool TestUtility::checkEqualEdits(IntlTest &test, const UnicodeString &name,
+ const Edits &e1, const Edits &e2, UErrorCode &errorCode) {
+ Edits::Iterator ei1 = e1.getFineIterator();
+ Edits::Iterator ei2 = e2.getFineIterator();
+ UBool ok = TRUE;
+ for (int32_t i = 0; ok; ++i) {
+ UBool ei1HasNext = ei1.next(errorCode);
+ UBool ei2HasNext = ei2.next(errorCode);
+ // "&=" does not short-circuit: all three assertions run on every step.
+ ok &= test.assertEquals(name + u" next()[" + i + u"]" + __LINE__,
+ ei1HasNext, ei2HasNext);
+ ok &= test.assertSuccess(name + u" errorCode[" + i + u"]" + __LINE__, errorCode);
+ ok &= test.assertEquals(name + u" edit[" + i + u"]" + __LINE__,
+ printOneEdit(ei1), printOneEdit(ei2));
+ if (!ei1HasNext || !ei2HasNext) {
+ break;
+ }
+ test.logln();
+ }
+ return ok;
+}
+
void TestUtility::checkEditsIter(
IntlTest &test,
const UnicodeString &name,
int32_t expSrcIndex = 0;
int32_t expDestIndex = 0;
int32_t expReplIndex = 0;
- int32_t expSrcIndexFromDest = 0; // for sourceIndexFromDestinationIndex()
- int32_t expDestIndexFromSrc = 0; // for destinationIndexFromSourceIndex()
for (int32_t expIndex = 0; expIndex < expLength; ++expIndex) {
const EditChange &expect = expected[expIndex];
UnicodeString msg = UnicodeString(name).append(u' ') + expIndex;
test.assertEquals(msg + u":" + __LINE__, expReplIndex, ei1.replacementIndex());
}
- if (expect.oldLength > 0 && expDestIndex == expDestIndexFromSrc) {
+ if (expect.oldLength > 0) {
test.assertTrue(msg + u":" + __LINE__, ei2.findSourceIndex(expSrcIndex, errorCode));
test.assertEquals(msg + u":" + __LINE__, expect.change, ei2.hasChange());
test.assertEquals(msg + u":" + __LINE__, expect.oldLength, ei2.oldLength());
}
}
- if (expect.newLength > 0 && expSrcIndex == expSrcIndexFromDest) {
+ if (expect.newLength > 0) {
test.assertTrue(msg + u":" + __LINE__, ei2.findDestinationIndex(expDestIndex, errorCode));
test.assertEquals(msg + u":" + __LINE__, expect.change, ei2.hasChange());
test.assertEquals(msg + u":" + __LINE__, expect.oldLength, ei2.oldLength());
}
}
- // Span starts.
- test.assertEquals(name + u":" + __LINE__, expDestIndexFromSrc,
- ei2.destinationIndexFromSourceIndex(expSrcIndex, errorCode));
- test.assertEquals(name + u":" + __LINE__, expSrcIndexFromDest,
- ei2.sourceIndexFromDestinationIndex(expDestIndex, errorCode));
-
- // Inside unchanged span map offsets 1:1.
- if (!expect.change && expect.oldLength >= 2) {
- test.assertEquals(name + u":" + __LINE__, expDestIndex + 1,
- ei2.destinationIndexFromSourceIndex(expSrcIndex + 1, errorCode));
- test.assertEquals(name + u":" + __LINE__, expSrcIndex + 1,
- ei2.sourceIndexFromDestinationIndex(expDestIndex + 1, errorCode));
- }
-
- // Inside change span map to the span limit.
- int32_t expSrcLimit = expSrcIndex + expect.oldLength;
- int32_t expDestLimit = expDestIndex + expect.newLength;
- if (expect.change) {
- if (expect.oldLength >= 2) {
- test.assertEquals(name + u":" + __LINE__, expDestLimit,
- ei2.destinationIndexFromSourceIndex(expSrcIndex + 1, errorCode));
- }
- if (expect.newLength >= 2) {
- test.assertEquals(name + u":" + __LINE__, expSrcLimit,
- ei2.sourceIndexFromDestinationIndex(expDestIndex + 1, errorCode));
- }
- }
-
- expSrcIndex = expSrcLimit;
- expDestIndex = expDestLimit;
+ expSrcIndex += expect.oldLength;
+ expDestIndex += expect.newLength;
if (expect.change) {
expReplIndex += expect.newLength;
}
- if (expect.newLength > 0) {
- expSrcIndexFromDest = expSrcIndex;
- }
- if (expect.oldLength > 0) {
- expDestIndexFromSrc = expDestIndex;
- }
}
UnicodeString msg = UnicodeString(name).append(u" end");
test.assertFalse(msg + u":" + __LINE__, ei1.next(errorCode));
test.assertFalse(name + u":" + __LINE__, ei2.findSourceIndex(expSrcIndex, errorCode));
test.assertFalse(name + u":" + __LINE__, ei2.findDestinationIndex(expDestIndex, errorCode));
- test.assertEquals(name + u":" + __LINE__, expDestIndex,
- ei2.destinationIndexFromSourceIndex(expSrcIndex, errorCode));
- test.assertEquals(name + u":" + __LINE__, expSrcIndex,
- ei2.sourceIndexFromDestinationIndex(expDestIndex, errorCode));
+
+ // Check mapping of all indexes against a simple implementation
+ // that works on the expected changes.
+ // Iterate once forward, once backward, to cover more runtime conditions.
+ int32_t srcLength = expSrcIndex;
+ int32_t destLength = expDestIndex;
+ std::vector<int32_t> srcIndexes;
+ std::vector<int32_t> destIndexes;
+ srcIndexes.push_back(-1);
+ destIndexes.push_back(-1);
+ int32_t srcIndex = 0;
+ int32_t destIndex = 0;
+    // Collect the indexes to probe: the start of every non-empty span and,
+    // for spans of at least two units, the first index inside the span.
+    for (int32_t i = 0; i < expLength; ++i) {
+        if (expected[i].oldLength > 0) {
+            srcIndexes.push_back(srcIndex);
+            if (expected[i].oldLength > 1) {
+                srcIndexes.push_back(srcIndex + 1);
+            }
+        }
+        if (expected[i].newLength > 0) {
+            destIndexes.push_back(destIndex);
+            // Mirror the source side: a one-unit span has no interior index,
+            // and destIndex + 1 would only duplicate a later span start or the end.
+            if (expected[i].newLength > 1) {
+                destIndexes.push_back(destIndex + 1);
+            }
+        }
+        srcIndex += expected[i].oldLength;
+        destIndex += expected[i].newLength;
+    }
+ srcIndexes.push_back(srcLength);
+ destIndexes.push_back(destLength);
+ srcIndexes.push_back(srcLength + 1);
+ destIndexes.push_back(destLength + 1);
+ std::reverse(destIndexes.begin(), destIndexes.end());
+ for (int32_t i : srcIndexes) {
+ test.assertEquals(name + u" destIndexFromSrc(" + i + u"):" + __LINE__,
+ destIndexFromSrc(expected, expLength, srcLength, destLength, i),
+ ei2.destinationIndexFromSourceIndex(i, errorCode));
+ }
+ for (int32_t i : destIndexes) {
+ test.assertEquals(name + u" srcIndexFromDest(" + i + u"):" + __LINE__,
+ srcIndexFromDest(expected, expLength, srcLength, destLength, i),
+ ei2.sourceIndexFromDestinationIndex(i, errorCode));
+ }
}
static UnicodeString hex(const uint8_t* bytes, int32_t len);
+ // Asserts that e1 and e2 contain the same sequence of fine-grained edits.
+ // name prefixes the assertion messages; returns the combined assertion result.
+ static UBool checkEqualEdits(IntlTest &test, const UnicodeString &name,
+ const Edits &e1, const Edits &e2, UErrorCode &errorCode);
+
static void checkEditsIter(
IntlTest &test, const UnicodeString &name,
Edits::Iterator ei1, Edits::Iterator ei2, // two equal iterators