ICU-13307 C++ Edits::mergeAndAppend(ab, bc); map indexes only from inside spans...
author Markus Scherer <markus.icu@gmail.com>
Wed, 16 Aug 2017 19:19:30 +0000 (19:19 +0000)
committer Markus Scherer <markus.icu@gmail.com>
Wed, 16 Aug 2017 19:19:30 +0000 (19:19 +0000)
X-SVN-Rev: 40333

icu4c/source/common/edits.cpp
icu4c/source/common/unicode/edits.h
icu4c/source/test/intltest/strcase.cpp
icu4c/source/test/intltest/testutil.cpp
icu4c/source/test/intltest/testutil.h

index ee1b22d06c940e9584ad90ddb1c7b8c656d40495..55c96f187a92f74ca62dbce05e5ac74a541979b8 100644 (file)
@@ -33,20 +33,85 @@ const int32_t LENGTH_IN_2TRAIL = 62;
 
 }  // namespace
 
-Edits::~Edits() {
-    if(array != stackArray) {
+void Edits::releaseArray() U_NOEXCEPT {
+    if (array != stackArray) {
         uprv_free(array);
     }
 }
 
-void Edits::reset() {
+Edits &Edits::copyArray(const Edits &other) {
+    if (U_FAILURE(errorCode_)) {
+        length = delta = numChanges = 0;
+        return *this;
+    }
+    if (length > capacity) {
+        uint16_t *newArray = (uint16_t *)uprv_malloc((size_t)length * 2);
+        if (newArray == nullptr) {
+            length = delta = numChanges = 0;
+            errorCode_ = U_MEMORY_ALLOCATION_ERROR;
+            return *this;
+        }
+        releaseArray();
+        array = newArray;
+        capacity = length;
+    }
+    if (length > 0) {
+        uprv_memcpy(array, other.array, (size_t)length * 2);
+    }
+    return *this;
+}
+
+Edits &Edits::moveArray(Edits &src) U_NOEXCEPT {
+    if (U_FAILURE(errorCode_)) {
+        length = delta = numChanges = 0;
+        return *this;
+    }
+    releaseArray();
+    if (length > STACK_CAPACITY) {
+        array = src.array;
+        capacity = src.capacity;
+        src.array = src.stackArray;
+        src.capacity = STACK_CAPACITY;
+        src.reset();
+        return *this;
+    }
+    array = stackArray;
+    capacity = STACK_CAPACITY;
+    if (length > 0) {
+        uprv_memcpy(array, src.array, (size_t)length * 2);
+    }
+    return *this;
+}
+
+Edits &Edits::operator=(const Edits &other) {
+    length = other.length;
+    delta = other.delta;
+    numChanges = other.numChanges;
+    errorCode_ = other.errorCode_;
+    return copyArray(other);
+}
+
+Edits &Edits::operator=(Edits &&src) U_NOEXCEPT {
+    length = src.length;
+    delta = src.delta;
+    numChanges = src.numChanges;
+    errorCode_ = src.errorCode_;
+    return moveArray(src);
+}
+
+Edits::~Edits() {
+    releaseArray();
+}
+
+void Edits::reset() U_NOEXCEPT {
     length = delta = numChanges = 0;
+    errorCode_ = U_ZERO_ERROR;
 }
 
 void Edits::addUnchanged(int32_t unchangedLength) {
-    if(U_FAILURE(errorCode) || unchangedLength == 0) { return; }
+    if(U_FAILURE(errorCode_) || unchangedLength == 0) { return; }
     if(unchangedLength < 0) {
-        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        errorCode_ = U_ILLEGAL_ARGUMENT_ERROR;
         return;
     }
     // Merge into previous unchanged-text record, if any.
@@ -72,7 +137,7 @@ void Edits::addUnchanged(int32_t unchangedLength) {
 }
 
 void Edits::addReplace(int32_t oldLength, int32_t newLength) {
-    if(U_FAILURE(errorCode)) { return; }
+    if(U_FAILURE(errorCode_)) { return; }
     if(oldLength == newLength && 0 < oldLength && oldLength <= MAX_SHORT_WIDTH) {
         // Replacement of short oldLength text units by same-length new text.
         // Merge into previous short-replacement record, if any.
@@ -88,7 +153,7 @@ void Edits::addReplace(int32_t oldLength, int32_t newLength) {
     }
 
     if(oldLength < 0 || newLength < 0) {
-        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        errorCode_ = U_ILLEGAL_ARGUMENT_ERROR;
         return;
     }
     if (oldLength == 0 && newLength == 0) {
@@ -100,7 +165,7 @@ void Edits::addReplace(int32_t oldLength, int32_t newLength) {
         if ((newDelta > 0 && delta >= 0 && newDelta > (INT32_MAX - delta)) ||
                 (newDelta < 0 && delta < 0 && newDelta < (INT32_MIN - delta))) {
             // Integer overflow or underflow.
-            errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+            errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR;
             return;
         }
         delta += newDelta;
@@ -151,7 +216,7 @@ UBool Edits::growArray() {
     } else if (capacity == INT32_MAX) {
         // Not U_BUFFER_OVERFLOW_ERROR because that could be confused on a string transform API
         // with a result-string-buffer overflow.
-        errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+        errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR;
         return FALSE;
     } else if (capacity >= (INT32_MAX / 2)) {
         newCapacity = INT32_MAX;
@@ -160,18 +225,16 @@ UBool Edits::growArray() {
     }
     // Grow by at least 5 units so that a maximal change record will fit.
     if ((newCapacity - capacity) < 5) {
-        errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+        errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR;
         return FALSE;
     }
     uint16_t *newArray = (uint16_t *)uprv_malloc((size_t)newCapacity * 2);
     if (newArray == NULL) {
-        errorCode = U_MEMORY_ALLOCATION_ERROR;
+        errorCode_ = U_MEMORY_ALLOCATION_ERROR;
         return FALSE;
     }
     uprv_memcpy(newArray, array, (size_t)length * 2);
-    if (array != stackArray) {
-        uprv_free(array);
-    }
+    releaseArray();
     array = newArray;
     capacity = newCapacity;
     return TRUE;
@@ -179,11 +242,157 @@ UBool Edits::growArray() {
 
 UBool Edits::copyErrorTo(UErrorCode &outErrorCode) {
     if (U_FAILURE(outErrorCode)) { return TRUE; }
-    if (U_SUCCESS(errorCode)) { return FALSE; }
-    outErrorCode = errorCode;
+    if (U_SUCCESS(errorCode_)) { return FALSE; }
+    outErrorCode = errorCode_;
     return TRUE;
 }
 
+Edits &Edits::mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &errorCode) {
+    if (copyErrorTo(errorCode)) { return *this; }
+    // Picture string a --(Edits ab)--> string b --(Edits bc)--> string c.
+    // Parallel iteration over both Edits.
+    Iterator abIter = ab.getFineIterator();
+    Iterator bcIter = bc.getFineIterator();
+    UBool abHasNext = TRUE, bcHasNext = TRUE;
+    // Copy iterator state into local variables, so that we can modify and subdivide spans.
+    // ab old & new length, bc old & new length
+    int32_t aLength = 0, ab_bLength = 0, bc_bLength = 0, cLength = 0;
+    // When we have different-intermediate-length changes, we accumulate a larger change.
+    int32_t pending_aLength = 0, pending_cLength = 0;
+    for (;;) {
+        // At this point, for each of the two iterators:
+        // Either we are done with the locally cached current edit,
+        // and its intermediate-string length has been reset,
+        // or we will continue to work with a truncated remainder of this edit.
+        //
+        // If the current edit is done, and the iterator has not yet reached the end,
+        // then we fetch the next edit. This is true for at least one of the iterators.
+        //
+        // Normally it does not matter whether we fetch from ab and then bc or vice versa.
+        // However, the result is observably different when
+        // ab deletions meet bc insertions at the same intermediate-string index.
+        // Some users expect the bc insertions to come first, so we fetch from bc first.
+        if (bc_bLength == 0) {
+            if (bcHasNext && (bcHasNext = bcIter.next(errorCode))) {
+                bc_bLength = bcIter.oldLength();
+                cLength = bcIter.newLength();
+                if (bc_bLength == 0) {
+                    // insertion
+                    if (ab_bLength == 0 || !abIter.hasChange()) {
+                        addReplace(pending_aLength, pending_cLength + cLength);
+                        pending_aLength = pending_cLength = 0;
+                    } else {
+                        pending_cLength += cLength;
+                    }
+                    continue;
+                }
+            }
+            // else see if the other iterator is done, too.
+        }
+        if (ab_bLength == 0) {
+            if (abHasNext && (abHasNext = abIter.next(errorCode))) {
+                aLength = abIter.oldLength();
+                ab_bLength = abIter.newLength();
+                if (ab_bLength == 0) {
+                    // deletion
+                    if (bc_bLength == bcIter.oldLength() || !bcIter.hasChange()) {
+                        addReplace(pending_aLength + aLength, pending_cLength);
+                        pending_aLength = pending_cLength = 0;
+                    } else {
+                        pending_aLength += aLength;
+                    }
+                    continue;
+                }
+            } else if (bc_bLength == 0) {
+                // Both iterators are done at the same time:
+                // The intermediate-string lengths match.
+                break;
+            } else {
+                // The ab output string is shorter than the bc input string.
+                if (!copyErrorTo(errorCode)) {
+                    errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+                }
+                return *this;
+            }
+        }
+        if (bc_bLength == 0) {
+            // The bc input string is shorter than the ab output string.
+            if (!copyErrorTo(errorCode)) {
+                errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+            }
+            return *this;
+        }
+        //  Done fetching: ab_bLength > 0 && bc_bLength > 0
+
+        // The current state has two parts:
+        // - Past: We accumulate a longer ac edit in the "pending" variables.
+        // - Current: We have copies of the current ab/bc edits in local variables.
+        //   At least one side is newly fetched.
+        //   One side might be a truncated remainder of an edit we fetched earlier.
+
+        if (!abIter.hasChange() && !bcIter.hasChange()) {
+            // An unchanged span all the way from string a to string c.
+            if (pending_aLength != 0 || pending_cLength != 0) {
+                addReplace(pending_aLength, pending_cLength);
+                pending_aLength = pending_cLength = 0;
+            }
+            int32_t unchangedLength = aLength <= cLength ? aLength : cLength;
+            addUnchanged(unchangedLength);
+            ab_bLength = aLength -= unchangedLength;
+            bc_bLength = cLength -= unchangedLength;
+            // At least one of the unchanged spans is now empty.
+            continue;
+        }
+        if (!abIter.hasChange() && bcIter.hasChange()) {
+            // Unchanged a->b but changed b->c.
+            if (ab_bLength >= bc_bLength) {
+                // Split the longer unchanged span into change + remainder.
+                addReplace(pending_aLength + bc_bLength, pending_cLength + cLength);
+                pending_aLength = pending_cLength = 0;
+                aLength = ab_bLength -= bc_bLength;
+                bc_bLength = 0;
+                continue;
+            }
+            // Handle the shorter unchanged span below like a change.
+        } else if (abIter.hasChange() && !bcIter.hasChange()) {
+            // Changed a->b and then unchanged b->c.
+            if (ab_bLength <= bc_bLength) {
+                // Split the longer unchanged span into change + remainder.
+                addReplace(pending_aLength + aLength, pending_cLength + ab_bLength);
+                pending_aLength = pending_cLength = 0;
+                cLength = bc_bLength -= ab_bLength;
+                ab_bLength = 0;
+                continue;
+            }
+            // Handle the shorter unchanged span below like a change.
+        } else {  // both abIter.hasChange() && bcIter.hasChange()
+            if (ab_bLength == bc_bLength) {
+                // Changes on both sides up to the same position. Emit & reset.
+                addReplace(pending_aLength + aLength, pending_cLength + cLength);
+                pending_aLength = pending_cLength = 0;
+                ab_bLength = bc_bLength = 0;
+                continue;
+            }
+        }
+        // Accumulate the a->c change, reset the shorter side,
+        // keep a remainder of the longer one.
+        pending_aLength += aLength;
+        pending_cLength += cLength;
+        if (ab_bLength < bc_bLength) {
+            bc_bLength -= ab_bLength;
+            cLength = ab_bLength = 0;
+        } else {  // ab_bLength > bc_bLength
+            ab_bLength -= bc_bLength;
+            aLength = bc_bLength = 0;
+        }
+    }
+    if (pending_aLength != 0 || pending_cLength != 0) {
+        addReplace(pending_aLength, pending_cLength);
+    }
+    copyErrorTo(errorCode);
+    return *this;
+}
+
 Edits::Iterator::Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs) :
         array(a), index(0), length(len), remaining(0),
         onlyChanges_(oc), coarse(crs),
@@ -308,12 +517,7 @@ int32_t Edits::Iterator::findIndex(int32_t i, UBool findSource, UErrorCode &erro
         spanStart = destIndex;
         spanLength = newLength_;
     }
-    // If we are at the start or limit of an empty span, then we search from
-    // the start of the string so that we always return
-    // the first of several consecutive empty spans, for consistent results.
-    // We do not currently track the properties of the previous span,
-    // so for now we always reset if we are at the start of the current span.
-    if (i <= spanStart) {
+    if (i < spanStart) {
         // Reset the iterator to the start.
         index = remaining = oldLength_ = newLength_ = srcIndex = replIndex = destIndex = 0;
     } else if (i < (spanStart + spanLength)) {
@@ -328,8 +532,8 @@ int32_t Edits::Iterator::findIndex(int32_t i, UBool findSource, UErrorCode &erro
             spanStart = destIndex;
             spanLength = newLength_;
         }
-        if (i == spanStart || i < (spanStart + spanLength)) {
-            // The index is in the current span, or at an empty one.
+        if (i < (spanStart + spanLength)) {
+            // The index is in the current span.
             return 0;
         }
         if (remaining > 0) {
index 21ea0d1184ec7803be4dfc4878941b9e8d6117dd..a7ea8e021d18d4b8b0baae815e0aa2571ce7fe50 100644 (file)
@@ -37,18 +37,60 @@ public:
      */
     Edits() :
             array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0), numChanges(0),
-            errorCode(U_ZERO_ERROR) {}
+            errorCode_(U_ZERO_ERROR) {}
+    /**
+     * Copy constructor.
+     * @param other source edits
+     * @draft ICU 60
+     */
+    Edits(const Edits &other) :
+            array(stackArray), capacity(STACK_CAPACITY), length(other.length),
+            delta(other.delta), numChanges(other.numChanges),
+            errorCode_(other.errorCode_) {
+        copyArray(other);
+    }
+    /**
+     * Move constructor, might leave src empty.
+     * This object will have the same contents that the source object had.
+     * @param src source edits
+     * @draft ICU 60
+     */
+    Edits(Edits &&src) U_NOEXCEPT :
+            array(stackArray), capacity(STACK_CAPACITY), length(src.length),
+            delta(src.delta), numChanges(src.numChanges),
+            errorCode_(src.errorCode_) {
+        moveArray(src);
+    }
+
     /**
      * Destructor.
      * @draft ICU 59
      */
     ~Edits();
 
+    /**
+     * Assignment operator.
+     * @param other source edits
+     * @return *this
+     * @draft ICU 60
+     */
+    Edits &operator=(const Edits &other);
+
+    /**
+     * Move assignment operator, might leave src empty.
+     * This object will have the same contents that the source object had.
+     * The behavior is undefined if *this and src are the same object.
+     * @param src source edits
+     * @return *this
+     * @draft ICU 60
+     */
+    Edits &operator=(Edits &&src) U_NOEXCEPT;
+
     /**
      * Resets the data but may not release memory.
      * @draft ICU 59
      */
-    void reset();
+    void reset() U_NOEXCEPT;
 
     /**
      * Adds a record for an unchanged segment of text.
@@ -99,6 +141,15 @@ public:
      * @draft ICU 59
      */
     struct U_COMMON_API Iterator U_FINAL : public UMemory {
+        /**
+         * Default constructor, empty iterator.
+         * @draft ICU 60
+         */
+        Iterator() :
+                array(nullptr), index(0), length(0),
+                remaining(0), onlyChanges_(FALSE), coarse(FALSE),
+                changed(FALSE), oldLength_(0), newLength_(0),
+                srcIndex(0), replIndex(0), destIndex(0) {}
         /**
          * Copy constructor.
          * @draft ICU 59
@@ -309,9 +360,39 @@ public:
         return Iterator(array, length, FALSE, FALSE);
     }
 
+    /**
+     * Merges the two input Edits and appends the result to this object.
+     *
+     * Consider two string transformations (for example, normalization and case mapping)
+     * where each records Edits in addition to writing an output string.<br>
+     * Edits ab reflect how substrings of input string a
+     * map to substrings of intermediate string b.<br>
+     * Edits bc reflect how substrings of intermediate string b
+     * map to substrings of output string c.<br>
+     * This function merges ab and bc such that the additional edits
+     * recorded in this object reflect how substrings of input string a
+     * map to substrings of output string c.
+     *
+     * If unrelated Edits are passed in where the output string of the first
+     * has a different length than the input string of the second,
+     * then a U_ILLEGAL_ARGUMENT_ERROR is reported.
+     *
+     * @param ab reflects how substrings of input string a
+     *     map to substrings of intermediate string b.
+     * @param bc reflects how substrings of intermediate string b
+     *     map to substrings of output string c.
+     * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+     *                  or else the function returns immediately. Check for U_FAILURE()
+     *                  on output or use with function chaining. (See User Guide for details.)
+     * @return *this, with the merged edits appended
+     * @draft ICU 60
+     */
+    Edits &mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &errorCode);
+
 private:
-    Edits(const Edits &) = delete;
-    Edits &operator=(const Edits &) = delete;
+    void releaseArray() U_NOEXCEPT;
+    Edits &copyArray(const Edits &other);
+    Edits &moveArray(Edits &src) U_NOEXCEPT;
 
     void setLastUnit(int32_t last) { array[length - 1] = (uint16_t)last; }
     int32_t lastUnit() const { return length > 0 ? array[length - 1] : 0xffff; }
@@ -325,7 +406,7 @@ private:
     int32_t length;
     int32_t delta;
     int32_t numChanges;
-    UErrorCode errorCode;
+    UErrorCode errorCode_;
     uint16_t stackArray[STACK_CAPACITY];
 };
 
index c8e48c46445b83b0e52deacb38eb6941e5aa18dc..28c93e1ec1efab54e19c471b89849006a48e600c 100644 (file)
@@ -57,6 +57,8 @@ public:
     void TestMalformedUTF8();
     void TestBufferOverflow();
     void TestEdits();
+    void TestCopyMoveEdits();
+    void TestMergeEdits();
     void TestCaseMapWithEdits();
     void TestCaseMapUTF8WithEdits();
     void TestLongUnicodeString();
@@ -94,6 +96,8 @@ StringCaseTest::runIndexedTest(int32_t index, UBool exec, const char *&name, cha
     TESTCASE_AUTO(TestMalformedUTF8);
     TESTCASE_AUTO(TestBufferOverflow);
     TESTCASE_AUTO(TestEdits);
+    TESTCASE_AUTO(TestCopyMoveEdits);
+    TESTCASE_AUTO(TestMergeEdits);
     TESTCASE_AUTO(TestCaseMapWithEdits);
     TESTCASE_AUTO(TestCaseMapUTF8WithEdits);
     TESTCASE_AUTO(TestLongUnicodeString);
@@ -966,6 +970,225 @@ void StringCaseTest::TestEdits() {
     assertFalse("reset then iterator", ei.next(errorCode));
 }
 
+void StringCaseTest::TestCopyMoveEdits() {
+    IcuTestErrorCode errorCode(*this, "TestCopyMoveEdits");
+    // Exceed the stack array capacity.
+    Edits a;
+    for (int32_t i = 0; i < 250; ++i) {
+        a.addReplace(i % 10, (i % 10) + 1);
+    }
+    assertEquals("a: many edits, length delta", 250, a.lengthDelta());
+
+    // copy
+    Edits b(a);
+    assertEquals("b: copy of many edits, length delta", 250, b.lengthDelta());
+    assertEquals("a remains: many edits, length delta", 250, a.lengthDelta());
+    TestUtility::checkEqualEdits(*this, u"b copy of a", a, b, errorCode);
+
+    // assign
+    Edits c;
+    c.addUnchanged(99);
+    c.addReplace(88, 77);
+    c = b;
+    assertEquals("c: assigned many edits, length delta", 250, c.lengthDelta());
+    assertEquals("b remains: many edits, length delta", 250, b.lengthDelta());
+    TestUtility::checkEqualEdits(*this, u"c = b", b, c, errorCode);
+
+    // move constructor empties object with heap array
+    Edits d(std::move(a));
+    assertEquals("d: move-constructed many edits, length delta", 250, d.lengthDelta());
+    assertFalse("a moved away: no more hasChanges", a.hasChanges());
+    TestUtility::checkEqualEdits(*this, u"d() <- a", d, b, errorCode);
+    Edits empty;
+    TestUtility::checkEqualEdits(*this, u"a moved away", empty, a, errorCode);
+
+    // move assignment empties object with heap array
+    Edits e;
+    e.addReplace(0, 1000);
+    e = std::move(b);
+    assertEquals("e: move-assigned many edits, length delta", 250, e.lengthDelta());
+    assertFalse("b moved away: no more hasChanges", b.hasChanges());
+    TestUtility::checkEqualEdits(*this, u"e <- b", e, c, errorCode);
+    TestUtility::checkEqualEdits(*this, u"b moved away", empty, b, errorCode);
+
+    // Edits::Iterator default constructor.
+    Edits::Iterator iter;
+    assertFalse("Edits::Iterator().next()", iter.next(errorCode));
+    assertSuccess("Edits::Iterator().next()", errorCode);
+    iter = e.getFineChangesIterator();
+    assertTrue("iter.next()", iter.next(errorCode));
+    assertSuccess("iter.next()", errorCode);
+    assertTrue("iter.hasChange()", iter.hasChange());
+    assertEquals("iter.newLength()", 1, iter.newLength());
+}
+
+void StringCaseTest::TestMergeEdits() {
+    // For debugging, set -v to see matching edits up to a failure.
+    IcuTestErrorCode errorCode(*this, "TestMergeEdits");
+    Edits ab, bc, ac, expected_ac;
+
+    // Simple: Two parallel non-changes.
+    ab.addUnchanged(2);
+    bc.addUnchanged(2);
+    expected_ac.addUnchanged(2);
+
+    // Simple: Two aligned changes.
+    ab.addReplace(3, 2);
+    bc.addReplace(2, 1);
+    expected_ac.addReplace(3, 1);
+
+    // Unequal non-changes.
+    ab.addUnchanged(5);
+    bc.addUnchanged(3);
+    expected_ac.addUnchanged(3);
+    // ab ahead by 2
+
+    // Overlapping changes accumulate until they share a boundary.
+    ab.addReplace(4, 3);
+    bc.addReplace(3, 2);
+    ab.addReplace(4, 3);
+    bc.addReplace(3, 2);
+    ab.addReplace(4, 3);
+    bc.addReplace(3, 2);
+    bc.addUnchanged(4);
+    expected_ac.addReplace(14, 8);
+    // bc ahead by 2
+
+    // Balance out intermediate-string lengths.
+    ab.addUnchanged(2);
+    expected_ac.addUnchanged(2);
+
+    // Insert something and delete it: Should disappear.
+    ab.addReplace(0, 5);
+    ab.addReplace(0, 2);
+    bc.addReplace(7, 0);
+
+    // Parallel change to make a new boundary.
+    ab.addReplace(1, 2);
+    bc.addReplace(2, 3);
+    expected_ac.addReplace(1, 3);
+
+    // Multiple ab deletions should remain separate at the boundary.
+    ab.addReplace(1, 0);
+    ab.addReplace(2, 0);
+    ab.addReplace(3, 0);
+    expected_ac.addReplace(1, 0);
+    expected_ac.addReplace(2, 0);
+    expected_ac.addReplace(3, 0);
+
+    // Unequal non-changes can be split for another boundary.
+    ab.addUnchanged(2);
+    bc.addUnchanged(1);
+    expected_ac.addUnchanged(1);
+    // ab ahead by 1
+
+    // Multiple bc insertions should create a boundary and remain separate.
+    bc.addReplace(0, 4);
+    bc.addReplace(0, 5);
+    bc.addReplace(0, 6);
+    expected_ac.addReplace(0, 4);
+    expected_ac.addReplace(0, 5);
+    expected_ac.addReplace(0, 6);
+    // ab ahead by 1
+
+    // Multiple ab deletions in the middle of a bc change are merged.
+    bc.addReplace(2, 2);
+    // bc ahead by 1
+    ab.addReplace(1, 0);
+    ab.addReplace(2, 0);
+    ab.addReplace(3, 0);
+    ab.addReplace(4, 1);
+    expected_ac.addReplace(11, 2);
+
+    // Multiple bc insertions in the middle of an ab change are merged.
+    ab.addReplace(5, 6);
+    bc.addReplace(3, 3);
+    // ab ahead by 3
+    bc.addReplace(0, 4);
+    bc.addReplace(0, 5);
+    bc.addReplace(0, 6);
+    bc.addReplace(3, 7);
+    expected_ac.addReplace(5, 25);
+
+    // Delete around a deletion.
+    ab.addReplace(4, 4);
+    ab.addReplace(3, 0);
+    ab.addUnchanged(2);
+    bc.addReplace(2, 2);
+    bc.addReplace(4, 0);
+    expected_ac.addReplace(9, 2);
+
+    // Insert into an insertion.
+    ab.addReplace(0, 2);
+    bc.addReplace(1, 1);
+    bc.addReplace(0, 8);
+    bc.addUnchanged(4);
+    expected_ac.addReplace(0, 10);
+    // bc ahead by 3
+
+    // Balance out intermediate-string lengths.
+    ab.addUnchanged(3);
+    expected_ac.addUnchanged(3);
+
+    // Deletions meet insertions.
+    // Output order is arbitrary in principle, but we expect insertions first
+    // and want to keep it that way.
+    ab.addReplace(2, 0);
+    ab.addReplace(4, 0);
+    ab.addReplace(6, 0);
+    bc.addReplace(0, 1);
+    bc.addReplace(0, 3);
+    bc.addReplace(0, 5);
+    expected_ac.addReplace(0, 1);
+    expected_ac.addReplace(0, 3);
+    expected_ac.addReplace(0, 5);
+    expected_ac.addReplace(2, 0);
+    expected_ac.addReplace(4, 0);
+    expected_ac.addReplace(6, 0);
+
+    // End with a non-change, so that further edits are never reordered.
+    ab.addUnchanged(1);
+    bc.addUnchanged(1);
+    expected_ac.addUnchanged(1);
+
+    ac.mergeAndAppend(ab, bc, errorCode);
+    assertSuccess("ab+bc", errorCode);
+    if (!TestUtility::checkEqualEdits(*this, u"ab+bc", expected_ac, ac, errorCode)) {
+        return;
+    }
+
+    // Append more Edits.
+    Edits ab2, bc2;
+    ab2.addUnchanged(5);
+    bc2.addReplace(1, 2);
+    bc2.addUnchanged(4);
+    expected_ac.addReplace(1, 2);
+    expected_ac.addUnchanged(4);
+    ac.mergeAndAppend(ab2, bc2, errorCode);
+    assertSuccess("ab2+bc2", errorCode);
+    if (!TestUtility::checkEqualEdits(*this, u"ab2+bc2", expected_ac, ac, errorCode)) {
+        return;
+    }
+
+    // Append empty edits.
+    Edits empty;
+    ac.mergeAndAppend(empty, empty, errorCode);
+    assertSuccess("empty+empty", errorCode);
+    if (!TestUtility::checkEqualEdits(*this, u"empty+empty", expected_ac, ac, errorCode)) {
+        return;
+    }
+
+    // Error: Append more edits with mismatched intermediate-string lengths.
+    Edits mismatch;
+    mismatch.addReplace(1, 1);
+    ac.mergeAndAppend(ab2, mismatch, errorCode);
+    assertEquals("ab2+mismatch", U_ILLEGAL_ARGUMENT_ERROR, errorCode.get());
+    errorCode.reset();
+    ac.mergeAndAppend(mismatch, bc2, errorCode);
+    assertEquals("mismatch+bc2", U_ILLEGAL_ARGUMENT_ERROR, errorCode.get());
+    errorCode.reset();
+}
+
 void StringCaseTest::TestCaseMapWithEdits() {
     IcuTestErrorCode errorCode(*this, "TestEdits");
     UChar dest[20];
index 742b94004e789651dc2aa5cbbe0c72d45b111c7f..adea11d7646181f00da9fa2d243bc3b3a4e181c3 100644 (file)
@@ -10,6 +10,8 @@
 **********************************************************************
 */
 
+#include <algorithm>
+#include <vector>
 #include "unicode/utypes.h"
 #include "unicode/edits.h"
 #include "unicode/unistr.h"
@@ -65,6 +67,100 @@ UnicodeString TestUtility::hex(const uint8_t* bytes, int32_t len) {
     return buf;
 }
 
+namespace {
+
+UnicodeString printOneEdit(const Edits::Iterator &ei) {
+    if (ei.hasChange()) {
+        return UnicodeString() + ei.oldLength() + u"->" + ei.newLength();
+    } else {
+        return UnicodeString() + ei.oldLength() + u"=" + ei.newLength();
+    }
+}
+
+/**
+ * Maps indexes according to the expected edits.
+ * A destination index can occur multiple times when there are source deletions.
+ * Map according to the last occurrence, normally in a non-empty destination span.
+ * Simplest is to search from the back.
+ */
+int32_t srcIndexFromDest(const EditChange expected[], int32_t expLength,
+                         int32_t srcLength, int32_t destLength, int32_t index) {
+    int32_t srcIndex = srcLength;
+    int32_t destIndex = destLength;
+    int32_t i = expLength;
+    while (index < destIndex && i > 0) {
+        --i;
+        int32_t prevSrcIndex = srcIndex - expected[i].oldLength;
+        int32_t prevDestIndex = destIndex - expected[i].newLength;
+        if (index == prevDestIndex) {
+            return prevSrcIndex;
+        } else if (index > prevDestIndex) {
+            if (expected[i].change) {
+                // In a change span, map to its end.
+                return srcIndex;
+            } else {
+                // In an unchanged span, offset within it.
+                return prevSrcIndex + (index - prevDestIndex);
+            }
+        }
+        srcIndex = prevSrcIndex;
+        destIndex = prevDestIndex;
+    }
+    // index is outside the string.
+    return srcIndex;
+}
+
+int32_t destIndexFromSrc(const EditChange expected[], int32_t expLength,
+                         int32_t srcLength, int32_t destLength, int32_t index) {
+    int32_t srcIndex = srcLength;
+    int32_t destIndex = destLength;
+    int32_t i = expLength;
+    while (index < srcIndex && i > 0) {
+        --i;
+        int32_t prevSrcIndex = srcIndex - expected[i].oldLength;
+        int32_t prevDestIndex = destIndex - expected[i].newLength;
+        if (index == prevSrcIndex) {
+            return prevDestIndex;
+        } else if (index > prevSrcIndex) {
+            if (expected[i].change) {
+                // In a change span, map to its end.
+                return destIndex;
+            } else {
+                // In an unchanged span, offset within it.
+                return prevDestIndex + (index - prevSrcIndex);
+            }
+        }
+        srcIndex = prevSrcIndex;
+        destIndex = prevDestIndex;
+    }
+    // index is outside the string.
+    return destIndex;
+}
+
+}  // namespace
+
+// For debugging, set -v to see matching edits up to a failure.
+UBool TestUtility::checkEqualEdits(IntlTest &test, const UnicodeString &name,
+                                   const Edits &e1, const Edits &e2, UErrorCode &errorCode) {
+    Edits::Iterator ei1 = e1.getFineIterator();
+    Edits::Iterator ei2 = e2.getFineIterator();
+    UBool ok = TRUE;
+    for (int32_t i = 0; ok; ++i) {
+        UBool ei1HasNext = ei1.next(errorCode);
+        UBool ei2HasNext = ei2.next(errorCode);
+        ok &= test.assertEquals(name + u" next()[" + i + u"]" + __LINE__,
+                                ei1HasNext, ei2HasNext);
+        ok &= test.assertSuccess(name + u" errorCode[" + i + u"]" + __LINE__, errorCode);
+        ok &= test.assertEquals(name + u" edit[" + i + u"]" + __LINE__,
+                                printOneEdit(ei1), printOneEdit(ei2));
+        if (!ei1HasNext || !ei2HasNext) {
+            break;
+        }
+        test.logln();
+    }
+    return ok;
+}
+
 void TestUtility::checkEditsIter(
         IntlTest &test,
         const UnicodeString &name,
@@ -77,8 +173,6 @@ void TestUtility::checkEditsIter(
     int32_t expSrcIndex = 0;
     int32_t expDestIndex = 0;
     int32_t expReplIndex = 0;
-    int32_t expSrcIndexFromDest = 0;  // for sourceIndexFromDestinationIndex()
-    int32_t expDestIndexFromSrc = 0;  // for destinationIndexFromSourceIndex()
     for (int32_t expIndex = 0; expIndex < expLength; ++expIndex) {
         const EditChange &expect = expected[expIndex];
         UnicodeString msg = UnicodeString(name).append(u' ') + expIndex;
@@ -92,7 +186,7 @@ void TestUtility::checkEditsIter(
             test.assertEquals(msg + u":" + __LINE__, expReplIndex, ei1.replacementIndex());
         }
 
-        if (expect.oldLength > 0 && expDestIndex == expDestIndexFromSrc) {
+        if (expect.oldLength > 0) {
             test.assertTrue(msg + u":" + __LINE__, ei2.findSourceIndex(expSrcIndex, errorCode));
             test.assertEquals(msg + u":" + __LINE__, expect.change, ei2.hasChange());
             test.assertEquals(msg + u":" + __LINE__, expect.oldLength, ei2.oldLength());
@@ -108,7 +202,7 @@ void TestUtility::checkEditsIter(
             }
         }
 
-        if (expect.newLength > 0 && expSrcIndex == expSrcIndexFromDest) {
+        if (expect.newLength > 0) {
             test.assertTrue(msg + u":" + __LINE__, ei2.findDestinationIndex(expDestIndex, errorCode));
             test.assertEquals(msg + u":" + __LINE__, expect.change, ei2.hasChange());
             test.assertEquals(msg + u":" + __LINE__, expect.oldLength, ei2.oldLength());
@@ -124,45 +218,11 @@ void TestUtility::checkEditsIter(
             }
         }
 
-        // Span starts.
-        test.assertEquals(name + u":" + __LINE__, expDestIndexFromSrc,
-                          ei2.destinationIndexFromSourceIndex(expSrcIndex, errorCode));
-        test.assertEquals(name + u":" + __LINE__, expSrcIndexFromDest,
-                          ei2.sourceIndexFromDestinationIndex(expDestIndex, errorCode));
-
-        // Inside unchanged span map offsets 1:1.
-        if (!expect.change && expect.oldLength >= 2) {
-            test.assertEquals(name + u":" + __LINE__, expDestIndex + 1,
-                              ei2.destinationIndexFromSourceIndex(expSrcIndex + 1, errorCode));
-            test.assertEquals(name + u":" + __LINE__, expSrcIndex + 1,
-                              ei2.sourceIndexFromDestinationIndex(expDestIndex + 1, errorCode));
-        }
-
-        // Inside change span map to the span limit.
-        int32_t expSrcLimit = expSrcIndex + expect.oldLength;
-        int32_t expDestLimit = expDestIndex + expect.newLength;
-        if (expect.change) {
-            if (expect.oldLength >= 2) {
-                test.assertEquals(name + u":" + __LINE__, expDestLimit,
-                                  ei2.destinationIndexFromSourceIndex(expSrcIndex + 1, errorCode));
-            }
-            if (expect.newLength >= 2) {
-                test.assertEquals(name + u":" + __LINE__, expSrcLimit,
-                                  ei2.sourceIndexFromDestinationIndex(expDestIndex + 1, errorCode));
-            }
-        }
-
-        expSrcIndex = expSrcLimit;
-        expDestIndex = expDestLimit;
+        expSrcIndex += expect.oldLength;
+        expDestIndex += expect.newLength;
         if (expect.change) {
             expReplIndex += expect.newLength;
         }
-        if (expect.newLength > 0) {
-            expSrcIndexFromDest = expSrcIndex;
-        }
-        if (expect.oldLength > 0) {
-            expDestIndexFromSrc = expDestIndex;
-        }
     }
     UnicodeString msg = UnicodeString(name).append(u" end");
     test.assertFalse(msg + u":" + __LINE__, ei1.next(errorCode));
@@ -175,8 +235,47 @@ void TestUtility::checkEditsIter(
 
     test.assertFalse(name + u":" + __LINE__, ei2.findSourceIndex(expSrcIndex, errorCode));
     test.assertFalse(name + u":" + __LINE__, ei2.findDestinationIndex(expDestIndex, errorCode));
-    test.assertEquals(name + u":" + __LINE__, expDestIndex,
-                      ei2.destinationIndexFromSourceIndex(expSrcIndex, errorCode));
-    test.assertEquals(name + u":" + __LINE__, expSrcIndex,
-                      ei2.sourceIndexFromDestinationIndex(expDestIndex, errorCode));
+
+    // Check the index mapping functions against a simple reference implementation
+    // (destIndexFromSrc/srcIndexFromDest) that works on the expected changes. Probe -1, each span
+    // start, one interior index of longer spans, and length/length+1; src forward, dest backward.
+    int32_t srcLength = expSrcIndex;  // after the loop above: sum of all expected oldLength values
+    int32_t destLength = expDestIndex;  // after the loop above: sum of all expected newLength values
+    std::vector<int32_t> srcIndexes;
+    std::vector<int32_t> destIndexes;
+    srcIndexes.push_back(-1);  // out-of-range probe before the start
+    destIndexes.push_back(-1);
+    int32_t srcIndex = 0;
+    int32_t destIndex = 0;
+    for (int32_t i = 0; i < expLength; ++i) {
+        if (expected[i].oldLength > 0) {
+            srcIndexes.push_back(srcIndex);  // start of this span on the source side
+            if (expected[i].oldLength > 1) {
+                srcIndexes.push_back(srcIndex + 1);  // an index strictly inside the span
+            }
+        }
+        if (expected[i].newLength > 0) {
+            destIndexes.push_back(destIndex);  // start of this span on the destination side
+            if (expected[i].newLength > 1) {  // only spans of length >= 2 have an interior index
+                destIndexes.push_back(destIndex + 1);
+            }
+        }
+        srcIndex += expected[i].oldLength;
+        destIndex += expected[i].newLength;
+    }
+    srcIndexes.push_back(srcLength);  // the end of the text itself
+    destIndexes.push_back(destLength);
+    srcIndexes.push_back(srcLength + 1);  // out-of-range probe past the end
+    destIndexes.push_back(destLength + 1);
+    std::reverse(destIndexes.begin(), destIndexes.end());  // destination indexes are probed back to front
+    for (int32_t i : srcIndexes) {
+        test.assertEquals(name + u" destIndexFromSrc(" + i + u"):" + __LINE__,
+                          destIndexFromSrc(expected, expLength, srcLength, destLength, i),
+                          ei2.destinationIndexFromSourceIndex(i, errorCode));
+    }
+    for (int32_t i : destIndexes) {
+        test.assertEquals(name + u" srcIndexFromDest(" + i + u"):" + __LINE__,
+                          srcIndexFromDest(expected, expLength, srcLength, destLength, i),
+                          ei2.sourceIndexFromDestinationIndex(i, errorCode));
+    }
 }
index 920d42151881aaef3ccc27382f9c2111496714af..6d997a78f42bca4cd6cc1c3f6827e2a6f5789afc 100644 (file)
@@ -37,6 +37,9 @@ public:
 
     static UnicodeString hex(const uint8_t* bytes, int32_t len);
 
+    static UBool checkEqualEdits(IntlTest &test, const UnicodeString &name,  // compares e1 with e2 edit by edit
+                                 const Edits &e1, const Edits &e2, UErrorCode &errorCode);
+
     static void checkEditsIter(
         IntlTest &test, const UnicodeString &name,
         Edits::Iterator ei1, Edits::Iterator ei2,  // two equal iterators