granicus.if.org Git - icu/commitdiff
ICU-13270 icu::Edits add numberOfChanges(); Edits::Iterator add findDestinationIndex...
author Markus Scherer <markus.icu@gmail.com>
Mon, 24 Jul 2017 22:43:53 +0000 (22:43 +0000)
committer Markus Scherer <markus.icu@gmail.com>
Mon, 24 Jul 2017 22:43:53 +0000 (22:43 +0000)
X-SVN-Rev: 40286

icu4c/source/common/edits.cpp
icu4c/source/common/unicode/edits.h
icu4c/source/test/intltest/strcase.cpp
icu4c/source/test/intltest/testutil.cpp
icu4c/source/test/intltest/tstnorm.cpp
icu4j/main/classes/core/src/com/ibm/icu/text/Edits.java
icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UCharacterCaseTest.java

index 58a70d5c92796e69c21920d6a1cab982cfaf9d7d..ee1b22d06c940e9584ad90ddb1c7b8c656d40495 100644 (file)
@@ -40,7 +40,7 @@ Edits::~Edits() {
 }
 
 void Edits::reset() {
-    length = delta = 0;
+    length = delta = numChanges = 0;
 }
 
 void Edits::addUnchanged(int32_t unchangedLength) {
@@ -76,6 +76,7 @@ void Edits::addReplace(int32_t oldLength, int32_t newLength) {
     if(oldLength == newLength && 0 < oldLength && oldLength <= MAX_SHORT_WIDTH) {
         // Replacement of short oldLength text units by same-length new text.
         // Merge into previous short-replacement record, if any.
+        ++numChanges;
         int32_t last = lastUnit();
         if(MAX_UNCHANGED < last && last < MAX_SHORT_CHANGE &&
                 (last >> 12) == oldLength && (last & 0xfff) < MAX_SHORT_CHANGE_LENGTH) {
@@ -93,6 +94,7 @@ void Edits::addReplace(int32_t oldLength, int32_t newLength) {
     if (oldLength == 0 && newLength == 0) {
         return;
     }
+    ++numChanges;
     int32_t newDelta = newLength - oldLength;
     if (newDelta != 0) {
         if ((newDelta > 0 && delta >= 0 && newDelta > (INT32_MAX - delta)) ||
@@ -182,18 +184,6 @@ UBool Edits::copyErrorTo(UErrorCode &outErrorCode) {
     return TRUE;
 }
 
-UBool Edits::hasChanges() const {
-    if (delta != 0) {
-        return TRUE;
-    }
-    for (int32_t i = 0; i < length; ++i) {
-        if (array[i] > MAX_UNCHANGED) {
-            return TRUE;
-        }
-    }
-    return FALSE;
-}
-
 Edits::Iterator::Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs) :
         array(a), index(0), length(len), remaining(0),
         onlyChanges_(oc), coarse(crs),
@@ -308,39 +298,97 @@ UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) {
     return TRUE;
 }
 
-UBool Edits::Iterator::findSourceIndex(int32_t i, UErrorCode &errorCode) {
-    if (U_FAILURE(errorCode) || i < 0) { return FALSE; }
-    if (i < srcIndex) {
+int32_t Edits::Iterator::findIndex(int32_t i, UBool findSource, UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode) || i < 0) { return -1; }
+    int32_t spanStart, spanLength;
+    if (findSource) {  // find source index
+        spanStart = srcIndex;
+        spanLength = oldLength_;
+    } else {  // find destination index
+        spanStart = destIndex;
+        spanLength = newLength_;
+    }
+    // If we are at the start or limit of an empty span, then we search from
+    // the start of the string so that we always return
+    // the first of several consecutive empty spans, for consistent results.
+    // We do not currently track the properties of the previous span,
+    // so for now we always reset if we are at the start of the current span.
+    if (i <= spanStart) {
         // Reset the iterator to the start.
         index = remaining = oldLength_ = newLength_ = srcIndex = replIndex = destIndex = 0;
-    } else if (i < (srcIndex + oldLength_)) {
+    } else if (i < (spanStart + spanLength)) {
         // The index is in the current span.
-        return TRUE;
+        return 0;
     }
     while (next(FALSE, errorCode)) {
-        if (i < (srcIndex + oldLength_)) {
-            // The index is in the current span.
-            return TRUE;
+        if (findSource) {
+            spanStart = srcIndex;
+            spanLength = oldLength_;
+        } else {
+            spanStart = destIndex;
+            spanLength = newLength_;
+        }
+        if (i == spanStart || i < (spanStart + spanLength)) {
+            // The index is in the current span, or at an empty one.
+            return 0;
         }
         if (remaining > 0) {
             // Is the index in one of the remaining compressed edits?
-            // srcIndex is the start of the current span, before the remaining ones.
-            int32_t len = (remaining + 1) * oldLength_;
-            if (i < (srcIndex + len)) {
-                int32_t n = (i - srcIndex) / oldLength_;  // 1 <= n <= remaining
-                len = n * oldLength_;
+            // spanStart is the start of the current span, before the remaining ones.
+            int32_t len = (remaining + 1) * spanLength;
+            if (i < (spanStart + len)) {
+                int32_t n = (i - spanStart) / spanLength;  // 1 <= n <= remaining
+                len = n * spanLength;
                 srcIndex += len;
                 replIndex += len;
                 destIndex += len;
                 remaining -= n;
-                return TRUE;
+                return 0;
             }
             // Make next() skip all of these edits at once.
             oldLength_ = newLength_ = len;
             remaining = 0;
         }
     }
-    return FALSE;
+    return 1;
+}
+
+int32_t Edits::Iterator::destinationIndexFromSourceIndex(int32_t i, UErrorCode &errorCode) {
+    int32_t where = findIndex(i, TRUE, errorCode);
+    if (where < 0) {
+        // Error or before the string.
+        return 0;
+    }
+    if (where > 0 || i == srcIndex) {
+        // At or after string length, or at start of the found span.
+        return destIndex;
+    }
+    if (changed) {
+        // In a change span, map to its end.
+        return destIndex + newLength_;
+    } else {
+        // In an unchanged span, offset 1:1 within it.
+        return destIndex + (i - srcIndex);
+    }
+}
+
+int32_t Edits::Iterator::sourceIndexFromDestinationIndex(int32_t i, UErrorCode &errorCode) {
+    int32_t where = findIndex(i, FALSE, errorCode);
+    if (where < 0) {
+        // Error or before the string.
+        return 0;
+    }
+    if (where > 0 || i == destIndex) {
+        // At or after string length, or at start of the found span.
+        return srcIndex;
+    }
+    if (changed) {
+        // In a change span, map to its end.
+        return srcIndex + oldLength_;
+    } else {
+        // In an unchanged span, offset within it.
+        return srcIndex + (i - destIndex);
+    }
 }
 
 U_NAMESPACE_END
index 8d3becb7a2a58027f09bd3423a368624536a9d36..21ea0d1184ec7803be4dfc4878941b9e8d6117dd 100644 (file)
@@ -36,7 +36,7 @@ public:
      * @draft ICU 59
      */
     Edits() :
-            array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0),
+            array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0), numChanges(0),
             errorCode(U_ZERO_ERROR) {}
     /**
      * Destructor.
@@ -66,6 +66,9 @@ public:
      * Sets the UErrorCode if an error occurred while recording edits.
      * Preserves older error codes in the outErrorCode.
      * Normally called from inside ICU string transformation functions, not user code.
+     * @param outErrorCode Set to an error code if it does not contain one already
+     *                  and an error occurred while recording edits.
+     *                  Otherwise unchanged.
      * @return TRUE if U_FAILURE(outErrorCode)
      * @draft ICU 59
      */
@@ -81,7 +84,13 @@ public:
      * @return TRUE if there are any change edits
      * @draft ICU 59
      */
-    UBool hasChanges() const;
+    UBool hasChanges() const { return numChanges != 0; }
+
+    /**
+     * @return the number of change edits
+     * @draft ICU 60
+     */
+    int32_t numberOfChanges() const { return numChanges; }
 
     /**
      * Access to the list of edits.
@@ -103,6 +112,9 @@ public:
 
         /**
          * Advances to the next edit.
+         * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+         *                  or else the function returns immediately. Check for U_FAILURE()
+         *                  on output or use with function chaining. (See User Guide for details.)
          * @return TRUE if there is another edit
          * @draft ICU 59
          */
@@ -121,10 +133,86 @@ public:
          * if the source index is out of bounds for the source string.
          *
          * @param i source index
+         * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+         *                  or else the function returns immediately. Check for U_FAILURE()
+         *                  on output or use with function chaining. (See User Guide for details.)
          * @return TRUE if the edit for the source index was found
          * @draft ICU 59
          */
-        UBool findSourceIndex(int32_t i, UErrorCode &errorCode);
+        UBool findSourceIndex(int32_t i, UErrorCode &errorCode) {
+            return findIndex(i, TRUE, errorCode) == 0;
+        }
+
+        /**
+         * Finds the edit that contains the destination index.
+         * The destination index may be found in a non-change
+         * even if normal iteration would skip non-changes.
+         * Normal iteration can continue from a found edit.
+         *
+         * The iterator state before this search logically does not matter.
+         * (It may affect the performance of the search.)
+         *
+         * The iterator state after this search is undefined
+         * if the destination index is out of bounds for the destination string.
+         *
+         * @param i destination index
+         * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+         *                  or else the function returns immediately. Check for U_FAILURE()
+         *                  on output or use with function chaining. (See User Guide for details.)
+         * @return TRUE if the edit for the destination index was found
+         * @draft ICU 60
+         */
+        UBool findDestinationIndex(int32_t i, UErrorCode &errorCode) {
+            return findIndex(i, FALSE, errorCode) == 0;
+        }
+
+        /**
+         * Returns the destination index corresponding to the given source index.
+         * If the source index is inside a change edit (not at its start),
+         * then the destination index at the end of that edit is returned,
+         * since there is no information about index mapping inside a change edit.
+         *
+         * (This means that indexes to the start and middle of an edit,
+         * for example around a grapheme cluster, are mapped to indexes
+         * encompassing the entire edit.
+         * The alternative, mapping an interior index to the start,
+         * would map such an interval to an empty one.)
+         *
+         * This operation will usually but not always modify this object.
+         * The iterator state after this search is undefined.
+         *
+         * @param i source index
+         * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+         *                  or else the function returns immediately. Check for U_FAILURE()
+         *                  on output or use with function chaining. (See User Guide for details.)
+         * @return destination index; undefined if i is not 0..string length
+         * @draft ICU 60
+         */
+        int32_t destinationIndexFromSourceIndex(int32_t i, UErrorCode &errorCode);
+
+        /**
+         * Returns the source index corresponding to the given destination index.
+         * If the destination index is inside a change edit (not at its start),
+         * then the source index at the end of that edit is returned,
+         * since there is no information about index mapping inside a change edit.
+         *
+         * (This means that indexes to the start and middle of an edit,
+         * for example around a grapheme cluster, are mapped to indexes
+         * encompassing the entire edit.
+         * The alternative, mapping an interior index to the start,
+         * would map such an interval to an empty one.)
+         *
+         * This operation will usually but not always modify this object.
+         * The iterator state after this search is undefined.
+         *
+         * @param i destination index
+         * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+         *                  or else the function returns immediately. Check for U_FAILURE()
+         *                  on output or use with function chaining. (See User Guide for details.)
+         * @return source index; undefined if i is not 0..string length
+         * @draft ICU 60
+         */
+        int32_t sourceIndexFromDestinationIndex(int32_t i, UErrorCode &errorCode);
 
         /**
          * @return TRUE if this edit replaces oldLength() units with newLength() different ones.
@@ -170,6 +258,8 @@ public:
         void updateIndexes();
         UBool noNext();
         UBool next(UBool onlyChanges, UErrorCode &errorCode);
+        /** @return -1: error or i<0; 0: found; 1: i>=string length */
+        int32_t findIndex(int32_t i, UBool findSource, UErrorCode &errorCode);
 
         const uint16_t *array;
         int32_t index, length;
@@ -234,6 +324,7 @@ private:
     int32_t capacity;
     int32_t length;
     int32_t delta;
+    int32_t numChanges;
     UErrorCode errorCode;
     uint16_t stackArray[STACK_CAPACITY];
 };
index a3901b2302e6351ca0a0cbf4947d7e88e60f8114..c8e48c46445b83b0e52deacb38eb6941e5aa18dc 100644 (file)
@@ -906,13 +906,15 @@ void StringCaseTest::TestBufferOverflow() {
 void StringCaseTest::TestEdits() {
     IcuTestErrorCode errorCode(*this, "TestEdits");
     Edits edits;
-    assertFalse("new Edits", edits.hasChanges());
+    assertFalse("new Edits hasChanges", edits.hasChanges());
+    assertEquals("new Edits numberOfChanges", 0, edits.numberOfChanges());
     assertEquals("new Edits", 0, edits.lengthDelta());
     edits.addUnchanged(1);  // multiple unchanged ranges are combined
     edits.addUnchanged(10000);  // too long, and they are split
     edits.addReplace(0, 0);
     edits.addUnchanged(2);
-    assertFalse("unchanged 10003", edits.hasChanges());
+    assertFalse("unchanged 10003 hasChanges", edits.hasChanges());
+    assertEquals("unchanged 10003 numberOfChanges", 0, edits.numberOfChanges());
     assertEquals("unchanged 10003", 0, edits.lengthDelta());
     edits.addReplace(1, 1);  // multiple short equal-length edits are compressed
     edits.addUnchanged(0);
@@ -922,7 +924,8 @@ void StringCaseTest::TestEdits() {
     edits.addReplace(100, 0);
     edits.addReplace(3000, 4000);  // variable-length encoding
     edits.addReplace(100000, 100000);
-    assertTrue("some edits", edits.hasChanges());
+    assertTrue("some edits hasChanges", edits.hasChanges());
+    assertEquals("some edits numberOfChanges", 7, edits.numberOfChanges());
     assertEquals("some edits", 10 - 100 + 1000, edits.lengthDelta());
     UErrorCode outErrorCode = U_ZERO_ERROR;
     assertFalse("edits done: copyErrorTo", edits.copyErrorTo(outErrorCode));
@@ -956,7 +959,8 @@ void StringCaseTest::TestEdits() {
             fineExpectedChanges, UPRV_LENGTHOF(fineExpectedChanges), FALSE, errorCode);
 
     edits.reset();
-    assertFalse("reset", edits.hasChanges());
+    assertFalse("reset hasChanges", edits.hasChanges());
+    assertEquals("reset numberOfChanges", 0, edits.numberOfChanges());
     assertEquals("reset", 0, edits.lengthDelta());
     Edits::Iterator ei = edits.getCoarseChangesIterator();
     assertFalse("reset then iterator", ei.next(errorCode));
index a78429843dceb1141902b3dc088fdce8d811f3a8..742b94004e789651dc2aa5cbbe0c72d45b111c7f 100644 (file)
@@ -71,32 +71,35 @@ void TestUtility::checkEditsIter(
         Edits::Iterator ei1, Edits::Iterator ei2,  // two equal iterators
         const EditChange expected[], int32_t expLength, UBool withUnchanged,
         UErrorCode &errorCode) {
-    test.assertFalse(name, ei2.findSourceIndex(-1, errorCode));
+    test.assertFalse(name + u":" + __LINE__, ei2.findSourceIndex(-1, errorCode));
+    test.assertFalse(name + u":" + __LINE__, ei2.findDestinationIndex(-1, errorCode));
 
     int32_t expSrcIndex = 0;
     int32_t expDestIndex = 0;
     int32_t expReplIndex = 0;
+    int32_t expSrcIndexFromDest = 0;  // for sourceIndexFromDestinationIndex()
+    int32_t expDestIndexFromSrc = 0;  // for destinationIndexFromSourceIndex()
     for (int32_t expIndex = 0; expIndex < expLength; ++expIndex) {
         const EditChange &expect = expected[expIndex];
         UnicodeString msg = UnicodeString(name).append(u' ') + expIndex;
         if (withUnchanged || expect.change) {
-            test.assertTrue(msg, ei1.next(errorCode));
-            test.assertEquals(msg, expect.change, ei1.hasChange());
-            test.assertEquals(msg, expect.oldLength, ei1.oldLength());
-            test.assertEquals(msg, expect.newLength, ei1.newLength());
-            test.assertEquals(msg, expSrcIndex, ei1.sourceIndex());
-            test.assertEquals(msg, expDestIndex, ei1.destinationIndex());
-            test.assertEquals(msg, expReplIndex, ei1.replacementIndex());
+            test.assertTrue(msg + u":" + __LINE__, ei1.next(errorCode));
+            test.assertEquals(msg + u":" + __LINE__, expect.change, ei1.hasChange());
+            test.assertEquals(msg + u":" + __LINE__, expect.oldLength, ei1.oldLength());
+            test.assertEquals(msg + u":" + __LINE__, expect.newLength, ei1.newLength());
+            test.assertEquals(msg + u":" + __LINE__, expSrcIndex, ei1.sourceIndex());
+            test.assertEquals(msg + u":" + __LINE__, expDestIndex, ei1.destinationIndex());
+            test.assertEquals(msg + u":" + __LINE__, expReplIndex, ei1.replacementIndex());
         }
 
-        if (expect.oldLength > 0) {
-            test.assertTrue(msg, ei2.findSourceIndex(expSrcIndex, errorCode));
-            test.assertEquals(msg, expect.change, ei2.hasChange());
-            test.assertEquals(msg, expect.oldLength, ei2.oldLength());
-            test.assertEquals(msg, expect.newLength, ei2.newLength());
-            test.assertEquals(msg, expSrcIndex, ei2.sourceIndex());
-            test.assertEquals(msg, expDestIndex, ei2.destinationIndex());
-            test.assertEquals(msg, expReplIndex, ei2.replacementIndex());
+        if (expect.oldLength > 0 && expDestIndex == expDestIndexFromSrc) {
+            test.assertTrue(msg + u":" + __LINE__, ei2.findSourceIndex(expSrcIndex, errorCode));
+            test.assertEquals(msg + u":" + __LINE__, expect.change, ei2.hasChange());
+            test.assertEquals(msg + u":" + __LINE__, expect.oldLength, ei2.oldLength());
+            test.assertEquals(msg + u":" + __LINE__, expect.newLength, ei2.newLength());
+            test.assertEquals(msg + u":" + __LINE__, expSrcIndex, ei2.sourceIndex());
+            test.assertEquals(msg + u":" + __LINE__, expDestIndex, ei2.destinationIndex());
+            test.assertEquals(msg + u":" + __LINE__, expReplIndex, ei2.replacementIndex());
             if (!withUnchanged) {
                 // For some iterators, move past the current range
                 // so that findSourceIndex() has to look before the current index.
@@ -105,20 +108,75 @@ void TestUtility::checkEditsIter(
             }
         }
 
-        expSrcIndex += expect.oldLength;
-        expDestIndex += expect.newLength;
+        if (expect.newLength > 0 && expSrcIndex == expSrcIndexFromDest) {
+            test.assertTrue(msg + u":" + __LINE__, ei2.findDestinationIndex(expDestIndex, errorCode));
+            test.assertEquals(msg + u":" + __LINE__, expect.change, ei2.hasChange());
+            test.assertEquals(msg + u":" + __LINE__, expect.oldLength, ei2.oldLength());
+            test.assertEquals(msg + u":" + __LINE__, expect.newLength, ei2.newLength());
+            test.assertEquals(msg + u":" + __LINE__, expSrcIndex, ei2.sourceIndex());
+            test.assertEquals(msg + u":" + __LINE__, expDestIndex, ei2.destinationIndex());
+            test.assertEquals(msg + u":" + __LINE__, expReplIndex, ei2.replacementIndex());
+            if (!withUnchanged) {
+                // For some iterators, move past the current range
+                // so that findDestinationIndex() has to look before the current index.
+                ei2.next(errorCode);
+                ei2.next(errorCode);
+            }
+        }
+
+        // Span starts.
+        test.assertEquals(name + u":" + __LINE__, expDestIndexFromSrc,
+                          ei2.destinationIndexFromSourceIndex(expSrcIndex, errorCode));
+        test.assertEquals(name + u":" + __LINE__, expSrcIndexFromDest,
+                          ei2.sourceIndexFromDestinationIndex(expDestIndex, errorCode));
+
+        // Inside unchanged span map offsets 1:1.
+        if (!expect.change && expect.oldLength >= 2) {
+            test.assertEquals(name + u":" + __LINE__, expDestIndex + 1,
+                              ei2.destinationIndexFromSourceIndex(expSrcIndex + 1, errorCode));
+            test.assertEquals(name + u":" + __LINE__, expSrcIndex + 1,
+                              ei2.sourceIndexFromDestinationIndex(expDestIndex + 1, errorCode));
+        }
+
+        // Inside change span map to the span limit.
+        int32_t expSrcLimit = expSrcIndex + expect.oldLength;
+        int32_t expDestLimit = expDestIndex + expect.newLength;
+        if (expect.change) {
+            if (expect.oldLength >= 2) {
+                test.assertEquals(name + u":" + __LINE__, expDestLimit,
+                                  ei2.destinationIndexFromSourceIndex(expSrcIndex + 1, errorCode));
+            }
+            if (expect.newLength >= 2) {
+                test.assertEquals(name + u":" + __LINE__, expSrcLimit,
+                                  ei2.sourceIndexFromDestinationIndex(expDestIndex + 1, errorCode));
+            }
+        }
+
+        expSrcIndex = expSrcLimit;
+        expDestIndex = expDestLimit;
         if (expect.change) {
             expReplIndex += expect.newLength;
         }
+        if (expect.newLength > 0) {
+            expSrcIndexFromDest = expSrcIndex;
+        }
+        if (expect.oldLength > 0) {
+            expDestIndexFromSrc = expDestIndex;
+        }
     }
     UnicodeString msg = UnicodeString(name).append(u" end");
-    test.assertFalse(msg, ei1.next(errorCode));
-    test.assertFalse(msg, ei1.hasChange());
-    test.assertEquals(msg, 0, ei1.oldLength());
-    test.assertEquals(msg, 0, ei1.newLength());
-    test.assertEquals(msg, expSrcIndex, ei1.sourceIndex());
-    test.assertEquals(msg, expDestIndex, ei1.destinationIndex());
-    test.assertEquals(msg, expReplIndex, ei1.replacementIndex());
-
-    test.assertFalse(name, ei2.findSourceIndex(expSrcIndex, errorCode));
+    test.assertFalse(msg + u":" + __LINE__, ei1.next(errorCode));
+    test.assertFalse(msg + u":" + __LINE__, ei1.hasChange());
+    test.assertEquals(msg + u":" + __LINE__, 0, ei1.oldLength());
+    test.assertEquals(msg + u":" + __LINE__, 0, ei1.newLength());
+    test.assertEquals(msg + u":" + __LINE__, expSrcIndex, ei1.sourceIndex());
+    test.assertEquals(msg + u":" + __LINE__, expDestIndex, ei1.destinationIndex());
+    test.assertEquals(msg + u":" + __LINE__, expReplIndex, ei1.replacementIndex());
+
+    test.assertFalse(name + u":" + __LINE__, ei2.findSourceIndex(expSrcIndex, errorCode));
+    test.assertFalse(name + u":" + __LINE__, ei2.findDestinationIndex(expDestIndex, errorCode));
+    test.assertEquals(name + u":" + __LINE__, expDestIndex,
+                      ei2.destinationIndexFromSourceIndex(expSrcIndex, errorCode));
+    test.assertEquals(name + u":" + __LINE__, expSrcIndex,
+                      ei2.sourceIndexFromDestinationIndex(expDestIndex, errorCode));
 }
index b79232e5bfdec768781d310610f8210eedc8eda8..6ca80f9decd7525b3cd4aa809560f105ece429ec 100644 (file)
@@ -1562,6 +1562,8 @@ BasicNormalizerTest::TestNormalizeUTF8WithEdits() {
         { TRUE, 6, 3 },  // 가\u3133→ 갃
         { FALSE, 2, 2 }  // 2 spaces
     };
+    assertTrue("normalizeUTF8 with Edits hasChanges", edits.hasChanges());
+    assertEquals("normalizeUTF8 with Edits numberOfChanges", 9, edits.numberOfChanges());
     TestUtility::checkEditsIter(*this, u"normalizeUTF8 with Edits",
             edits.getFineIterator(), edits.getFineIterator(),
             expectedChanges, UPRV_LENGTHOF(expectedChanges),
@@ -1577,6 +1579,8 @@ BasicNormalizerTest::TestNormalizeUTF8WithEdits() {
     nfkc_cf->normalizeUTF8(U_OMIT_UNCHANGED_TEXT, src, sink, &edits, errorCode);
     assertSuccess("normalizeUTF8 omit unchanged", errorCode.get());
     assertEquals("normalizeUTF8 omit unchanged", expected.c_str(), result.c_str());
+    assertTrue("normalizeUTF8 omit unchanged hasChanges", edits.hasChanges());
+    assertEquals("normalizeUTF8 omit unchanged numberOfChanges", 9, edits.numberOfChanges());
     TestUtility::checkEditsIter(*this, u"normalizeUTF8 omit unchanged",
             edits.getFineIterator(), edits.getFineIterator(),
             expectedChanges, UPRV_LENGTHOF(expectedChanges),
@@ -1604,6 +1608,8 @@ BasicNormalizerTest::TestNormalizeUTF8WithEdits() {
         { TRUE, 6, 3 },  // 가\u3133→ 갃
         { FALSE, 2, 2 }  // 2 spaces
     };
+    assertTrue("filtered normalizeUTF8 hasChanges", edits.hasChanges());
+    assertEquals("filtered normalizeUTF8 numberOfChanges", 7, edits.numberOfChanges());
     TestUtility::checkEditsIter(*this, u"filtered normalizeUTF8",
             edits.getFineIterator(), edits.getFineIterator(),
             filteredChanges, UPRV_LENGTHOF(filteredChanges),
@@ -1621,6 +1627,8 @@ BasicNormalizerTest::TestNormalizeUTF8WithEdits() {
     fn2.normalizeUTF8(U_OMIT_UNCHANGED_TEXT, src, sink, &edits, errorCode);
     assertSuccess("filtered normalizeUTF8 omit unchanged", errorCode.get());
     assertEquals("filtered normalizeUTF8 omit unchanged", expected.c_str(), result.c_str());
+    assertTrue("filtered normalizeUTF8 omit unchanged hasChanges", edits.hasChanges());
+    assertEquals("filtered normalizeUTF8 omit unchanged numberOfChanges", 7, edits.numberOfChanges());
     TestUtility::checkEditsIter(*this, u"filtered normalizeUTF8 omit unchanged",
             edits.getFineIterator(), edits.getFineIterator(),
             filteredChanges, UPRV_LENGTHOF(filteredChanges),
index f9cbf9fb4a6930b562ab93f4bb83ef550172270b..9f413cf9fd3393417cb73b894fd96a49e60a7905 100644 (file)
@@ -36,6 +36,7 @@ public final class Edits {
     private char[] array;
     private int length;
     private int delta;
+    private int numChanges;
 
     /**
      * Constructs an empty object.
@@ -52,7 +53,7 @@ public final class Edits {
      * @provisional This API might change or be removed in a future release.
      */
     public void reset() {
-        length = delta = 0;
+        length = delta = numChanges = 0;
     }
 
     private void setLastUnit(int last) {
@@ -105,6 +106,7 @@ public final class Edits {
         if(oldLength == newLength && 0 < oldLength && oldLength <= MAX_SHORT_WIDTH) {
             // Replacement of short oldLength text units by same-length new text.
             // Merge into previous short-replacement record, if any.
+            ++numChanges;
             int last = lastUnit();
             if(MAX_UNCHANGED < last && last < MAX_SHORT_CHANGE &&
                     (last >> 12) == oldLength && (last & 0xfff) < MAX_SHORT_CHANGE_LENGTH) {
@@ -123,6 +125,7 @@ public final class Edits {
         if (oldLength == 0 && newLength == 0) {
             return;
         }
+        ++numChanges;
         int newDelta = newLength - oldLength;
         if (newDelta != 0) {
             if ((newDelta > 0 && delta >= 0 && newDelta > (Integer.MAX_VALUE - delta)) ||
@@ -202,17 +205,14 @@ public final class Edits {
      * @draft ICU 59
      * @provisional This API might change or be removed in a future release.
      */
-    public boolean hasChanges()  {
-        if (delta != 0) {
-            return true;
-        }
-        for (int i = 0; i < length; ++i) {
-            if (array[i] > MAX_UNCHANGED) {
-                return true;
-            }
-        }
-        return false;
-    }
+    public boolean hasChanges()  { return numChanges != 0; }
+
+    /**
+     * @return the number of change edits
+     * @draft ICU 60
+     * @provisional This API might change or be removed in a future release.
+     */
+    public int numberOfChanges() { return numChanges; }
 
     /**
      * Access to the list of edits.
@@ -374,38 +374,162 @@ public final class Edits {
          * @provisional This API might change or be removed in a future release.
          */
         public boolean findSourceIndex(int i) {
-            if (i < 0) { return false; }
-            if (i < srcIndex) {
+            return findIndex(i, true) == 0;
+        }
+
+        /**
+         * Finds the edit that contains the destination index.
+         * The destination index may be found in a non-change
+         * even if normal iteration would skip non-changes.
+         * Normal iteration can continue from a found edit.
+         *
+         * <p>The iterator state before this search logically does not matter.
+         * (It may affect the performance of the search.)
+         *
+         * <p>The iterator state after this search is undefined
+         * if the destination index is out of bounds for the destination string.
+         *
+         * @param i destination index
+         * @return true if the edit for the destination index was found
+         * @draft ICU 60
+         * @provisional This API might change or be removed in a future release.
+         */
+        public boolean findDestinationIndex(int i) {
+            return findIndex(i, false) == 0;
+        }
+
+        /** @return -1: error or i<0; 0: found; 1: i>=string length */
+        private int findIndex(int i, boolean findSource) {
+            if (i < 0) { return -1; }
+            int spanStart, spanLength;
+            if (findSource) {  // find source index
+                spanStart = srcIndex;
+                spanLength = oldLength_;
+            } else {  // find destination index
+                spanStart = destIndex;
+                spanLength = newLength_;
+            }
+            // If we are at the start or limit of an empty span, then we search from
+            // the start of the string so that we always return
+            // the first of several consecutive empty spans, for consistent results.
+            // We do not currently track the properties of the previous span,
+            // so for now we always reset if we are at the start of the current span.
+            if (i <= spanStart) {
                 // Reset the iterator to the start.
                 index = remaining = oldLength_ = newLength_ = srcIndex = replIndex = destIndex = 0;
-            } else if (i < (srcIndex + oldLength_)) {
+            } else if (i < (spanStart + spanLength)) {
                 // The index is in the current span.
-                return true;
+                return 0;
             }
             while (next(false)) {
-                if (i < (srcIndex + oldLength_)) {
-                    // The index is in the current span.
-                    return true;
+                if (findSource) {
+                    spanStart = srcIndex;
+                    spanLength = oldLength_;
+                } else {
+                    spanStart = destIndex;
+                    spanLength = newLength_;
+                }
+                if (i == spanStart || i < (spanStart + spanLength)) {
+                    // The index is in the current span, or at an empty one.
+                    return 0;
                 }
                 if (remaining > 0) {
                     // Is the index in one of the remaining compressed edits?
-                    // srcIndex is the start of the current span, before the remaining ones.
-                    int len = (remaining + 1) * oldLength_;
-                    if (i < (srcIndex + len)) {
-                        int n = (i - srcIndex) / oldLength_;  // 1 <= n <= remaining
-                        len = n * oldLength_;
+                    // spanStart is the start of the current span, before the remaining ones.
+                    int len = (remaining + 1) * spanLength;
+                    if (i < (spanStart + len)) {
+                        int n = (i - spanStart) / spanLength;  // 1 <= n <= remaining
+                        len = n * spanLength;
                         srcIndex += len;
                         replIndex += len;
                         destIndex += len;
                         remaining -= n;
-                        return true;
+                        return 0;
                     }
                     // Make next() skip all of these edits at once.
                     oldLength_ = newLength_ = len;
                     remaining = 0;
                 }
             }
-            return false;
+            return 1;
+        }
+
+        /**
+         * Returns the destination index corresponding to the given source index.
+         * If the source index is inside a change edit (not at its start),
+         * then the destination index at the end of that edit is returned,
+         * since there is no information about index mapping inside a change edit.
+         *
+         * <p>(This means that indexes to the start and middle of an edit,
+         * for example around a grapheme cluster, are mapped to indexes
+         * encompassing the entire edit.
+         * The alternative, mapping an interior index to the start,
+         * would map such an interval to an empty one.)
+         *
+         * <p>This operation will usually but not always modify this object.
+         * The iterator state after this search is undefined.
+         *
+         * @param i source index
+         * @return destination index; undefined if i is not 0..string length
+         * @draft ICU 60
+         * @provisional This API might change or be removed in a future release.
+         */
+        public int destinationIndexFromSourceIndex(int i) {
+            int where = findIndex(i, true);
+            if (where < 0) {
+                // Error or before the string.
+                return 0;
+            }
+            if (where > 0 || i == srcIndex) {
+                // At or after string length, or at start of the found span.
+                return destIndex;
+            }
+            if (changed) {
+                // In a change span, map to its end.
+                return destIndex + newLength_;
+            } else {
+                // In an unchanged span, offset 1:1 within it.
+                return destIndex + (i - srcIndex);
+            }
+        }
+
+        /**
+         * Returns the source index corresponding to the given destination index.
+         * If the destination index is inside a change edit (not at its start),
+         * then the source index at the end of that edit is returned,
+         * since there is no information about index mapping inside a change edit.
+         *
+         * <p>(This means that indexes to the start and middle of an edit,
+         * for example around a grapheme cluster, are mapped to indexes
+         * encompassing the entire edit.
+         * The alternative, mapping an interior index to the start,
+         * would map such an interval to an empty one.)
+         *
+         * <p>This operation will usually but not always modify this object.
+         * The iterator state after this search is undefined.
+         *
+         * @param i destination index
+         * @return source index; undefined if i is not 0..string length
+         * @draft ICU 60
+         * @provisional This API might change or be removed in a future release.
+         */
+        public int sourceIndexFromDestinationIndex(int i) {
+            int where = findIndex(i, false);
+            if (where < 0) {
+                // Error or before the string.
+                return 0;
+            }
+            if (where > 0 || i == destIndex) {
+                // At or after string length, or at start of the found span.
+                return srcIndex;
+            }
+            if (changed) {
+                // In a change span, map to its end.
+                return srcIndex + oldLength_;
+            } else {
+                // In an unchanged span, offset within it.
+                return srcIndex + (i - destIndex);
+            }
         }
 
         /**
index 8075fef6c8854870d313faf9224dee6b3cd87aa7..1df35243816db58223accf5c70ba604c1eec7e93 100644 (file)
@@ -781,10 +781,13 @@ public final class UCharacterCaseTest extends TestFmwk
             String name, Edits.Iterator ei1, Edits.Iterator ei2,  // two equal iterators
             EditChange[] expected, boolean withUnchanged) {
         assertFalse(name, ei2.findSourceIndex(-1));
+        assertFalse(name, ei2.findDestinationIndex(-1));
 
         int expSrcIndex = 0;
         int expDestIndex = 0;
         int expReplIndex = 0;
+        int expSrcIndexFromDest = 0;  // for sourceIndexFromDestinationIndex()
+        int expDestIndexFromSrc = 0;  // for destinationIndexFromSourceIndex()
         for (int expIndex = 0; expIndex < expected.length; ++expIndex) {
             EditChange expect = expected[expIndex];
             String msg = name + ' ' + expIndex;
@@ -798,7 +801,7 @@ public final class UCharacterCaseTest extends TestFmwk
                 assertEquals(msg, expReplIndex, ei1.replacementIndex());
             }
 
-            if (expect.oldLength > 0) {
+            if (expect.oldLength > 0 && expDestIndex == expDestIndexFromSrc) {
                 assertTrue(msg, ei2.findSourceIndex(expSrcIndex));
                 assertEquals(msg, expect.change, ei2.hasChange());
                 assertEquals(msg, expect.oldLength, ei2.oldLength());
@@ -814,11 +817,61 @@ public final class UCharacterCaseTest extends TestFmwk
                 }
             }
 
-            expSrcIndex += expect.oldLength;
-            expDestIndex += expect.newLength;
+            if (expect.newLength > 0 && expSrcIndex == expSrcIndexFromDest) {
+                assertTrue(msg, ei2.findDestinationIndex(expDestIndex));
+                assertEquals(msg, expect.change, ei2.hasChange());
+                assertEquals(msg, expect.oldLength, ei2.oldLength());
+                assertEquals(msg, expect.newLength, ei2.newLength());
+                assertEquals(msg, expSrcIndex, ei2.sourceIndex());
+                assertEquals(msg, expDestIndex, ei2.destinationIndex());
+                assertEquals(msg, expReplIndex, ei2.replacementIndex());
+                if (!withUnchanged) {
+                    // For some iterators, move past the current range
+                    // so that findSourceIndex() has to look before the current index.
+                    ei2.next();
+                    ei2.next();
+                }
+            }
+
+            // Span starts.
+            assertEquals(name, expDestIndexFromSrc,
+                    ei2.destinationIndexFromSourceIndex(expSrcIndex));
+            assertEquals(name, expSrcIndexFromDest,
+                    ei2.sourceIndexFromDestinationIndex(expDestIndex));
+
+            // Inside unchanged span map offsets 1:1.
+            if (!expect.change && expect.oldLength >= 2) {
+                assertEquals(name, expDestIndex + 1,
+                        ei2.destinationIndexFromSourceIndex(expSrcIndex + 1));
+                assertEquals(name, expSrcIndex + 1,
+                        ei2.sourceIndexFromDestinationIndex(expDestIndex + 1));
+            }
+
+            // Inside change span map to the span limit.
+            int expSrcLimit = expSrcIndex + expect.oldLength;
+            int expDestLimit = expDestIndex + expect.newLength;
+            if (expect.change) {
+                if (expect.oldLength >= 2) {
+                    assertEquals(name, expDestLimit,
+                            ei2.destinationIndexFromSourceIndex(expSrcIndex + 1));
+                }
+                if (expect.newLength >= 2) {
+                    assertEquals(name, expSrcLimit,
+                            ei2.sourceIndexFromDestinationIndex(expDestIndex + 1));
+                }
+            }
+
+            expSrcIndex = expSrcLimit;
+            expDestIndex = expDestLimit;
             if (expect.change) {
                 expReplIndex += expect.newLength;
             }
+            if (expect.newLength > 0) {
+                expSrcIndexFromDest = expSrcIndex;
+            }
+            if (expect.oldLength > 0) {
+                expDestIndexFromSrc = expDestIndex;
+            }
         }
         String msg = name + " end";
         assertFalse(msg, ei1.next());
@@ -830,18 +883,23 @@ public final class UCharacterCaseTest extends TestFmwk
         assertEquals(msg, expReplIndex, ei1.replacementIndex());
 
         assertFalse(name, ei2.findSourceIndex(expSrcIndex));
+        assertFalse(name, ei2.findDestinationIndex(expDestIndex));
+        assertEquals(name, expDestIndex, ei2.destinationIndexFromSourceIndex(expSrcIndex));
+        assertEquals(name, expSrcIndex, ei2.sourceIndexFromDestinationIndex(expDestIndex));
     }
 
     @Test
     public void TestEdits() {
         Edits edits = new Edits();
-        assertFalse("new Edits", edits.hasChanges());
+        assertFalse("new Edits hasChanges", edits.hasChanges());
+        assertEquals("new Edits numberOfChanges", 0, edits.numberOfChanges());
         assertEquals("new Edits", 0, edits.lengthDelta());
         edits.addUnchanged(1);  // multiple unchanged ranges are combined
         edits.addUnchanged(10000);  // too long, and they are split
         edits.addReplace(0, 0);
         edits.addUnchanged(2);
-        assertFalse("unchanged 10003", edits.hasChanges());
+        assertFalse("unchanged 10003 hasChanges", edits.hasChanges());
+        assertEquals("unchanged 10003 numberOfChanges", 0, edits.numberOfChanges());
         assertEquals("unchanged 10003", 0, edits.lengthDelta());
         edits.addReplace(1, 1);  // multiple short equal-length edits are compressed
         edits.addUnchanged(0);
@@ -851,7 +909,8 @@ public final class UCharacterCaseTest extends TestFmwk
         edits.addReplace(100, 0);
         edits.addReplace(3000, 4000);  // variable-length encoding
         edits.addReplace(100000, 100000);
-        assertTrue("some edits", edits.hasChanges());
+        assertTrue("some edits hasChanges", edits.hasChanges());
+        assertEquals("some edits numberOfChanges", 7, edits.numberOfChanges());
         assertEquals("some edits", 10 - 100 + 1000, edits.lengthDelta());
 
         EditChange[] coarseExpectedChanges = new EditChange[] {
@@ -883,7 +942,8 @@ public final class UCharacterCaseTest extends TestFmwk
                 fineExpectedChanges, false);
 
         edits.reset();
-        assertFalse("reset", edits.hasChanges());
+        assertFalse("reset hasChanges", edits.hasChanges());
+        assertEquals("reset numberOfChanges", 0, edits.numberOfChanges());
         assertEquals("reset", 0, edits.lengthDelta());
         Edits.Iterator ei = edits.getCoarseChangesIterator();
         assertFalse("reset then iterator", ei.next());