}
void Edits::reset() {
- length = delta = 0;
+ length = delta = numChanges = 0;  // numChanges is zeroed together with length and delta
}
void Edits::addUnchanged(int32_t unchangedLength) {
if(oldLength == newLength && 0 < oldLength && oldLength <= MAX_SHORT_WIDTH) {
// Replacement of short oldLength text units by same-length new text.
// Merge into previous short-replacement record, if any.
+ ++numChanges;
int32_t last = lastUnit();
if(MAX_UNCHANGED < last && last < MAX_SHORT_CHANGE &&
(last >> 12) == oldLength && (last & 0xfff) < MAX_SHORT_CHANGE_LENGTH) {
if (oldLength == 0 && newLength == 0) {
return;
}
+ ++numChanges;
int32_t newDelta = newLength - oldLength;
if (newDelta != 0) {
if ((newDelta > 0 && delta >= 0 && newDelta > (INT32_MAX - delta)) ||
return TRUE;
}
-UBool Edits::hasChanges() const {
- if (delta != 0) {
- return TRUE;
- }
- for (int32_t i = 0; i < length; ++i) {
- if (array[i] > MAX_UNCHANGED) {
- return TRUE;
- }
- }
- return FALSE;
-}
-
Edits::Iterator::Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs) :
array(a), index(0), length(len), remaining(0),
onlyChanges_(oc), coarse(crs),
return TRUE;
}
-UBool Edits::Iterator::findSourceIndex(int32_t i, UErrorCode &errorCode) {
- if (U_FAILURE(errorCode) || i < 0) { return FALSE; }
- if (i < srcIndex) {
+int32_t Edits::Iterator::findIndex(int32_t i, UBool findSource, UErrorCode &errorCode) {
+ if (U_FAILURE(errorCode) || i < 0) { return -1; }  // -1: incoming failure or negative index
+ int32_t spanStart, spanLength;  // start and length of the current span on the searched side
+ if (findSource) { // find source index
+ spanStart = srcIndex;
+ spanLength = oldLength_;
+ } else { // find destination index
+ spanStart = destIndex;
+ spanLength = newLength_;
+ }
+ // If we are at the start or limit of an empty span, then we search from
+ // the start of the string so that we always return
+ // the first of several consecutive empty spans, for consistent results.
+ // We do not currently track the properties of the previous span,
+ // so for now we always reset if we are at the start of the current span.
+ if (i <= spanStart) {
// Reset the iterator to the start.
index = remaining = oldLength_ = newLength_ = srcIndex = replIndex = destIndex = 0;
- } else if (i < (srcIndex + oldLength_)) {
+ } else if (i < (spanStart + spanLength)) {
// The index is in the current span.
- return TRUE;
+ return 0;  // 0: found without moving the iterator
}
while (next(FALSE, errorCode)) {
- if (i < (srcIndex + oldLength_)) {
- // The index is in the current span.
- return TRUE;
+ if (findSource) {
+ spanStart = srcIndex;
+ spanLength = oldLength_;
+ } else {
+ spanStart = destIndex;
+ spanLength = newLength_;
+ }
+ if (i == spanStart || i < (spanStart + spanLength)) {
+ // The index is in the current span, or at the start of an empty one.
+ return 0;
}
if (remaining > 0) {
// Is the index in one of the remaining compressed edits?
- // srcIndex is the start of the current span, before the remaining ones.
- int32_t len = (remaining + 1) * oldLength_;
- if (i < (srcIndex + len)) {
- int32_t n = (i - srcIndex) / oldLength_; // 1 <= n <= remaining
- len = n * oldLength_;
+ // spanStart is the start of the current span, before the remaining ones.
+ int32_t len = (remaining + 1) * spanLength;
+ if (i < (spanStart + len)) {
+ int32_t n = (i - spanStart) / spanLength; // 1 <= n <= remaining
+ len = n * spanLength;
srcIndex += len;
replIndex += len;
destIndex += len;
remaining -= n;
- return TRUE;
+ return 0;
}
// Make next() skip all of these edits at once.
oldLength_ = newLength_ = len;
remaining = 0;
}
}
- return FALSE;
+ return 1;  // 1: next() ran out of edits before reaching i
+}
+
+int32_t Edits::Iterator::destinationIndexFromSourceIndex(int32_t i, UErrorCode &errorCode) {
+ int32_t where = findIndex(i, TRUE, errorCode);  // TRUE: search by source index
+ if (where < 0) {
+ // findIndex() returned -1: errorCode was already a failure, or i is negative.
+ return 0;
+ }
+ if (where > 0 || i == srcIndex) {
+ // findIndex() ran past the last edit (where > 0), or i is exactly the start of the found span.
+ return destIndex;
+ }
+ if (changed) {
+ // Strictly inside a change span: map to the destination limit of that span.
+ return destIndex + newLength_;
+ } else {
+ // Strictly inside an unchanged span: keep the same offset from the span start (1:1).
+ return destIndex + (i - srcIndex);
+ }
+}
+
+int32_t Edits::Iterator::sourceIndexFromDestinationIndex(int32_t i, UErrorCode &errorCode) {
+ int32_t where = findIndex(i, FALSE, errorCode);  // FALSE: search by destination index
+ if (where < 0) {
+ // findIndex() returned -1: errorCode was already a failure, or i is negative.
+ return 0;
+ }
+ if (where > 0 || i == destIndex) {
+ // findIndex() ran past the last edit (where > 0), or i is exactly the start of the found span.
+ return srcIndex;
+ }
+ if (changed) {
+ // Strictly inside a change span: map to the source limit of that span.
+ return srcIndex + oldLength_;
+ } else {
+ // Strictly inside an unchanged span: keep the same offset from the span start (1:1).
+ return srcIndex + (i - destIndex);
+ }
}
U_NAMESPACE_END
* @draft ICU 59
*/
Edits() :
- array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0),
+ array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0), numChanges(0),
errorCode(U_ZERO_ERROR) {}
/**
* Destructor.
* Sets the UErrorCode if an error occurred while recording edits.
* Preserves older error codes in the outErrorCode.
* Normally called from inside ICU string transformation functions, not user code.
+ * @param outErrorCode Set to an error code if it does not contain one already
+ * and an error occurred while recording edits.
+ * Otherwise unchanged.
* @return TRUE if U_FAILURE(outErrorCode)
* @draft ICU 59
*/
* @return TRUE if there are any change edits
* @draft ICU 59
*/
- UBool hasChanges() const;
+ UBool hasChanges() const { return numChanges != 0; }
+
+ /**
+ * @return the number of change edits
+ * @draft ICU 60
+ */
+ int32_t numberOfChanges() const { return numChanges; }
/**
* Access to the list of edits.
/**
* Advances to the next edit.
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
* @return TRUE if there is another edit
* @draft ICU 59
*/
* if the source index is out of bounds for the source string.
*
* @param i source index
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
* @return TRUE if the edit for the source index was found
* @draft ICU 59
*/
- UBool findSourceIndex(int32_t i, UErrorCode &errorCode);
+ UBool findSourceIndex(int32_t i, UErrorCode &errorCode) {
+ return findIndex(i, TRUE, errorCode) == 0;
+ }
+
+ /**
+ * Finds the edit that contains the destination index.
+ * The destination index may be found in a non-change
+ * even if normal iteration would skip non-changes.
+ * Normal iteration can continue from a found edit.
+ *
+ * The iterator state before this search logically does not matter.
+ * (It may affect the performance of the search.)
+ *
+ * The iterator state after this search is undefined
+ * if the destination index is out of bounds for the destination string.
+ *
+ * @param i destination index
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return TRUE if the edit for the destination index was found
+ * @draft ICU 60
+ */
+ UBool findDestinationIndex(int32_t i, UErrorCode &errorCode) {
+ return findIndex(i, FALSE, errorCode) == 0;
+ }
+
+ /**
+ * Returns the destination index corresponding to the given source index.
+ * If the source index is inside a change edit (not at its start),
+ * then the destination index at the end of that edit is returned,
+ * since there is no information about index mapping inside a change edit.
+ *
+ * (This means that indexes to the start and middle of an edit,
+ * for example around a grapheme cluster, are mapped to indexes
+ * encompassing the entire edit.
+ * The alternative, mapping an interior index to the start,
+ * would map such an interval to an empty one.)
+ *
+ * This operation will usually but not always modify this object.
+ * The iterator state after this search is undefined.
+ *
+ * @param i source index
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return destination index; undefined if i is not 0..string length
+ * @draft ICU 60
+ */
+ int32_t destinationIndexFromSourceIndex(int32_t i, UErrorCode &errorCode);
+
+ /**
+ * Returns the source index corresponding to the given destination index.
+ * If the destination index is inside a change edit (not at its start),
+ * then the source index at the end of that edit is returned,
+ * since there is no information about index mapping inside a change edit.
+ *
+ * (This means that indexes to the start and middle of an edit,
+ * for example around a grapheme cluster, are mapped to indexes
+ * encompassing the entire edit.
+ * The alternative, mapping an interior index to the start,
+ * would map such an interval to an empty one.)
+ *
+ * This operation will usually but not always modify this object.
+ * The iterator state after this search is undefined.
+ *
+ * @param i destination index
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return source index; undefined if i is not 0..string length
+ * @draft ICU 60
+ */
+ int32_t sourceIndexFromDestinationIndex(int32_t i, UErrorCode &errorCode);
/**
* @return TRUE if this edit replaces oldLength() units with newLength() different ones.
void updateIndexes();
UBool noNext();
UBool next(UBool onlyChanges, UErrorCode &errorCode);
+ /** @return -1: error or i<0; 0: found; 1: i>=string length */
+ int32_t findIndex(int32_t i, UBool findSource, UErrorCode &errorCode);
const uint16_t *array;
int32_t index, length;
int32_t capacity;
int32_t length;
int32_t delta;
+ int32_t numChanges;
UErrorCode errorCode;
uint16_t stackArray[STACK_CAPACITY];
};
void StringCaseTest::TestEdits() {
IcuTestErrorCode errorCode(*this, "TestEdits");
Edits edits;
- assertFalse("new Edits", edits.hasChanges());
+ assertFalse("new Edits hasChanges", edits.hasChanges());
+ assertEquals("new Edits numberOfChanges", 0, edits.numberOfChanges());
assertEquals("new Edits", 0, edits.lengthDelta());
edits.addUnchanged(1); // multiple unchanged ranges are combined
edits.addUnchanged(10000); // too long, and they are split
edits.addReplace(0, 0);
edits.addUnchanged(2);
- assertFalse("unchanged 10003", edits.hasChanges());
+ assertFalse("unchanged 10003 hasChanges", edits.hasChanges());
+ assertEquals("unchanged 10003 numberOfChanges", 0, edits.numberOfChanges());
assertEquals("unchanged 10003", 0, edits.lengthDelta());
edits.addReplace(1, 1); // multiple short equal-length edits are compressed
edits.addUnchanged(0);
edits.addReplace(100, 0);
edits.addReplace(3000, 4000); // variable-length encoding
edits.addReplace(100000, 100000);
- assertTrue("some edits", edits.hasChanges());
+ assertTrue("some edits hasChanges", edits.hasChanges());
+ assertEquals("some edits numberOfChanges", 7, edits.numberOfChanges());
assertEquals("some edits", 10 - 100 + 1000, edits.lengthDelta());
UErrorCode outErrorCode = U_ZERO_ERROR;
assertFalse("edits done: copyErrorTo", edits.copyErrorTo(outErrorCode));
fineExpectedChanges, UPRV_LENGTHOF(fineExpectedChanges), FALSE, errorCode);
edits.reset();
- assertFalse("reset", edits.hasChanges());
+ assertFalse("reset hasChanges", edits.hasChanges());
+ assertEquals("reset numberOfChanges", 0, edits.numberOfChanges());
assertEquals("reset", 0, edits.lengthDelta());
Edits::Iterator ei = edits.getCoarseChangesIterator();
assertFalse("reset then iterator", ei.next(errorCode));
Edits::Iterator ei1, Edits::Iterator ei2, // two equal iterators
const EditChange expected[], int32_t expLength, UBool withUnchanged,
UErrorCode &errorCode) {
- test.assertFalse(name, ei2.findSourceIndex(-1, errorCode));
+ test.assertFalse(name + u":" + __LINE__, ei2.findSourceIndex(-1, errorCode));
+ test.assertFalse(name + u":" + __LINE__, ei2.findDestinationIndex(-1, errorCode));
int32_t expSrcIndex = 0;
int32_t expDestIndex = 0;
int32_t expReplIndex = 0;
+ int32_t expSrcIndexFromDest = 0; // for sourceIndexFromDestinationIndex()
+ int32_t expDestIndexFromSrc = 0; // for destinationIndexFromSourceIndex()
for (int32_t expIndex = 0; expIndex < expLength; ++expIndex) {
const EditChange &expect = expected[expIndex];
UnicodeString msg = UnicodeString(name).append(u' ') + expIndex;
if (withUnchanged || expect.change) {
- test.assertTrue(msg, ei1.next(errorCode));
- test.assertEquals(msg, expect.change, ei1.hasChange());
- test.assertEquals(msg, expect.oldLength, ei1.oldLength());
- test.assertEquals(msg, expect.newLength, ei1.newLength());
- test.assertEquals(msg, expSrcIndex, ei1.sourceIndex());
- test.assertEquals(msg, expDestIndex, ei1.destinationIndex());
- test.assertEquals(msg, expReplIndex, ei1.replacementIndex());
+ test.assertTrue(msg + u":" + __LINE__, ei1.next(errorCode));
+ test.assertEquals(msg + u":" + __LINE__, expect.change, ei1.hasChange());
+ test.assertEquals(msg + u":" + __LINE__, expect.oldLength, ei1.oldLength());
+ test.assertEquals(msg + u":" + __LINE__, expect.newLength, ei1.newLength());
+ test.assertEquals(msg + u":" + __LINE__, expSrcIndex, ei1.sourceIndex());
+ test.assertEquals(msg + u":" + __LINE__, expDestIndex, ei1.destinationIndex());
+ test.assertEquals(msg + u":" + __LINE__, expReplIndex, ei1.replacementIndex());
}
- if (expect.oldLength > 0) {
- test.assertTrue(msg, ei2.findSourceIndex(expSrcIndex, errorCode));
- test.assertEquals(msg, expect.change, ei2.hasChange());
- test.assertEquals(msg, expect.oldLength, ei2.oldLength());
- test.assertEquals(msg, expect.newLength, ei2.newLength());
- test.assertEquals(msg, expSrcIndex, ei2.sourceIndex());
- test.assertEquals(msg, expDestIndex, ei2.destinationIndex());
- test.assertEquals(msg, expReplIndex, ei2.replacementIndex());
+ if (expect.oldLength > 0 && expDestIndex == expDestIndexFromSrc) {
+ test.assertTrue(msg + u":" + __LINE__, ei2.findSourceIndex(expSrcIndex, errorCode));
+ test.assertEquals(msg + u":" + __LINE__, expect.change, ei2.hasChange());
+ test.assertEquals(msg + u":" + __LINE__, expect.oldLength, ei2.oldLength());
+ test.assertEquals(msg + u":" + __LINE__, expect.newLength, ei2.newLength());
+ test.assertEquals(msg + u":" + __LINE__, expSrcIndex, ei2.sourceIndex());
+ test.assertEquals(msg + u":" + __LINE__, expDestIndex, ei2.destinationIndex());
+ test.assertEquals(msg + u":" + __LINE__, expReplIndex, ei2.replacementIndex());
if (!withUnchanged) {
// For some iterators, move past the current range
// so that findSourceIndex() has to look before the current index.
}
}
- expSrcIndex += expect.oldLength;
- expDestIndex += expect.newLength;
+ if (expect.newLength > 0 && expSrcIndex == expSrcIndexFromDest) {
+ test.assertTrue(msg + u":" + __LINE__, ei2.findDestinationIndex(expDestIndex, errorCode));
+ test.assertEquals(msg + u":" + __LINE__, expect.change, ei2.hasChange());
+ test.assertEquals(msg + u":" + __LINE__, expect.oldLength, ei2.oldLength());
+ test.assertEquals(msg + u":" + __LINE__, expect.newLength, ei2.newLength());
+ test.assertEquals(msg + u":" + __LINE__, expSrcIndex, ei2.sourceIndex());
+ test.assertEquals(msg + u":" + __LINE__, expDestIndex, ei2.destinationIndex());
+ test.assertEquals(msg + u":" + __LINE__, expReplIndex, ei2.replacementIndex());
+ if (!withUnchanged) {
+ // For some iterators, move past the current range
+ // so that the next index search has to look before the current index.
+ ei2.next(errorCode);
+ ei2.next(errorCode);
+ }
+ }
+
+ // Span starts.
+ test.assertEquals(name + u":" + __LINE__, expDestIndexFromSrc,
+ ei2.destinationIndexFromSourceIndex(expSrcIndex, errorCode));
+ test.assertEquals(name + u":" + __LINE__, expSrcIndexFromDest,
+ ei2.sourceIndexFromDestinationIndex(expDestIndex, errorCode));
+
+ // Inside unchanged span map offsets 1:1.
+ if (!expect.change && expect.oldLength >= 2) {
+ test.assertEquals(name + u":" + __LINE__, expDestIndex + 1,
+ ei2.destinationIndexFromSourceIndex(expSrcIndex + 1, errorCode));
+ test.assertEquals(name + u":" + __LINE__, expSrcIndex + 1,
+ ei2.sourceIndexFromDestinationIndex(expDestIndex + 1, errorCode));
+ }
+
+ // Inside change span map to the span limit.
+ int32_t expSrcLimit = expSrcIndex + expect.oldLength;
+ int32_t expDestLimit = expDestIndex + expect.newLength;
+ if (expect.change) {
+ if (expect.oldLength >= 2) {
+ test.assertEquals(name + u":" + __LINE__, expDestLimit,
+ ei2.destinationIndexFromSourceIndex(expSrcIndex + 1, errorCode));
+ }
+ if (expect.newLength >= 2) {
+ test.assertEquals(name + u":" + __LINE__, expSrcLimit,
+ ei2.sourceIndexFromDestinationIndex(expDestIndex + 1, errorCode));
+ }
+ }
+
+ expSrcIndex = expSrcLimit;
+ expDestIndex = expDestLimit;
if (expect.change) {
expReplIndex += expect.newLength;
}
+ if (expect.newLength > 0) {
+ expSrcIndexFromDest = expSrcIndex;
+ }
+ if (expect.oldLength > 0) {
+ expDestIndexFromSrc = expDestIndex;
+ }
}
UnicodeString msg = UnicodeString(name).append(u" end");
- test.assertFalse(msg, ei1.next(errorCode));
- test.assertFalse(msg, ei1.hasChange());
- test.assertEquals(msg, 0, ei1.oldLength());
- test.assertEquals(msg, 0, ei1.newLength());
- test.assertEquals(msg, expSrcIndex, ei1.sourceIndex());
- test.assertEquals(msg, expDestIndex, ei1.destinationIndex());
- test.assertEquals(msg, expReplIndex, ei1.replacementIndex());
-
- test.assertFalse(name, ei2.findSourceIndex(expSrcIndex, errorCode));
+ test.assertFalse(msg + u":" + __LINE__, ei1.next(errorCode));
+ test.assertFalse(msg + u":" + __LINE__, ei1.hasChange());
+ test.assertEquals(msg + u":" + __LINE__, 0, ei1.oldLength());
+ test.assertEquals(msg + u":" + __LINE__, 0, ei1.newLength());
+ test.assertEquals(msg + u":" + __LINE__, expSrcIndex, ei1.sourceIndex());
+ test.assertEquals(msg + u":" + __LINE__, expDestIndex, ei1.destinationIndex());
+ test.assertEquals(msg + u":" + __LINE__, expReplIndex, ei1.replacementIndex());
+
+ test.assertFalse(name + u":" + __LINE__, ei2.findSourceIndex(expSrcIndex, errorCode));
+ test.assertFalse(name + u":" + __LINE__, ei2.findDestinationIndex(expDestIndex, errorCode));
+ test.assertEquals(name + u":" + __LINE__, expDestIndex,
+ ei2.destinationIndexFromSourceIndex(expSrcIndex, errorCode));
+ test.assertEquals(name + u":" + __LINE__, expSrcIndex,
+ ei2.sourceIndexFromDestinationIndex(expDestIndex, errorCode));
}
{ TRUE, 6, 3 }, // 가\u3133→ 갃
{ FALSE, 2, 2 } // 2 spaces
};
+ assertTrue("normalizeUTF8 with Edits hasChanges", edits.hasChanges());
+ assertEquals("normalizeUTF8 with Edits numberOfChanges", 9, edits.numberOfChanges());
TestUtility::checkEditsIter(*this, u"normalizeUTF8 with Edits",
edits.getFineIterator(), edits.getFineIterator(),
expectedChanges, UPRV_LENGTHOF(expectedChanges),
nfkc_cf->normalizeUTF8(U_OMIT_UNCHANGED_TEXT, src, sink, &edits, errorCode);
assertSuccess("normalizeUTF8 omit unchanged", errorCode.get());
assertEquals("normalizeUTF8 omit unchanged", expected.c_str(), result.c_str());
+ assertTrue("normalizeUTF8 omit unchanged hasChanges", edits.hasChanges());
+ assertEquals("normalizeUTF8 omit unchanged numberOfChanges", 9, edits.numberOfChanges());
TestUtility::checkEditsIter(*this, u"normalizeUTF8 omit unchanged",
edits.getFineIterator(), edits.getFineIterator(),
expectedChanges, UPRV_LENGTHOF(expectedChanges),
{ TRUE, 6, 3 }, // 가\u3133→ 갃
{ FALSE, 2, 2 } // 2 spaces
};
+ assertTrue("filtered normalizeUTF8 hasChanges", edits.hasChanges());
+ assertEquals("filtered normalizeUTF8 numberOfChanges", 7, edits.numberOfChanges());
TestUtility::checkEditsIter(*this, u"filtered normalizeUTF8",
edits.getFineIterator(), edits.getFineIterator(),
filteredChanges, UPRV_LENGTHOF(filteredChanges),
fn2.normalizeUTF8(U_OMIT_UNCHANGED_TEXT, src, sink, &edits, errorCode);
assertSuccess("filtered normalizeUTF8 omit unchanged", errorCode.get());
assertEquals("filtered normalizeUTF8 omit unchanged", expected.c_str(), result.c_str());
+ assertTrue("filtered normalizeUTF8 omit unchanged hasChanges", edits.hasChanges());
+ assertEquals("filtered normalizeUTF8 omit unchanged numberOfChanges", 7, edits.numberOfChanges());
TestUtility::checkEditsIter(*this, u"filtered normalizeUTF8 omit unchanged",
edits.getFineIterator(), edits.getFineIterator(),
filteredChanges, UPRV_LENGTHOF(filteredChanges),
private char[] array;
private int length;
private int delta;
+ private int numChanges;
/**
* Constructs an empty object.
* @provisional This API might change or be removed in a future release.
*/
public void reset() {
- length = delta = 0;
+ length = delta = numChanges = 0;  // numChanges is zeroed together with length and delta
}
private void setLastUnit(int last) {
if(oldLength == newLength && 0 < oldLength && oldLength <= MAX_SHORT_WIDTH) {
// Replacement of short oldLength text units by same-length new text.
// Merge into previous short-replacement record, if any.
+ ++numChanges;
int last = lastUnit();
if(MAX_UNCHANGED < last && last < MAX_SHORT_CHANGE &&
(last >> 12) == oldLength && (last & 0xfff) < MAX_SHORT_CHANGE_LENGTH) {
if (oldLength == 0 && newLength == 0) {
return;
}
+ ++numChanges;
int newDelta = newLength - oldLength;
if (newDelta != 0) {
if ((newDelta > 0 && delta >= 0 && newDelta > (Integer.MAX_VALUE - delta)) ||
* @draft ICU 59
* @provisional This API might change or be removed in a future release.
*/
- public boolean hasChanges() {
- if (delta != 0) {
- return true;
- }
- for (int i = 0; i < length; ++i) {
- if (array[i] > MAX_UNCHANGED) {
- return true;
- }
- }
- return false;
- }
+ public boolean hasChanges() { return numChanges != 0; }
+
+ /**
+ * @return the number of change edits
+ * @draft ICU 60
+ * @provisional This API might change or be removed in a future release.
+ */
+ public int numberOfChanges() { return numChanges; }
/**
* Access to the list of edits.
* @provisional This API might change or be removed in a future release.
*/
public boolean findSourceIndex(int i) {
- if (i < 0) { return false; }
- if (i < srcIndex) {
+ return findIndex(i, true) == 0;
+ }
+
+ /**
+ * Finds the edit that contains the destination index.
+ * The destination index may be found in a non-change
+ * even if normal iteration would skip non-changes.
+ * Normal iteration can continue from a found edit.
+ *
+ * <p>The iterator state before this search logically does not matter.
+ * (It may affect the performance of the search.)
+ *
+ * <p>The iterator state after this search is undefined
+ * if the destination index is out of bounds for the destination string.
+ *
+ * @param i destination index
+ * @return true if the edit for the destination index was found
+ * @draft ICU 60
+ * @provisional This API might change or be removed in a future release.
+ */
+ public boolean findDestinationIndex(int i) {
+ return findIndex(i, false) == 0;
+ }
+
+ /** @return -1: i&lt;0; 0: found; 1: i&gt;=string length */
+ private int findIndex(int i, boolean findSource) {
+ if (i < 0) { return -1; }  // the only -1 case here: negative index
+ int spanStart, spanLength;  // start and length of the current span on the searched side
+ if (findSource) { // find source index
+ spanStart = srcIndex;
+ spanLength = oldLength_;
+ } else { // find destination index
+ spanStart = destIndex;
+ spanLength = newLength_;
+ }
+ // If we are at the start or limit of an empty span, then we search from
+ // the start of the string so that we always return
+ // the first of several consecutive empty spans, for consistent results.
+ // We do not currently track the properties of the previous span,
+ // so for now we always reset if we are at the start of the current span.
+ if (i <= spanStart) {
// Reset the iterator to the start.
index = remaining = oldLength_ = newLength_ = srcIndex = replIndex = destIndex = 0;
- } else if (i < (srcIndex + oldLength_)) {
+ } else if (i < (spanStart + spanLength)) {
// The index is in the current span.
- return true;
+ return 0;  // 0: found without moving the iterator
}
while (next(false)) {
- if (i < (srcIndex + oldLength_)) {
- // The index is in the current span.
- return true;
+ if (findSource) {
+ spanStart = srcIndex;
+ spanLength = oldLength_;
+ } else {
+ spanStart = destIndex;
+ spanLength = newLength_;
+ }
+ if (i == spanStart || i < (spanStart + spanLength)) {
+ // The index is in the current span, or at the start of an empty one.
+ return 0;
}
if (remaining > 0) {
// Is the index in one of the remaining compressed edits?
- // srcIndex is the start of the current span, before the remaining ones.
- int len = (remaining + 1) * oldLength_;
- if (i < (srcIndex + len)) {
- int n = (i - srcIndex) / oldLength_; // 1 <= n <= remaining
- len = n * oldLength_;
+ // spanStart is the start of the current span, before the remaining ones.
+ int len = (remaining + 1) * spanLength;
+ if (i < (spanStart + len)) {
+ int n = (i - spanStart) / spanLength; // 1 <= n <= remaining
+ len = n * spanLength;
srcIndex += len;
replIndex += len;
destIndex += len;
remaining -= n;
- return true;
+ return 0;
}
// Make next() skip all of these edits at once.
oldLength_ = newLength_ = len;
remaining = 0;
}
}
- return false;
+ return 1;  // 1: next() ran out of edits before reaching i
+ }
+
+ /**
+ * Returns the destination index corresponding to the given source index.
+ * If the source index is inside a change edit (not at its start),
+ * then the destination index at the end of that edit is returned,
+ * since there is no information about index mapping inside a change edit.
+ *
+ * <p>(This means that indexes to the start and middle of an edit,
+ * for example around a grapheme cluster, are mapped to indexes
+ * encompassing the entire edit.
+ * The alternative, mapping an interior index to the start,
+ * would map such an interval to an empty one.)
+ *
+ * <p>This operation will usually but not always modify this object.
+ * The iterator state after this search is undefined.
+ *
+ * @param i source index
+ * @return destination index; undefined if i is not 0..string length
+ * @draft ICU 60
+ * @provisional This API might change or be removed in a future release.
+ */
+ public int destinationIndexFromSourceIndex(int i) {
+ int where = findIndex(i, true);  // true: search by source index
+ if (where < 0) {
+ // findIndex() returned -1: i is negative (before the string).
+ return 0;
+ }
+ if (where > 0 || i == srcIndex) {
+ // findIndex() ran past the last edit (where > 0), or i is exactly the start of the found span.
+ return destIndex;
+ }
+ if (changed) {
+ // Strictly inside a change span: map to the destination limit of that span.
+ return destIndex + newLength_;
+ } else {
+ // Strictly inside an unchanged span: keep the same offset from the span start (1:1).
+ return destIndex + (i - srcIndex);
+ }
+ }
+
+ /**
+ * Returns the source index corresponding to the given destination index.
+ * If the destination index is inside a change edit (not at its start),
+ * then the source index at the end of that edit is returned,
+ * since there is no information about index mapping inside a change edit.
+ *
+ * <p>(This means that indexes to the start and middle of an edit,
+ * for example around a grapheme cluster, are mapped to indexes
+ * encompassing the entire edit.
+ * The alternative, mapping an interior index to the start,
+ * would map such an interval to an empty one.)
+ *
+ * <p>This operation will usually but not always modify this object.
+ * The iterator state after this search is undefined.
+ *
+ * @param i destination index
+ * @return source index; undefined if i is not 0..string length
+ * @draft ICU 60
+ * @provisional This API might change or be removed in a future release.
+ */
+ public int sourceIndexFromDestinationIndex(int i) {
+ int where = findIndex(i, false);  // false: search by destination index
+ if (where < 0) {
+ // findIndex() returned -1: i is negative (before the string).
+ return 0;
+ }
+ if (where > 0 || i == destIndex) {
+ // findIndex() ran past the last edit (where > 0), or i is exactly the start of the found span.
+ return srcIndex;
+ }
+ if (changed) {
+ // Strictly inside a change span: map to the source limit of that span.
+ return srcIndex + oldLength_;
+ } else {
+ // Strictly inside an unchanged span: keep the same offset from the span start (1:1).
+ return srcIndex + (i - destIndex);
+ }
}
/**
String name, Edits.Iterator ei1, Edits.Iterator ei2, // two equal iterators
EditChange[] expected, boolean withUnchanged) {
assertFalse(name, ei2.findSourceIndex(-1));
+ assertFalse(name, ei2.findDestinationIndex(-1));
int expSrcIndex = 0;
int expDestIndex = 0;
int expReplIndex = 0;
+ int expSrcIndexFromDest = 0; // for sourceIndexFromDestinationIndex()
+ int expDestIndexFromSrc = 0; // for destinationIndexFromSourceIndex()
for (int expIndex = 0; expIndex < expected.length; ++expIndex) {
EditChange expect = expected[expIndex];
String msg = name + ' ' + expIndex;
assertEquals(msg, expReplIndex, ei1.replacementIndex());
}
- if (expect.oldLength > 0) {
+ if (expect.oldLength > 0 && expDestIndex == expDestIndexFromSrc) {
assertTrue(msg, ei2.findSourceIndex(expSrcIndex));
assertEquals(msg, expect.change, ei2.hasChange());
assertEquals(msg, expect.oldLength, ei2.oldLength());
}
}
- expSrcIndex += expect.oldLength;
- expDestIndex += expect.newLength;
+ if (expect.newLength > 0 && expSrcIndex == expSrcIndexFromDest) {
+ assertTrue(msg, ei2.findDestinationIndex(expDestIndex));
+ assertEquals(msg, expect.change, ei2.hasChange());
+ assertEquals(msg, expect.oldLength, ei2.oldLength());
+ assertEquals(msg, expect.newLength, ei2.newLength());
+ assertEquals(msg, expSrcIndex, ei2.sourceIndex());
+ assertEquals(msg, expDestIndex, ei2.destinationIndex());
+ assertEquals(msg, expReplIndex, ei2.replacementIndex());
+ if (!withUnchanged) {
+ // For some iterators, move past the current range
+ // so that the next index search has to look before the current index.
+ ei2.next();
+ ei2.next();
+ }
+ }
+
+ // Span starts.
+ assertEquals(name, expDestIndexFromSrc,
+ ei2.destinationIndexFromSourceIndex(expSrcIndex));
+ assertEquals(name, expSrcIndexFromDest,
+ ei2.sourceIndexFromDestinationIndex(expDestIndex));
+
+ // Inside unchanged span map offsets 1:1.
+ if (!expect.change && expect.oldLength >= 2) {
+ assertEquals(name, expDestIndex + 1,
+ ei2.destinationIndexFromSourceIndex(expSrcIndex + 1));
+ assertEquals(name, expSrcIndex + 1,
+ ei2.sourceIndexFromDestinationIndex(expDestIndex + 1));
+ }
+
+ // Inside change span map to the span limit.
+ int expSrcLimit = expSrcIndex + expect.oldLength;
+ int expDestLimit = expDestIndex + expect.newLength;
+ if (expect.change) {
+ if (expect.oldLength >= 2) {
+ assertEquals(name, expDestLimit,
+ ei2.destinationIndexFromSourceIndex(expSrcIndex + 1));
+ }
+ if (expect.newLength >= 2) {
+ assertEquals(name, expSrcLimit,
+ ei2.sourceIndexFromDestinationIndex(expDestIndex + 1));
+ }
+ }
+
+ expSrcIndex = expSrcLimit;
+ expDestIndex = expDestLimit;
if (expect.change) {
expReplIndex += expect.newLength;
}
+ if (expect.newLength > 0) {
+ expSrcIndexFromDest = expSrcIndex;
+ }
+ if (expect.oldLength > 0) {
+ expDestIndexFromSrc = expDestIndex;
+ }
}
String msg = name + " end";
assertFalse(msg, ei1.next());
assertEquals(msg, expReplIndex, ei1.replacementIndex());
assertFalse(name, ei2.findSourceIndex(expSrcIndex));
+ assertFalse(name, ei2.findDestinationIndex(expDestIndex));
+ assertEquals(name, expDestIndex, ei2.destinationIndexFromSourceIndex(expSrcIndex));
+ assertEquals(name, expSrcIndex, ei2.sourceIndexFromDestinationIndex(expDestIndex));
}
@Test
public void TestEdits() {
Edits edits = new Edits();
- assertFalse("new Edits", edits.hasChanges());
+ assertFalse("new Edits hasChanges", edits.hasChanges());
+ assertEquals("new Edits numberOfChanges", 0, edits.numberOfChanges());
assertEquals("new Edits", 0, edits.lengthDelta());
edits.addUnchanged(1); // multiple unchanged ranges are combined
edits.addUnchanged(10000); // too long, and they are split
edits.addReplace(0, 0);
edits.addUnchanged(2);
- assertFalse("unchanged 10003", edits.hasChanges());
+ assertFalse("unchanged 10003 hasChanges", edits.hasChanges());
+ assertEquals("unchanged 10003 numberOfChanges", 0, edits.numberOfChanges());
assertEquals("unchanged 10003", 0, edits.lengthDelta());
edits.addReplace(1, 1); // multiple short equal-length edits are compressed
edits.addUnchanged(0);
edits.addReplace(100, 0);
edits.addReplace(3000, 4000); // variable-length encoding
edits.addReplace(100000, 100000);
- assertTrue("some edits", edits.hasChanges());
+ assertTrue("some edits hasChanges", edits.hasChanges());
+ assertEquals("some edits numberOfChanges", 7, edits.numberOfChanges());
assertEquals("some edits", 10 - 100 + 1000, edits.lengthDelta());
EditChange[] coarseExpectedChanges = new EditChange[] {
fineExpectedChanges, false);
edits.reset();
- assertFalse("reset", edits.hasChanges());
+ assertFalse("reset hasChanges", edits.hasChanges());
+ assertEquals("reset numberOfChanges", 0, edits.numberOfChanges());
assertEquals("reset", 0, edits.lengthDelta());
Edits.Iterator ei = edits.getCoarseChangesIterator();
assertFalse("reset then iterator", ei.next());