// Now that we're done, convert positions in t_boundary[] (indices in
// the normalized input string) back to indices in the original input UText
// while reversing t_boundary and pushing values to foundBreaks.
+ int32_t prevCPPos = -1;
+ int32_t prevUTextPos = -1;
for (int32_t i = numBreaks-1; i >= 0; i--) {
int32_t cpPos = t_boundary.elementAti(i);
+ U_ASSERT(cpPos > prevCPPos);
int32_t utextPos = inputMap.isValid() ? inputMap->elementAti(cpPos) : cpPos + rangeStart;
- // Boundaries are added to foundBreaks output in ascending order.
- U_ASSERT(foundBreaks.size() == 0 ||foundBreaks.peeki() < utextPos);
- foundBreaks.push(utextPos, status);
+ U_ASSERT(utextPos >= prevUTextPos);
+ if (utextPos > prevUTextPos) {
+ // Boundaries are added to foundBreaks output in ascending order.
+ U_ASSERT(foundBreaks.size() == 0 || foundBreaks.peeki() < utextPos);
+ foundBreaks.push(utextPos, status);
+ } else {
+ // Normalization expanded the input text, and the dictionary found a boundary
+ // within the expansion, giving two boundaries with the same index in the
+ // original text. Ignore the second. See ticket #12918.
+ --numBreaks;
+ }
+ prevCPPos = cpPos;
+ prevUTextPos = utextPos;
}
// inString goes out of scope
TESTCASE_AUTO(TestBug5532);
TESTCASE_AUTO(TestBug7547);
TESTCASE_AUTO(TestBug12797);
+ TESTCASE_AUTO(TestBug12918);
TESTCASE_AUTO_END;
}
}
}
+//
+// TestBug12918 - Regression test for ticket #12918.
+//                Iterates word boundaries over a string that triggered an
+//                assertion failure in dictbe.cpp, and checks that every
+//                boundary reported is strictly greater than the previous one.
+//
+void RBBITest::TestBug12918() {
+    // This string triggered an assertion failure in dictbe.cpp
+    const UChar *crasherString = u"\u3325\u4a16";
+    UErrorCode status = U_ZERO_ERROR;
+    UBreakIterator* iter = ubrk_open(UBRK_WORD, NULL, crasherString, -1, &status);
+    if (U_FAILURE(status)) {
+        errln("%s:%d status = %s", __FILE__, __LINE__, u_errorName(status));
+        return;
+    }
+    ubrk_first(iter);
+    int32_t pos = 0;
+    int32_t lastPos = -1;
+    while((pos = ubrk_next(iter)) != UBRK_DONE) {
+        if (pos <= lastPos) {
+            errln("%s:%d (pos, lastPos) = (%d, %d)", __FILE__, __LINE__, pos, lastPos);
+            break;
+        }
+        // Remember this boundary so the next one is compared against it;
+        // without this update the monotonicity check above can never fail.
+        lastPos = pos;
+    }
+    ubrk_close(iter);
+}
//
// TestDebug - A place-holder test for debugging purposes.