// if sd->fAccepting already had a value other than 0 or -1, leave it be.
// If the end marker node is from a look-ahead rule, set
- // the fLookAhead field or this state also.
+ // the fLookAhead field for this state also.
if (endMarker->fLookAheadEnd) {
// TODO: don't change value if already set?
// TODO: allow for more than one active look-ahead rule in engine.
int32_t numStates = fDStates->size();
int32_t numCols = fRB->fSetBuilder->getNumCharCategories();
- U_ASSERT(baseCategory < duplCategory);
-
uint16_t table_base;
uint16_t table_dupl;
for (; baseCategory < numCols-1; ++baseCategory) {
int32_t existingVal = sd->fDtran->elementAti(col);
int32_t newVal = existingVal;
if (existingVal == duplState) {
- existingVal = keepState;
+ newVal = keepState;
} else if (existingVal > duplState) {
newVal = existingVal - 1;
}
sd->fDtran->setElementAt(newVal, col);
}
+ if (sd->fAccepting == duplState) {
+ sd->fAccepting = keepState;
+ } else if (sd->fAccepting > duplState) {
+ sd->fAccepting--;
+ }
+ if (sd->fLookAhead == duplState) {
+ sd->fLookAhead = keepState;
+ } else if (sd->fLookAhead > duplState) {
+ sd->fLookAhead--;
+ }
}
}
* RemoveDuplicateStates
*/
void RBBITableBuilder::removeDuplicateStates() {
// Keep asking findDuplicateState() for a pair of duplicate states and hand each
// reported pair to removeState(); stop once no further pair is reported.
- int32_t firstState = 0;
+ int32_t firstState = 3;
// NOTE(review): the starting state moves from 0 to 3 here. Presumably the
// lowest-numbered states are reserved and must never be merged away -- confirm
// against findDuplicateState() and the state table layout.
int32_t duplicateState = 0;
// NOTE(review): both variables are presumably in/out arguments updated by
// findDuplicateState() on each call -- verify against its declaration.
while (findDuplicateState(firstState, duplicateState)) {
- printf("Removing duplicate states (%d, %d)\n", firstState, duplicateState);
+ // printf("Removing duplicate states (%d, %d)\n", firstState, duplicateState);
removeState(firstState, duplicateState);
}
-
}
//-----------------------------------------------------------------------------
void RBBITest::TestTableRedundancies() {
UErrorCode status = U_ZERO_ERROR;
- UnicodeString rules {u"$s0=[;,*]; \n"
- "$s1=[a-z]; \n"
- "$s2=[i-n]; \n"
- "$s3=[x-z]; \n"
- "!!forward; \n"
- "($s0 | '?')*; \n"
- "($s1 | $s2 | $s3)*; \n" };
-
- RuleBasedBreakIterator *lbi =
- (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status);
- //lbi->dumpTables();
- UnicodeString lbRules = lbi->getRules();
- delete lbi;
-
- UParseError pe {};
- RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(lbRules, pe, status);
+ LocalPointer<RuleBasedBreakIterator> bi (
+ (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status));
assertSuccess(WHERE, status);
if (U_FAILURE(status)) return;
- bi->dumpTables();
RBBIDataWrapper *dw = bi->fData;
const RBBIStateTable *fwtbl = dw->fForwardTable;
int32_t numCharClasses = dw->fHeader->fCatCount;
- printf("Char Classes: %d states: %d\n", numCharClasses, fwtbl->fNumStates);
+ // printf("Char Classes: %d states: %d\n", numCharClasses, fwtbl->fNumStates);
- // Check for duplicate columns
+ // Check for duplicate columns (character categories)
std::vector<UnicodeString> columns;
for (int32_t column = 0; column < numCharClasses; column++) {
}
columns.push_back(s);
}
- for (int c1=0; c1<numCharClasses; c1++) {
+ // Ignore column (char class) 0 while checking; it's special, and may have duplicates.
+ for (int c1=1; c1<numCharClasses; c1++) {
for (int c2 = c1+1; c2 < numCharClasses; c2++) {
if (columns.at(c1) == columns.at(c2)) {
- printf("Duplicate columns (%d, %d)\n", c1, c2);
- break;
+ errln("%s:%d Duplicate columns (%d, %d)\n", __FILE__, __LINE__, c1, c2);
+ goto out;
}
}
}
+ out:
// Check for duplicate states
std::vector<UnicodeString> rows;
for (int32_t r=0; r < (int32_t)fwtbl->fNumStates; r++) {
UnicodeString s;
RBBIStateTableRow *row = (RBBIStateTableRow *) (fwtbl->fTableData + (fwtbl->fRowLen * r));
- if (row->fAccepting < -1) {
- printf("row %d accepting = %d\n", r, row->fAccepting);
- }
+ assertTrue(WHERE, row->fAccepting >= -1);
s.append(row->fAccepting + 1); // values of -1 are expected.
s.append(row->fLookAhead);
s.append(row->fTagIdx);
}
rows.push_back(s);
}
- for (int r1=0; r1<(int32_t)fwtbl->fNumStates; r1++) {
+ for (int r1=0; r1 < (int32_t)fwtbl->fNumStates; r1++) {
for (int r2 = r1+1; r2 < (int32_t)fwtbl->fNumStates; r2++) {
if (rows.at(r1) == rows.at(r2)) {
- printf("Duplicate rows (%d, %d)\n", r1, r2);
- break;
+ errln("%s:%d Duplicate rows (%d, %d)\n", __FILE__, __LINE__, r1, r2);
+ return;
}
}
}
- delete bi;
}