]> granicus.if.org Git - icu/commitdiff
ICU-13569 rbbi table, remove duplicated states, working for C++.
author Andy Heninger <andy.heninger@gmail.com>
Tue, 13 Feb 2018 01:08:29 +0000 (01:08 +0000)
committer Andy Heninger <andy.heninger@gmail.com>
Tue, 13 Feb 2018 01:08:29 +0000 (01:08 +0000)
X-SVN-Rev: 40902

icu4c/source/common/rbbirb.cpp
icu4c/source/common/rbbitblb.cpp
icu4c/source/test/intltest/rbbitst.cpp

index 99c8e5dd5af1afa7acbae486ce11553c93724161..61e596d6ed70fa987d35c3c84eee765ce9ca67af 100644 (file)
@@ -358,7 +358,7 @@ void RBBIRuleBuilder::optimizeTables() {
     int32_t rightClass;
 
     leftClass = 3;
-    rightClass = 4;
+    rightClass = 0;
     while (fForwardTables->findDuplCharClassFrom(leftClass, rightClass)) {
         fSetBuilder->mergeCategories(leftClass, rightClass);
         fForwardTables->removeColumn(rightClass);
@@ -368,6 +368,9 @@ void RBBIRuleBuilder::optimizeTables() {
     }
 
     fForwardTables->removeDuplicateStates();
+    fReverseTables->removeDuplicateStates();
+    fSafeFwdTables->removeDuplicateStates();
+    fSafeRevTables->removeDuplicateStates();
 
 
 
index 68e9ffb666d7b7bbde2479e225e54efbfe4528b4..58168922d4bca22c98e714836b2034da1db5da9e 100644 (file)
@@ -762,7 +762,7 @@ void     RBBITableBuilder::flagAcceptingStates() {
                 // if sd->fAccepting already had a value other than 0 or -1, leave it be.
 
                 // If the end marker node is from a look-ahead rule, set
-                //   the fLookAhead field or this state also.
+                //   the fLookAhead field for this state also.
                 if (endMarker->fLookAheadEnd) {
                     // TODO:  don't change value if already set?
                     // TODO:  allow for more than one active look-ahead rule in engine.
@@ -1085,8 +1085,6 @@ bool RBBITableBuilder::findDuplCharClassFrom(int32_t &baseCategory, int32_t &dup
     int32_t numStates = fDStates->size();
     int32_t numCols = fRB->fSetBuilder->getNumCharCategories();
 
-    U_ASSERT(baseCategory < duplCategory);
-
     uint16_t table_base;
     uint16_t table_dupl;
     for (; baseCategory < numCols-1; ++baseCategory) {
@@ -1171,12 +1169,22 @@ void RBBITableBuilder::removeState(int32_t keepState, int32_t duplState) {
             int32_t existingVal = sd->fDtran->elementAti(col);
             int32_t newVal = existingVal;
             if (existingVal == duplState) {
-                existingVal = keepState;
+                newVal = keepState;
             } else if (existingVal > duplState) {
                 newVal = existingVal - 1;
             }
             sd->fDtran->setElementAt(newVal, col);
         }
+        if (sd->fAccepting == duplState) {
+            sd->fAccepting = keepState;
+        } else if (sd->fAccepting > duplState) {
+            sd->fAccepting--;
+        }
+        if (sd->fLookAhead == duplState) {
+            sd->fLookAhead = keepState;
+        } else if (sd->fLookAhead > duplState) {
+            sd->fLookAhead--;
+        }
     }
 }
 
@@ -1185,13 +1193,12 @@ void RBBITableBuilder::removeState(int32_t keepState, int32_t duplState) {
  * RemoveDuplicateStates
  */
 void RBBITableBuilder::removeDuplicateStates() {
-    int32_t firstState = 0;
+    int32_t firstState = 3;
     int32_t duplicateState = 0;
     while (findDuplicateState(firstState, duplicateState)) {
-        printf("Removing duplicate states (%d, %d)\n", firstState, duplicateState);
+        // printf("Removing duplicate states (%d, %d)\n", firstState, duplicateState);
         removeState(firstState, duplicateState);
     }
-
 }
 
 //-----------------------------------------------------------------------------
index fd150617a7914b2efec5dae7281d65b6f261a9b0..1e0901c427b765c45adcd17a3b9334e9ffeb74db 100644 (file)
@@ -4462,32 +4462,17 @@ void RBBITest::TestBug12677() {
 void RBBITest::TestTableRedundancies() {
     UErrorCode status = U_ZERO_ERROR;
     
-    UnicodeString rules {u"$s0=[;,*]; \n"
-                "$s1=[a-z]; \n"
-                "$s2=[i-n]; \n"
-                "$s3=[x-z]; \n"
-                "!!forward; \n"
-                "($s0 | '?')*; \n"
-                "($s1 | $s2 | $s3)*; \n" };
-
-    RuleBasedBreakIterator *lbi =
-        (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status);
-    //lbi->dumpTables();
-    UnicodeString lbRules = lbi->getRules();
-    delete lbi;
-
-    UParseError pe {};
-    RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(lbRules, pe, status);
+    LocalPointer<RuleBasedBreakIterator> bi (
+        (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status));
     assertSuccess(WHERE, status);
     if (U_FAILURE(status)) return;
-    bi->dumpTables();
 
     RBBIDataWrapper *dw = bi->fData;
     const RBBIStateTable *fwtbl = dw->fForwardTable;
     int32_t numCharClasses = dw->fHeader->fCatCount;
-    printf("Char Classes: %d     states: %d\n", numCharClasses, fwtbl->fNumStates);
+    // printf("Char Classes: %d     states: %d\n", numCharClasses, fwtbl->fNumStates);
 
-    // Check for duplicate columns
+    // Check for duplicate columns (character categories)
 
     std::vector<UnicodeString> columns;
     for (int32_t column = 0; column < numCharClasses; column++) {
@@ -4498,23 +4483,23 @@ void RBBITest::TestTableRedundancies() {
         }
         columns.push_back(s);
     }
-    for (int c1=0; c1<numCharClasses; c1++) {
+    // Ignore column (char class) 0 while checking; it's special, and may have duplicates.
+    for (int c1=1; c1<numCharClasses; c1++) {
         for (int c2 = c1+1; c2 < numCharClasses; c2++) {
             if (columns.at(c1) == columns.at(c2)) {
-                printf("Duplicate columns (%d, %d)\n", c1, c2);
-                break;
+                errln("%s:%d Duplicate columns (%d, %d)\n", __FILE__, __LINE__, c1, c2);
+                goto out;
             }
         }
     }
+  out:
 
     // Check for duplicate states
     std::vector<UnicodeString> rows;
     for (int32_t r=0; r < (int32_t)fwtbl->fNumStates; r++) {
         UnicodeString s;
         RBBIStateTableRow  *row = (RBBIStateTableRow *) (fwtbl->fTableData + (fwtbl->fRowLen * r));
-        if (row->fAccepting < -1) {
-            printf("row %d accepting = %d\n", r, row->fAccepting);
-        }
+        assertTrue(WHERE, row->fAccepting >= -1);
         s.append(row->fAccepting + 1);   // values of -1 are expected.
         s.append(row->fLookAhead);
         s.append(row->fTagIdx);
@@ -4523,15 +4508,14 @@ void RBBITest::TestTableRedundancies() {
         }
         rows.push_back(s);
     }
-    for (int r1=0; r1<(int32_t)fwtbl->fNumStates; r1++) {
+    for (int r1=0; r1 < (int32_t)fwtbl->fNumStates; r1++) {
         for (int r2 = r1+1; r2 < (int32_t)fwtbl->fNumStates; r2++) {
             if (rows.at(r1) == rows.at(r2)) {
-                printf("Duplicate rows (%d, %d)\n", r1, r2);
-                break;
+                errln("%s:%d Duplicate rows (%d, %d)\n", __FILE__, __LINE__, r1, r2);
+                return;
             }
         }
     }
-    delete bi;
 }