]> granicus.if.org Git - icu/commitdiff
ICU-13194 RBBI safe tables, remove unnecessary Java/C++ differences.
author Andy Heninger <andy.heninger@gmail.com>
Tue, 3 Apr 2018 23:41:28 +0000 (23:41 +0000)
committer Andy Heninger <andy.heninger@gmail.com>
Tue, 3 Apr 2018 23:41:28 +0000 (23:41 +0000)
X-SVN-Rev: 41192

icu4c/source/common/rbbirb.cpp
icu4c/source/common/rbbirb.h
icu4c/source/common/rbbitblb.cpp
icu4c/source/common/rbbitblb.h

index 90752ba1468b2f19fb0d395d8dc9d34435990d94..8d624f4a9253dd4d125652d4339207d579b131a2 100644 (file)
@@ -308,9 +308,12 @@ void RBBIRuleBuilder::optimizeTables() {
 
     leftClass = 3;
     rightClass = 0;
-    while (fForwardTable->findDuplCharClassFrom(leftClass, rightClass)) {
-        fSetBuilder->mergeCategories(leftClass, rightClass);
-        fForwardTable->removeColumn(rightClass);
+
+    IntPair duplPair = {3, 0};
+
+    while (fForwardTable->findDuplCharClassFrom(&duplPair)) {
+        fSetBuilder->mergeCategories(duplPair.first, duplPair.second);
+        fForwardTable->removeColumn(duplPair.second);
     }
     fForwardTable->removeDuplicateStates();
 }
index 59ff66f9044fd20bec16faf65adc641c783fd968..037c1dc2ce8ff798ead8a3e4fe44b958f7a8b69c 100644 (file)
@@ -18,6 +18,8 @@
 
 #if !UCONFIG_NO_BREAK_ITERATION
 
+#include <utility>
+
 #include "unicode/uobject.h"
 #include "unicode/rbbi.h"
 #include "unicode/uniset.h"
@@ -25,8 +27,7 @@
 #include "uhash.h"
 #include "uvector.h"
 #include "unicode/symtable.h"// For UnicodeSet parsing, is the interface that
-                          //    looks up references to $variables within a set.
-
+                             //    looks up references to $variables within a set.
 
 
 U_NAMESPACE_BEGIN
@@ -203,6 +204,11 @@ struct RBBISetTableEl {
     RBBINode      *val;
 };
 
+/**
+ *   A pair of ints, used to bundle pairs of states or pairs of character classes.
+ */
+typedef std::pair<int32_t, int32_t> IntPair;
+
 
 //----------------------------------------------------------------------------
 //
index bd039ea4a87747cefe48acab49bf6fc54e4600a3..30e475c063ddda77f935cdd4b8072f2ae2ddfb4b 100644 (file)
@@ -1078,18 +1078,18 @@ void RBBITableBuilder::printPosSets(RBBINode *n) {
 //
 //    findDuplCharClassFrom()
 //
-bool RBBITableBuilder::findDuplCharClassFrom(int32_t &baseCategory, int32_t &duplCategory) {
+bool RBBITableBuilder::findDuplCharClassFrom(IntPair *categories) {
     int32_t numStates = fDStates->size();
     int32_t numCols = fRB->fSetBuilder->getNumCharCategories();
 
     uint16_t table_base;
     uint16_t table_dupl;
-    for (; baseCategory < numCols-1; ++baseCategory) {
-        for (duplCategory=baseCategory+1; duplCategory < numCols; ++duplCategory) {
+    for (; categories->first < numCols-1; categories->first++) {
+        for (categories->second=categories->first+1; categories->second < numCols; categories->second++) {
              for (int32_t state=0; state<numStates; state++) {
                  RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state);
-                 table_base = (uint16_t)sd->fDtran->elementAti(baseCategory);
-                 table_dupl = (uint16_t)sd->fDtran->elementAti(duplCategory);
+                 table_base = (uint16_t)sd->fDtran->elementAti(categories->first);
+                 table_dupl = (uint16_t)sd->fDtran->elementAti(categories->second);
                  if (table_base != table_dupl) {
                      break;
                  }
@@ -1118,14 +1118,14 @@ void RBBITableBuilder::removeColumn(int32_t column) {
 /*
  * findDuplicateState
  */
-bool RBBITableBuilder::findDuplicateState(int32_t &firstState, int32_t &duplState) {
+bool RBBITableBuilder::findDuplicateState(IntPair *states) {
     int32_t numStates = fDStates->size();
     int32_t numCols = fRB->fSetBuilder->getNumCharCategories();
 
-    for (; firstState<numStates-1; ++firstState) {
-        RBBIStateDescriptor *firstSD = (RBBIStateDescriptor *)fDStates->elementAt(firstState);
-        for (duplState=firstState+1; duplState<numStates; ++duplState) {
-            RBBIStateDescriptor *duplSD = (RBBIStateDescriptor *)fDStates->elementAt(duplState);
+    for (; states->first<numStates-1; states->first++) {
+        RBBIStateDescriptor *firstSD = (RBBIStateDescriptor *)fDStates->elementAt(states->first);
+        for (states->second=states->first+1; states->second<numStates; states->second++) {
+            RBBIStateDescriptor *duplSD = (RBBIStateDescriptor *)fDStates->elementAt(states->second);
             if (firstSD->fAccepting != duplSD->fAccepting ||
                 firstSD->fLookAhead != duplSD->fLookAhead ||
                 firstSD->fTagsIdx   != duplSD->fTagsIdx) {
@@ -1136,8 +1136,8 @@ bool RBBITableBuilder::findDuplicateState(int32_t &firstState, int32_t &duplStat
                 int32_t firstVal = firstSD->fDtran->elementAti(col);
                 int32_t duplVal = duplSD->fDtran->elementAti(col);
                 if (!((firstVal == duplVal) ||
-                        ((firstVal == firstState || firstVal == duplState) &&
-                        (duplVal  == firstState || duplVal  == duplState)))) {
+                        ((firstVal == states->first || firstVal == states->second) &&
+                        (duplVal  == states->first || duplVal  == states->second)))) {
                     rowsMatch = false;
                     break;
                 }
@@ -1151,21 +1151,21 @@ bool RBBITableBuilder::findDuplicateState(int32_t &firstState, int32_t &duplStat
 }
 
 
-bool RBBITableBuilder::findDuplicateSafeState(int32_t *firstState, int32_t *duplState) {
+bool RBBITableBuilder::findDuplicateSafeState(IntPair *states) {
     int32_t numStates = fSafeTable->size();
 
-    for (; *firstState<numStates-1; ++(*firstState)) {
-        UnicodeString *firstRow = static_cast<UnicodeString *>(fSafeTable->elementAt(*firstState));
-        for (*duplState=*firstState+1; *duplState<numStates; ++(*duplState)) {
-            UnicodeString *duplRow = static_cast<UnicodeString *>(fSafeTable->elementAt(*duplState));
+    for (; states->first<numStates-1; states->first++) {
+        UnicodeString *firstRow = static_cast<UnicodeString *>(fSafeTable->elementAt(states->first));
+        for (states->second=states->first+1; states->second<numStates; states->second++) {
+            UnicodeString *duplRow = static_cast<UnicodeString *>(fSafeTable->elementAt(states->second));
             bool rowsMatch = true;
             int32_t numCols = firstRow->length();
             for (int32_t col=0; col < numCols; ++col) {
                 int32_t firstVal = firstRow->charAt(col);
                 int32_t duplVal = duplRow->charAt(col);
                 if (!((firstVal == duplVal) ||
-                        ((firstVal == *firstState || firstVal == *duplState) &&
-                        (duplVal  == *firstState || duplVal  == *duplState)))) {
+                        ((firstVal == states->first || firstVal == states->second) &&
+                        (duplVal  == states->first || duplVal  == states->second)))) {
                     rowsMatch = false;
                     break;
                 }
@@ -1242,11 +1242,10 @@ void RBBITableBuilder::removeSafeState(int32_t keepState, int32_t duplState) {
  * RemoveDuplicateStates
  */
 void RBBITableBuilder::removeDuplicateStates() {
-    int32_t firstState = 3;
-    int32_t duplicateState = 0;
-    while (findDuplicateState(firstState, duplicateState)) {
-        // printf("Removing duplicate states (%d, %d)\n", firstState, duplicateState);
-        removeState(firstState, duplicateState);
+    IntPair dupls = {3, 0};
+    while (findDuplicateState(&dupls)) {
+        // printf("Removing duplicate states (%d, %d)\n", dupls.first, dupls.second);
+        removeState(dupls.first, dupls.second);
     }
 }
 
@@ -1428,11 +1427,10 @@ void RBBITableBuilder::buildSafeReverseTable(UErrorCode &status) {
     }
 
     // Remove duplicate or redundant rows from the table.
-    int32_t firstState = 1;
-    int32_t duplicateState = 0;    // initial value is not used; set by findDuplicateSafeState().
-    while (findDuplicateSafeState(&firstState, &duplicateState)) {
-        // printf("Removing duplicate safe states (%d, %d)\n", firstState, duplicateState);
-        removeSafeState(firstState, duplicateState);
+    IntPair states = {1, 0};
+    while (findDuplicateSafeState(&states)) {
+        // printf("Removing duplicate safe states (%d, %d)\n", states.first, states.second);
+        removeSafeState(states.first, states.second);
     }
 }
 
index 0a0326ae32fd6a0e6468765d828225ba58db6c0b..238ae463e432d19fe826d79ecc0f567e53c2c23d 100644 (file)
@@ -17,6 +17,7 @@
 #include "unicode/utypes.h"
 #include "unicode/uobject.h"
 #include "unicode/rbbi.h"
+#include "rbbirb.h"
 #include "rbbinode.h"
 
 
@@ -49,11 +50,15 @@ public:
      */
     void     exportTable(void *where);
 
-    /** Find duplicate (redundant) character classes, beginning after the specifed
+    /**
+     *  Find duplicate (redundant) character classes, beginning at the specified
      *  pair, within this state table. This is an iterator-like function, used to
-     *  identify char classes (state table columns) that can be eliminated.
+     *  identify character classes (state table columns) that can be eliminated.
+     *  @param statePair in/out pair of character categories, specifies where to start looking
+     *                for duplicates, and returns the first pair of duplicates found, if any.
+     *  @return true if duplicate char classes were found, false otherwise.
      */
-    bool     findDuplCharClassFrom(int &baseClass, int &duplClass);
+    bool     findDuplCharClassFrom(IntPair *statePair);
 
     /** Remove a column from the state table. Used when two character categories
      *  have been found equivalent, and merged together, to eliminate the unneeded table column.
@@ -95,13 +100,15 @@ private:
 
     void     addRuleRootNodes(UVector *dest, RBBINode *node);
 
-    /** Find the next duplicate state. An iterator function.
-     * @param firstState (in/out) begin looking at this state, return the first of the
-     *                   pair of duplicates.
-     * @param duplicateState returns the duplicate state of fistState
-     * @return true if a duplicate pair of states was found.
+    /**
+     *  Find duplicate (redundant) states, beginning at the specified pair,
+     *  within this state table. This is an iterator-like function, used to
+     *  identify states (state table rows) that can be eliminated.
+     *  @param states in/out parameter, specifies where to start looking for duplicates,
+     *                and returns the first pair of duplicates found, if any.
+     *  @return true if duplicate states were found, false otherwise.
      */
-    bool findDuplicateState(int32_t &firstState, int32_t &duplicateState);
+    bool findDuplicateState(IntPair *states);
 
     /** Remove a duplicate state.
      * @param keepState First of the duplicate pair. Keep it.
@@ -111,12 +118,11 @@ private:
     void removeState(int32_t keepState, int32_t duplState);
 
     /** Find the next duplicate state in the safe reverse table. An iterator function.
-     * @param firstState ptr to state variable. Begin looking at this state, set to the first of the
-     *                   pair of duplicates on return.
-     * @param duplicateState ptr to where to return the duplicate state of fistState. Output only.
-     * @return true if a duplicate pair of states was found.
+     *  @param states in/out parameter, specifies where to start looking for duplicates,
+     *                and returns the first pair of duplicates found, if any.
+     *  @return true if a duplicate pair of states was found.
      */
-    bool findDuplicateSafeState(int32_t *firstState, int32_t *duplicateState);
+    bool findDuplicateSafeState(IntPair *states);
 
     /** Remove a duplicate state from the safe table.
      * @param keepState First of the duplicate pair. Keep it.