From: Andy Heninger Date: Tue, 3 Apr 2018 23:41:28 +0000 (+0000) Subject: ICU-13194 RBBI safe tables, remove unnecessary Java/C++ differences. X-Git-Tag: release-62-rc~204^2~2 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=440e1e31c363c0eaa8daa395c1bd0ae884442934;p=icu ICU-13194 RBBI safe tables, remove unnecessary Java/C++ differences. X-SVN-Rev: 41192 --- diff --git a/icu4c/source/common/rbbirb.cpp b/icu4c/source/common/rbbirb.cpp index 90752ba1468..8d624f4a925 100644 --- a/icu4c/source/common/rbbirb.cpp +++ b/icu4c/source/common/rbbirb.cpp @@ -308,9 +308,12 @@ void RBBIRuleBuilder::optimizeTables() { leftClass = 3; rightClass = 0; - while (fForwardTable->findDuplCharClassFrom(leftClass, rightClass)) { - fSetBuilder->mergeCategories(leftClass, rightClass); - fForwardTable->removeColumn(rightClass); + + IntPair duplPair = {3, 0}; + + while (fForwardTable->findDuplCharClassFrom(&duplPair)) { + fSetBuilder->mergeCategories(duplPair.first, duplPair.second); + fForwardTable->removeColumn(duplPair.second); } fForwardTable->removeDuplicateStates(); } diff --git a/icu4c/source/common/rbbirb.h b/icu4c/source/common/rbbirb.h index 59ff66f9044..037c1dc2ce8 100644 --- a/icu4c/source/common/rbbirb.h +++ b/icu4c/source/common/rbbirb.h @@ -18,6 +18,8 @@ #if !UCONFIG_NO_BREAK_ITERATION +#include <utility> + #include "unicode/uobject.h" #include "unicode/rbbi.h" #include "unicode/uniset.h" @@ -25,8 +27,7 @@ #include "uhash.h" #include "uvector.h" #include "unicode/symtable.h"// For UnicodeSet parsing, is the interface that - // looks up references to $variables within a set. - + // looks up references to $variables within a set. U_NAMESPACE_BEGIN @@ -203,6 +204,11 @@ struct RBBISetTableEl { RBBINode *val; }; +/** + * A pair of ints, used to bundle pairs of states or pairs of character classes.
+ */ +typedef std::pair<int32_t, int32_t> IntPair; + //---------------------------------------------------------------------------- // diff --git a/icu4c/source/common/rbbitblb.cpp b/icu4c/source/common/rbbitblb.cpp index bd039ea4a87..30e475c063d 100644 --- a/icu4c/source/common/rbbitblb.cpp +++ b/icu4c/source/common/rbbitblb.cpp @@ -1078,18 +1078,18 @@ void RBBITableBuilder::printPosSets(RBBINode *n) { // // findDuplCharClassFrom() // -bool RBBITableBuilder::findDuplCharClassFrom(int32_t &baseCategory, int32_t &duplCategory) { +bool RBBITableBuilder::findDuplCharClassFrom(IntPair *categories) { int32_t numStates = fDStates->size(); int32_t numCols = fRB->fSetBuilder->getNumCharCategories(); uint16_t table_base; uint16_t table_dupl; - for (; baseCategory < numCols-1; ++baseCategory) { - for (duplCategory=baseCategory+1; duplCategory < numCols; ++duplCategory) { + for (; categories->first < numCols-1; categories->first++) { + for (categories->second=categories->first+1; categories->second < numCols; categories->second++) { for (int32_t state=0; state<numStates; state++) { RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state); - table_base = (uint16_t)sd->fDtran->elementAti(baseCategory); - table_dupl = (uint16_t)sd->fDtran->elementAti(duplCategory); + table_base = (uint16_t)sd->fDtran->elementAti(categories->first); + table_dupl = (uint16_t)sd->fDtran->elementAti(categories->second); if (table_base != table_dupl) { break; } @@ -1118,14 +1118,14 @@ void RBBITableBuilder::removeColumn(int32_t column) { /* * findDuplicateState */ -bool RBBITableBuilder::findDuplicateState(int32_t &firstState, int32_t &duplState) { +bool RBBITableBuilder::findDuplicateState(IntPair *states) { int32_t numStates = fDStates->size(); int32_t numCols = fRB->fSetBuilder->getNumCharCategories(); - for (; firstState<numStates-1; ++firstState) { - RBBIStateDescriptor *firstSD = (RBBIStateDescriptor *)fDStates->elementAt(firstState); - for (duplState=firstState+1; duplState<numStates; ++duplState) { - RBBIStateDescriptor *duplSD = (RBBIStateDescriptor *)fDStates->elementAt(duplState); + for (; states->first<numStates-1; states->first++) { + RBBIStateDescriptor *firstSD = (RBBIStateDescriptor *)fDStates->elementAt(states->first); + for (states->second=states->first+1;
states->second<numStates; states->second++) { + RBBIStateDescriptor *duplSD = (RBBIStateDescriptor *)fDStates->elementAt(states->second); if (firstSD->fAccepting != duplSD->fAccepting || firstSD->fLookAhead != duplSD->fLookAhead || firstSD->fTagsIdx != duplSD->fTagsIdx) { @@ -1136,8 +1136,8 @@ bool RBBITableBuilder::findDuplicateState(int32_t &firstState, int32_t &duplStat int32_t firstVal = firstSD->fDtran->elementAti(col); int32_t duplVal = duplSD->fDtran->elementAti(col); if (!((firstVal == duplVal) || - ((firstVal == firstState || firstVal == duplState) && - (duplVal == firstState || duplVal == duplState)))) { + ((firstVal == states->first || firstVal == states->second) && + (duplVal == states->first || duplVal == states->second)))) { rowsMatch = false; break; } @@ -1151,21 +1151,21 @@ bool RBBITableBuilder::findDuplicateState(int32_t &firstState, int32_t &duplStat } -bool RBBITableBuilder::findDuplicateSafeState(int32_t *firstState, int32_t *duplState) { +bool RBBITableBuilder::findDuplicateSafeState(IntPair *states) { int32_t numStates = fSafeTable->size(); - for (; *firstState<numStates-1; ++(*firstState)) { - UnicodeString *firstRow = static_cast<UnicodeString *>(fSafeTable->elementAt(*firstState)); - for (*duplState=*firstState+1; *duplState<numStates; ++(*duplState)) { - UnicodeString *duplRow = static_cast<UnicodeString *>(fSafeTable->elementAt(*duplState)); + for (; states->first<numStates-1; states->first++) { + UnicodeString *firstRow = static_cast<UnicodeString *>(fSafeTable->elementAt(states->first)); + for (states->second=states->first+1; states->second<numStates; states->second++) { + UnicodeString *duplRow = static_cast<UnicodeString *>(fSafeTable->elementAt(states->second)); bool rowsMatch = true; int32_t numCols = firstRow->length(); for (int32_t col=0; col < numCols; ++col) { int32_t firstVal = firstRow->charAt(col); int32_t duplVal = duplRow->charAt(col); if (!((firstVal == duplVal) || - ((firstVal == *firstState || firstVal == *duplState) && - (duplVal == *firstState || duplVal == *duplState)))) { + ((firstVal == states->first || firstVal == states->second) && + (duplVal == states->first || duplVal == states->second)))) { rowsMatch = false; break; } @@ -1242,11 +1242,10 @@ void
RBBITableBuilder::removeSafeState(int32_t keepState, int32_t duplState) { * RemoveDuplicateStates */ void RBBITableBuilder::removeDuplicateStates() { - int32_t firstState = 3; - int32_t duplicateState = 0; - while (findDuplicateState(firstState, duplicateState)) { - // printf("Removing duplicate states (%d, %d)\n", firstState, duplicateState); - removeState(firstState, duplicateState); + IntPair dupls = {3, 0}; + while (findDuplicateState(&dupls)) { + // printf("Removing duplicate states (%d, %d)\n", dupls.first, dupls.second); + removeState(dupls.first, dupls.second); } } @@ -1428,11 +1427,10 @@ void RBBITableBuilder::buildSafeReverseTable(UErrorCode &status) { } // Remove duplicate or redundant rows from the table. - int32_t firstState = 1; - int32_t duplicateState = 0; // initial value is not used; set by findDuplicateSafeState(). - while (findDuplicateSafeState(&firstState, &duplicateState)) { - // printf("Removing duplicate safe states (%d, %d)\n", firstState, duplicateState); - removeSafeState(firstState, duplicateState); + IntPair states = {1, 0}; + while (findDuplicateSafeState(&states)) { + // printf("Removing duplicate safe states (%d, %d)\n", states.first, states.second); + removeSafeState(states.first, states.second); } } diff --git a/icu4c/source/common/rbbitblb.h b/icu4c/source/common/rbbitblb.h index 0a0326ae32f..238ae463e43 100644 --- a/icu4c/source/common/rbbitblb.h +++ b/icu4c/source/common/rbbitblb.h @@ -17,6 +17,7 @@ #include "unicode/utypes.h" #include "unicode/uobject.h" #include "unicode/rbbi.h" +#include "rbbirb.h" #include "rbbinode.h" @@ -49,11 +50,15 @@ public: */ void exportTable(void *where); - /** Find duplicate (redundant) character classes, beginning after the specifed + /** + * Find duplicate (redundant) character classes, beginning at the specified * pair, within this state table. This is an iterator-like function, used to - * identify char classes (state table columns) that can be eliminated. 
+ * identify character classes (state table columns) that can be eliminated. + * @param categories in/out parameter, specifies where to start looking for duplicates, + * and returns the first pair of duplicates found, if any. + * @return true if duplicate char classes were found, false otherwise. */ - bool findDuplCharClassFrom(int &baseClass, int &duplClass); + bool findDuplCharClassFrom(IntPair *statePair); /** Remove a column from the state table. Used when two character categories * have been found equivalent, and merged together, to eliminate the uneeded table column. @@ -95,13 +100,15 @@ private: void addRuleRootNodes(UVector *dest, RBBINode *node); - /** Find the next duplicate state. An iterator function. - * @param firstState (in/out) begin looking at this state, return the first of the - * pair of duplicates. - * @param duplicateState returns the duplicate state of fistState - * @return true if a duplicate pair of states was found. + /** + * Find duplicate (redundant) states, beginning at the specified pair, + * within this state table. This is an iterator-like function, used to + * identify states (state table rows) that can be eliminated. + * @param states in/out parameter, specifies where to start looking for duplicates, + * and returns the first pair of duplicates found, if any. + * @return true if duplicate states were found, false otherwise. */ - bool findDuplicateState(int32_t &firstState, int32_t &duplicateState); + bool findDuplicateState(IntPair *states); /** Remove a duplicate state/ * @param keepState First of the duplicate pair. Keep it. @@ -111,12 +118,11 @@ private: void removeState(int32_t keepState, int32_t duplState); /** Find the next duplicate state in the safe reverse table. An iterator function. - * @param firstState ptr to state variable. Begin looking at this state, set to the first of the - * pair of duplicates on return. - * @param duplicateState ptr to where to return the duplicate state of fistState. Output only. 
- * @return true if a duplicate pair of states was found. + * @param states in/out parameter, specifies where to start looking for duplicates, + * and returns the first pair of duplicates found, if any. + * @return true if a duplicate pair of states was found. */ - bool findDuplicateSafeState(int32_t *firstState, int32_t *duplicateState); + bool findDuplicateSafeState(IntPair *states); /** Remove a duplicate state from the safe table. * @param keepState First of the duplicate pair. Keep it.