}
void RBBIRuleBuilder::optimizeTables() {
- int32_t leftClass;
- int32_t rightClass;
-
- leftClass = 3;
- rightClass = 0;
+ // Begin looking for duplicates with char class 3.
+ // Classes 0, 1 and 2 are special; they are unused, {bof} and {eof} respectively,
+ // and should not have other categories merged into them.
IntPair duplPair = {3, 0};
while (fForwardTable->findDuplCharClassFrom(&duplPair)) {
- fSetBuilder->mergeCategories(duplPair.first, duplPair.second);
+ fSetBuilder->mergeCategories(duplPair);
fForwardTable->removeColumn(duplPair.second);
}
fForwardTable->removeDuplicateStates();
}
-void RBBISetBuilder::mergeCategories(int32_t left, int32_t right) {
- U_ASSERT(left >= 1);
- U_ASSERT(right > left);
+void RBBISetBuilder::mergeCategories(IntPair categories) {
+ U_ASSERT(categories.first >= 1);
+ U_ASSERT(categories.second > categories.first);
for (RangeDescriptor *rd = fRangeList; rd != nullptr; rd = rd->fNext) {
int32_t rangeNum = rd->fNum & ~DICT_BIT;
int32_t rangeDict = rd->fNum & DICT_BIT;
- if (rangeNum == right) {
- rd->fNum = left | rangeDict;
- } else if (rangeNum > right) {
+ if (rangeNum == categories.second) {
+ rd->fNum = categories.first | rangeDict;
+ } else if (rangeNum > categories.second) {
rd->fNum--;
}
}
UChar32 getFirstChar(int32_t val) const;
UBool sawBOF() const; // Indicate whether any references to the {bof} pseudo
// character were encountered.
- /** merge two character categories that have been identified as having equivalent behavior.
- * The ranges belonging to the right category (table column) will be added to the left.
+ /**
+ * Merge two character categories that have been identified as having equivalent behavior.
+ * The ranges belonging to the second category (table column) will be added to the first.
+ * @param categories the pair of categories to be merged.
*/
- void mergeCategories(int32_t left, int32_t right);
+ void mergeCategories(IntPair categories);
static constexpr int32_t DICT_BIT = 0x4000;
}
-void RBBITableBuilder::removeState(int32_t keepState, int32_t duplState) {
+void RBBITableBuilder::removeState(IntPair duplStates) {
+ const int32_t keepState = duplStates.first;
+ const int32_t duplState = duplStates.second;
U_ASSERT(keepState < duplState);
U_ASSERT(duplState < fDStates->size());
}
}
-void RBBITableBuilder::removeSafeState(int32_t keepState, int32_t duplState) {
+void RBBITableBuilder::removeSafeState(IntPair duplStates) {
+ const int32_t keepState = duplStates.first;
+ const int32_t duplState = duplStates.second;
U_ASSERT(keepState < duplState);
U_ASSERT(duplState < fSafeTable->size());
IntPair dupls = {3, 0};
while (findDuplicateState(&dupls)) {
// printf("Removing duplicate states (%d, %d)\n", dupls.first, dupls.second);
- removeState(dupls.first, dupls.second);
+ removeState(dupls);
}
}
IntPair states = {1, 0};
while (findDuplicateSafeState(&states)) {
// printf("Removing duplicate safe states (%d, %d)\n", states.first, states.second);
- removeSafeState(states.first, states.second);
+ removeSafeState(states);
}
}
void exportTable(void *where);
/**
- * Find duplicate (redundant) character classes, beginning at the specified
- * pair, within this state table. This is an iterator-like function, used to
- * identify character classes (state table columns) that can be eliminated.
+ * Find duplicate (redundant) character classes. Begin looking with categories.first.
+ * Duplicates, if found, are returned in the categories parameter.
+ * This is an iterator-like function, used to identify character classes
+ * (state table columns) that can be eliminated.
* @param categories in/out parameter, specifies where to start looking for duplicates,
* and returns the first pair of duplicates found, if any.
* @return true if duplicate char classes were found, false otherwise.
*/
- bool findDuplCharClassFrom(IntPair *statePair);
+ bool findDuplCharClassFrom(IntPair *categories);
/** Remove a column from the state table. Used when two character categories
* have been found equivalent, and merged together, to eliminate the unneeded table column.
*/
bool findDuplicateState(IntPair *states);
- /** Remove a duplicate state/
- * @param keepState First of the duplicate pair. Keep it.
- * @param duplState Duplicate state. Remove it. Redirect all references to the duplicate state
- * to refer to keepState instead.
+ /** Remove a duplicate state.
+ * @param duplStates The duplicate states. The first is kept, the second is removed.
+ * All references to the second in the state table are retargeted
+ * to the first.
*/
- void removeState(int32_t keepState, int32_t duplState);
+ void removeState(IntPair duplStates);
/** Find the next duplicate state in the safe reverse table. An iterator function.
* @param states in/out parameter, specifies where to start looking for duplicates,
bool findDuplicateSafeState(IntPair *states);
/** Remove a duplicate state from the safe table.
- * @param keepState First of the duplicate pair. Keep it.
- * @param duplState Duplicate state. Remove it. Redirect all table references to the duplicate state
- * to refer to keepState instead.
+ * @param duplStates The duplicate states. The first is kept, the second is removed.
+ * All references to the second in the state table are retargeted
+ * to the first.
*/
- void removeSafeState(int32_t keepState, int32_t duplState);
+ void removeSafeState(IntPair duplStates);
// Set functions for UVector.
// TODO: make a USet subclass of UVector
RBBIDataWrapper *fData;
private:
- /**
- * The iteration state - current position, rule status for the current position,
- * and whether the iterator ran off the end, yielding UBRK_DONE.
- * Current position is pinned to be 0 < position <= text.length.
- * Current position is always set to a boundary.
- * @internal
- */
/**
* The current position of the iterator. Pinned, 0 < fPosition <= text.length.
* Never has the value UBRK_DONE (-1).