import com.ibm.icu.lang.UProperty;
import com.ibm.icu.text.RBBIRuleBuilder.IntPair;
-//
-// class RBBITableBuilder is part of the RBBI rule compiler.
-// It builds the state transition table used by the RBBI runtime
-// from the expression syntax tree generated by the rule scanner.
-//
-// This class is part of the RBBI implementation only.
-// There is no user-visible public API here.
-//
+/**
+ * This class is part of the RBBI rule compiler.
+ * It builds the state transition table used by the RBBI runtime
+ * from the expression syntax tree generated by the rule scanner.
+ *
+ * This class is part of the RBBI implementation only.
+ * There is no user-visible public API here.
+ */
class RBBITableBuilder {
-
-
//
// RBBIStateDescriptor - The DFA is initially constructed as a set of these descriptors,
// one for each state.
private RBBIRuleBuilder fRB;
- private int fRootIx; // The array index into RBBIRuleBuilder.fTreeRoots
- // for the parse tree to operate on.
- // Too bad Java can't do indirection more easily!
- private List<RBBIStateDescriptor> fDStates; // D states (Aho's terminology)
- // Index is state number
- // Contents are RBBIStateDescriptor pointers.
+ /** The array index into RBBIRuleBuilder.fTreeRoots for the parse tree to operate on. */
+ private int fRootIx;
+
+ /** D states (Aho's terminology). Index is state number. */
+ private List<RBBIStateDescriptor> fDStates;
+
+ /** Synthesized safe table, a List of row arrays. */
+ private List<short[]> fSafeTable;
//-----------------------------------------------------------------------------
//
//-----------------------------------------------------------------------------
//
- // RBBITableBuilder::build - This is the main function for building the DFA state transtion
- // table from the RBBI rules parse tree.
+ // RBBITableBuilder::buildForwardTable - This is the main function for building
+ // the DFA state transition table from the RBBI rules parse tree.
//
//-----------------------------------------------------------------------------
void buildForwardTable() {
// for all tables. Merge the ones from this table into the global set.
//
mergeRuleStatusVals();
-
- if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("states")>=0) {printStates();}
}
return false;
}
+    /**
+     * Find the next pair of duplicate states (rows) in the safe reverse table. An iterator function.
+     * Two rows are treated as duplicates when, column by column, their entries are either equal
+     * or both refer to one of the two states being compared.
+     * @param states in/out parameter. On entry, states.first is the row at which to start looking;
+     *               states.second is overwritten. On a true return, first and second hold the
+     *               indexes of the duplicate pair found, with first &lt; second.
+     * @return true if duplicate states were found, false otherwise.
+     * @internal
+     */
+    boolean findDuplicateSafeState(RBBIRuleBuilder.IntPair states) {
+        final int stateCount = fSafeTable.size();
+
+        for (; states.first < stateCount - 1; ++states.first) {
+            final short[] keepRow = fSafeTable.get(states.first);
+            for (states.second = states.first + 1; states.second < stateCount; ++states.second) {
+                final short[] candidateRow = fSafeTable.get(states.second);
+                final int width = keepRow.length;
+
+                // Scan columns until the first real mismatch; reaching the end means the rows match.
+                int col = 0;
+                while (col < width) {
+                    final int keepVal = keepRow[col];
+                    final int candidateVal = candidateRow[col];
+                    final boolean identical = (keepVal == candidateVal);
+                    // Transitions that stay within the candidate pair are equivalent,
+                    // because the two states will become one if they are merged.
+                    final boolean bothWithinPair =
+                            (keepVal == states.first || keepVal == states.second)
+                            && (candidateVal == states.first || candidateVal == states.second);
+                    if (!identical && !bothWithinPair) {
+                        break;
+                    }
+                    ++col;
+                }
+                if (col == width) {
+                    return true;
+                }
+            }
+        }
+        return false;
+    }
+
/**
* Remove a duplicate state (row) from the state table. All references to the deleted state are
* redirected to "keepState", the first encountered of the duplicated pair of states.
}
}
+    /**
+     * Remove a duplicate state (row) from the safe table.
+     * The row for duplState is deleted. Every cell that referred to duplState is redirected
+     * to keepState, and every cell that referred to a state numbered above duplState is
+     * decremented by one, to account for the rows shifting down after the removal.
+     * @param keepState The first of the duplicate pair of states, the one to be kept.
+     * @param duplState The second of the duplicate pair, the one to be removed.
+     * @internal
+     */
+    void removeSafeState(int keepState, int duplState) {
+        assert(keepState < duplState);
+        // Bound against the safe table itself: that is the list being indexed and shrunk here.
+        assert(duplState < fSafeTable.size());
+
+        fSafeTable.remove(duplState);
+        for (short[] row : fSafeTable) {
+            for (int col = 0; col < row.length; col++) {
+                int existingVal = row[col];
+                if (existingVal == duplState) {
+                    row[col] = (short) keepState;
+                } else if (existingVal > duplState) {
+                    row[col] = (short) (existingVal - 1);
+                }
+            }
+        }
+    }
+
/**
* Check for, and remove duplicate states (table rows).
return table;
}
+    /**
+     * Synthesize a safe state table from the main state table.
+     *
+     * Outline:
+     *   1. Find the "safe" char class pairs (c1, c2): pairs for which every state of the main
+     *      table, other than state 0, ends up in the same state after the input c1 then c2.
+     *   2. Build an initial table with a stop row (0), a start row (1), and one row per
+     *      char class (class n is row n+2). Every non-stop row initially sends each
+     *      input class to the row for that class.
+     *   3. For each safe pair (c1, c2), make the row for c2 go to the stop state on input c1.
+     *   4. Merge duplicate rows to compact the table.
+     *
+     * Only next-state data is stored in the rows. The leading fields of a runtime table row
+     * (fAccepting, fLookAhead, etc.) are not needed for the safe table, and are omitted
+     * at this stage of building.
+     */
+    void buildSafeReverseTable() {
+        final int classCount = fRB.fSetBuilder.getNumCharCategories();
+        final int dfaStateCount = fDStates.size();
+
+        // Each safe pair is stored as two consecutive chars in this StringBuilder.
+        StringBuilder safePairs = new StringBuilder();
+
+        for (int first = 0; first < classCount; ++first) {
+            for (int second = 0; second < classCount; ++second) {
+                int expectedEnd = -1;   // End state reached from the first start state examined.
+                int reachedEnd = 0;     // End state reached from the most recent start state.
+                for (int from = 1; from < dfaStateCount; ++from) {
+                    int mid = fDStates.get(from).fDtran[first];
+                    reachedEnd = fDStates.get(mid).fDtran[second];
+                    if (expectedEnd < 0) {
+                        expectedEnd = reachedEnd;
+                        continue;
+                    }
+                    if (expectedEnd != reachedEnd) {
+                        break;
+                    }
+                }
+                // Equal only when at least one start state was examined and all of them agreed.
+                if (expectedEnd == reachedEnd) {
+                    safePairs.append((char)first);
+                    safePairs.append((char)second);
+                }
+            }
+        }
+
+        // Populate the initial safe table, a List<short[]>.
+        //   Row 0 is the stop state, and stays all zero.
+        //   Row 1 is the start state.
+        //   Rows 2 and beyond are the other states, initially one per char class.
+        assert(fSafeTable == null);
+        fSafeTable = new ArrayList<short[]>();
+        fSafeTable.add(new short[classCount]);
+
+        // From the start state, each input char class transitions to the state for that input.
+        // Note: +2 to skip the start & stop state rows.
+        short[] startRow = new short[classCount];
+        for (int cls = 0; cls < classCount; ++cls) {
+            startRow[cls] = (short)(cls + 2);
+        }
+        fSafeTable.add(startRow);
+
+        // Every per-class row begins as a copy of the start state row.
+        for (int cls = 0; cls < classCount; ++cls) {
+            short[] classRow = new short[classCount];
+            System.arraycopy(startRow, 0, classRow, 0, classCount);
+            fSafeTable.add(classRow);
+        }
+
+        // Run through the safe pairs. In the row for the pair's second class, set the
+        // transition on the pair's first class to zero, the stop state, meaning a safe
+        // point has been found.
+        for (int idx = 0; idx < safePairs.length(); idx += 2) {
+            int c1 = safePairs.charAt(idx);
+            int c2 = safePairs.charAt(idx + 1);
+            fSafeTable.get(c2 + 2)[c1] = 0;
+        }
+
+        // Remove duplicate or redundant rows from the table.
+        RBBIRuleBuilder.IntPair states = new RBBIRuleBuilder.IntPair(1, 0);
+        while (findDuplicateSafeState(states)) {
+            removeSafeState(states.first, states.second);
+        }
+    }
+
+
+    /**
+     * Calculate the size, in bytes, of the runtime form of this safe state table.
+     * The result is 16 bytes of table header, plus (8 + 2 * number of columns) bytes for each row,
+     * rounded up to a multiple of 8.
+     * @return the size in bytes, or 0 if the safe table has not been built.
+     */
+    int getSafeTableSize() {
+        if (fSafeTable == null) {
+            return 0;
+        }
+        final int headerBytes = 16;     // The header of 4 ints, with no rows to the table.
+        final int bytesPerRow = 8 + 2 * fSafeTable.get(0).length;
+        final int unpaddedBytes = headerBytes + fSafeTable.size() * bytesPerRow;
+        // TODO: there are redundant round-ups. Figure out the best place, get rid of the rest.
+        return (unpaddedBytes + 7) & ~7;    // round up to a multiple of 8 bytes
+    }
+
+
+    /**
+     * Create a RBBIDataWrapper.RBBIStateTable for the safe reverse table.
+     * RBBIDataWrapper.RBBIStateTable is similar to struct RBBIStateTable in ICU4C,
+     * in common/rbbidata.h
+     *
+     * Only the next-state cells of each row are written here; the row header cells
+     * keep the default value of a newly allocated short array.
+     * @return the populated state table.
+     */
+    RBBIDataWrapper.RBBIStateTable exportSafeTable() {
+        RBBIDataWrapper.RBBIStateTable table = new RBBIDataWrapper.RBBIStateTable();
+        table.fNumStates = fSafeTable.size();
+        final int categoryCount = fSafeTable.get(0).length;
+
+        // Row length in shorts.
+        // The "4" is the size of struct RBBIStateTableRow, the row header part only.
+        final int shortsPerRow = 4 + categoryCount;
+
+        // TODO: tableSize is basically numStates * numCharCategories,
+        //       except for alignment padding. Clean up here, and in main exportTable().
+        table.fTable = new short[(getSafeTableSize() - 16) / 2];    // fTable length in shorts.
+        table.fRowLen = shortsPerRow * 2;                           // Row length in bytes.
+
+        // Index in fTable of the first next-state cell of the row being written.
+        int dest = RBBIDataWrapper.NEXTSTATES;
+        for (short[] source : fSafeTable) {
+            for (int col = 0; col < categoryCount; col++) {
+                table.fTable[dest + col] = source[col];
+            }
+            dest += shortsPerRow;
+        }
+        return table;
+    }
//-----------------------------------------------------------------------------
}
+    /**
+     * Debug Function. Dump the fully constructed safe reverse table to System.out.
+     * The Accepting, LookAhead and Tags columns are always printed as zero,
+     * since the safe table rows hold next-state data only.
+     */
+    void printReverseTable() {
+        System.out.printf(" Safe Reverse Table \n");
+        if (fSafeTable == null) {
+            System.out.printf(" --- nullptr ---\n");
+            return;
+        }
+        final int categoryCount = fSafeTable.get(0).length;
+
+        // Column headings, one per input char category.
+        System.out.printf("state | i n p u t s y m b o l s \n");
+        System.out.printf(" | Acc LA Tag");
+        for (int category = 0; category < categoryCount; category++) {
+            System.out.printf(" %2d", category);
+        }
+        System.out.printf("\n");
+
+        // Separator line.
+        System.out.printf(" |---------------");
+        for (int category = 0; category < categoryCount; category++) {
+            System.out.printf("---");
+        }
+        System.out.printf("\n");
+
+        // One line per state.
+        int stateNum = 0;
+        for (short[] row : fSafeTable) {
+            System.out.printf(" %3d | " , stateNum);
+            System.out.printf("%3d %3d %5d ", 0, 0, 0); // Accepting, LookAhead, Tags
+            for (int category = 0; category < categoryCount; category++) {
+                System.out.printf(" %2d", row[category]);
+            }
+            System.out.printf("\n");
+            ++stateNum;
+        }
+        System.out.printf("\n\n");
+    }
+
+
+
//-----------------------------------------------------------------------------