struct LookAheadResults {
int32_t fUsedSlotLimit;
int32_t fPositions[8];
- int16_t fKeys[8];
+ uint16_t fKeys[8];
LookAheadResults() : fUsedSlotLimit(0), fPositions(), fKeys() {}
- int32_t getPosition(int16_t key) {
+ int32_t getPosition(uint16_t key) {
for (int32_t i=0; i<fUsedSlotLimit; ++i) {
if (fKeys[i] == key) {
return fPositions[i];
UPRV_UNREACHABLE;
}
- void setPosition(int16_t key, int32_t position) {
+ void setPosition(uint16_t key, int32_t position) {
int32_t i;
for (i=0; i<fUsedSlotLimit; ++i) {
if (fKeys[i] == key) {
(tableData + tableRowLen * state);
- if (row->fAccepting == -1) {
+ uint16_t accepting = row->fAccepting;
+ if (accepting == ACCEPTING_UNCONDITIONAL) {
// Match found, common case.
if (mode != RBBI_START) {
result = (int32_t)UTEXT_GETNATIVEINDEX(&fText);
}
- fRuleStatusIndex = row->fTagIdx; // Remember the break status (tag) values.
- }
-
- int16_t completedRule = row->fAccepting;
- if (completedRule > 0) {
+ fRuleStatusIndex = row->fTagsIdx; // Remember the break status (tag) values.
+ } else if (accepting > ACCEPTING_UNCONDITIONAL) {
// Lookahead match is completed.
- int32_t lookaheadResult = lookAheadMatches.getPosition(completedRule);
+ int32_t lookaheadResult = lookAheadMatches.getPosition(accepting);
if (lookaheadResult >= 0) {
- fRuleStatusIndex = row->fTagIdx;
+ fRuleStatusIndex = row->fTagsIdx;
fPosition = lookaheadResult;
return lookaheadResult;
}
// This would enable hard-break rules with no following context.
// But there are line break test failures when trying this. Investigate.
// Issue ICU-20837
- int16_t rule = row->fLookAhead;
+ uint16_t rule = row->fLookAhead;
if (rule != 0) {
int32_t pos = (int32_t)UTEXT_GETNATIVEINDEX(&fText);
lookAheadMatches.setPosition(rule, pos);
RBBIStateTableRow *row = (RBBIStateTableRow *)
(table->fTableData + (table->fRowLen * s));
if (use8Bits) {
- RBBIDebugPrintf("%4d | %3d %3d %3d ", s, row->r8.fAccepting, row->r8.fLookAhead, row->r8.fTagIdx);
+ RBBIDebugPrintf("%4d | %3d %3d %3d ", s, row->r8.fAccepting, row->r8.fLookAhead, row->r8.fTagsIdx);
for (c=0; c<fHeader->fCatCount; c++) {
RBBIDebugPrintf("%3d ", row->r8.fNextState[c]);
}
} else {
- RBBIDebugPrintf("%4d | %3d %3d %3d ", s, row->r16.fAccepting, row->r16.fLookAhead, row->r16.fTagIdx);
+ RBBIDebugPrintf("%4d | %3d %3d %3d ", s, row->r16.fAccepting, row->r16.fLookAhead, row->r16.fTagsIdx);
for (c=0; c<fHeader->fCatCount; c++) {
RBBIDebugPrintf("%3d ", row->r16.fNextState[c]);
}
-template <typename ST, typename UT>
+template <typename T>
struct RBBIStateTableRowT {
- ST fAccepting; /* Non-zero if this row is for an accepting state. */
- /* Value 0: not an accepting state. */
- /* -1: Unconditional Accepting state. */
- /* positive: Look-ahead match has completed. */
- /* Actual boundary position happened earlier */
- /* Value here == fLookAhead in earlier */
- /* state, at actual boundary pos. */
- ST fLookAhead; /* Non-zero if this row is for a state that */
- /* corresponds to a '/' in the rule source. */
- /* Value is the same as the fAccepting */
- /* value for the rule (which will appear */
- /* in a different state. */
- ST fTagIdx; /* Non-zero if this row covers a {tagged} position */
- /* from a rule. Value is the index in the */
- /* StatusTable of the set of matching */
- /* tags (rule status values) */
- UT fNextState[1]; /* Next State, indexed by char category. */
- /* Variable-length array declared with length 1 */
- /* to disable bounds checkers. */
- /* Array Size is actually fData->fHeader->fCatCount*/
- /* CAUTION: see RBBITableBuilder::getTableSize() */
- /* before changing anything here. */
+ T fAccepting; // Non-zero if this row is for an accepting state.
+ // Value 0: not an accepting state.
+ // 1: (ACCEPTING_UNCONDITIONAL) Unconditional Accepting state.
+ // >1: Look-ahead match has completed.
+ // Actual boundary position happened earlier
+ // Value here == fLookAhead in earlier
+ // state, at actual boundary pos.
+ T fLookAhead; // Non-zero if this row is for a state that
+ // corresponds to a '/' in the rule source.
+ // Value is the same as the fAccepting
+ // value for the rule (which will appear
+ // in a different state.
+ T fTagsIdx; // Non-zero if this row covers a {tagged} position
+ // from a rule. Value is the index in the
+ // StatusTable of the set of matching
+ // tags (rule status values)
+ T fNextState[1]; // Next State, indexed by char category.
+ // Variable-length array declared with length 1
+ // to disable bounds checkers.
+ // Array Size is actually fData->fHeader->fCatCount
+ // CAUTION: see RBBITableBuilder::getTableSize()
+ // before changing anything here.
};
-typedef RBBIStateTableRowT<int8_t, uint8_t> RBBIStateTableRow8;
-typedef RBBIStateTableRowT<int16_t, uint16_t> RBBIStateTableRow16;
+typedef RBBIStateTableRowT<uint8_t> RBBIStateTableRow8;
+typedef RBBIStateTableRowT<uint16_t> RBBIStateTableRow16;
+
+constexpr uint16_t ACCEPTING_UNCONDITIONAL = 1; // Value constant for RBBIStateTableRow::fAccepting
union RBBIStateTableRow {
RBBIStateTableRow16 r16;
return;
}
fLookAheadRuleMap->setSize(fRB->fScanner->numRules() + 1);
- int32_t laSlotsInUse = 0;
+
+ // Counter used when assigning lookahead rule numbers.
+ // Contains the last look-ahead number already in use.
+ // The first look-ahead number is 2; Number 1 (ACCEPTING_UNCONDITIONAL) is reserved
+ // for non-lookahead accepting states. See the declarations of RBBIStateTableRowT.
+ int32_t laSlotsInUse = ACCEPTING_UNCONDITIONAL;
for (int32_t n=0; n<fDStates->size(); n++) {
RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n);
if (sd->fPositions->indexOf(endMarker) >= 0) {
// Any non-zero value for fAccepting means this is an accepting node.
// The value is what will be returned to the user as the break status.
- // If no other value was specified, force it to -1.
+ // If no other value was specified, force it to ACCEPTING_UNCONDITIONAL (1).
if (sd->fAccepting==0) {
// State hasn't been marked as accepting yet. Do it now.
sd->fAccepting = fLookAheadRuleMap->elementAti(endMarker->fVal);
if (sd->fAccepting == 0) {
- sd->fAccepting = -1;
+ sd->fAccepting = ACCEPTING_UNCONDITIONAL;
}
}
- if (sd->fAccepting==-1 && endMarker->fVal != 0) {
+ if (sd->fAccepting==ACCEPTING_UNCONDITIONAL && endMarker->fVal != 0) {
// Both lookahead and non-lookahead accepting for this state.
// Favor the look-ahead, because a look-ahead match needs to
// immediately stop the run-time engine. First match, not longest.
sd->fAccepting = fLookAheadRuleMap->elementAti(endMarker->fVal);
}
// implicit else:
- // if sd->fAccepting already had a value other than 0 or -1, leave it be.
+ // if sd->fAccepting already had a value other than 0 or 1, leave it be.
}
}
}
int32_t positionsIdx = sd->fPositions->indexOf(lookAheadNode);
if (positionsIdx >= 0) {
U_ASSERT(lookAheadNode == sd->fPositions->elementAt(positionsIdx));
- int32_t lookaheadSlot = fLookAheadRuleMap->elementAti(lookAheadNode->fVal);
+ uint32_t lookaheadSlot = fLookAheadRuleMap->elementAti(lookAheadNode->fVal);
U_ASSERT(sd->fLookAhead == 0 || sd->fLookAhead == lookaheadSlot);
// if (sd->fLookAhead != 0 && sd->fLookAhead != lookaheadSlot) {
// printf("%s:%d Bingo. sd->fLookAhead:%d lookaheadSlot:%d\n",
RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state);
RBBIStateTableRow *row = (RBBIStateTableRow *)(table->fTableData + state*table->fRowLen);
if (use8BitsForTable()) {
- U_ASSERT (-128 < sd->fAccepting && sd->fAccepting <= 127);
- U_ASSERT (-128 < sd->fLookAhead && sd->fLookAhead <= 127);
- U_ASSERT (-128 < sd->fTagsIdx && sd->fTagsIdx <= 127);
- row->r8.fAccepting = (int8_t)sd->fAccepting;
- row->r8.fLookAhead = (int8_t)sd->fLookAhead;
- row->r8.fTagIdx = (int8_t)sd->fTagsIdx;
+ U_ASSERT (sd->fAccepting <= 255);
+ U_ASSERT (sd->fLookAhead <= 255);
+ U_ASSERT (0 <= sd->fTagsIdx && sd->fTagsIdx <= 255);
+ row->r8.fAccepting = sd->fAccepting;
+ row->r8.fLookAhead = sd->fLookAhead;
+ row->r8.fTagsIdx = sd->fTagsIdx;
for (col=0; col<catCount; col++) {
U_ASSERT (sd->fDtran->elementAti(col) <= kMaxStateFor8BitsTable);
row->r8.fNextState[col] = sd->fDtran->elementAti(col);
}
} else {
- U_ASSERT (-32768 < sd->fAccepting && sd->fAccepting <= 32767);
- U_ASSERT (-32768 < sd->fLookAhead && sd->fLookAhead <= 32767);
- row->r16.fAccepting = (int16_t)sd->fAccepting;
- row->r16.fLookAhead = (int16_t)sd->fLookAhead;
- row->r16.fTagIdx = (int16_t)sd->fTagsIdx;
+ U_ASSERT (sd->fAccepting <= 0xffff);
+ U_ASSERT (sd->fLookAhead <= 0xffff);
+ U_ASSERT (0 <= sd->fTagsIdx && sd->fTagsIdx <= 0xffff);
+ row->r16.fAccepting = sd->fAccepting;
+ row->r16.fLookAhead = sd->fLookAhead;
+ row->r16.fTagsIdx = sd->fTagsIdx;
for (col=0; col<catCount; col++) {
row->r16.fNextState[col] = sd->fDtran->elementAti(col);
}
if (use8BitsForSafeTable()) {
row->r8.fAccepting = 0;
row->r8.fLookAhead = 0;
- row->r8.fTagIdx = 0;
+ row->r8.fTagsIdx = 0;
for (col=0; col<catCount; col++) {
U_ASSERT(rowString->charAt(col) <= kMaxStateFor8BitsTable);
row->r8.fNextState[col] = rowString->charAt(col);
} else {
row->r16.fAccepting = 0;
row->r16.fLookAhead = 0;
- row->r16.fTagIdx = 0;
+ row->r16.fTagsIdx = 0;
for (col=0; col<catCount; col++) {
row->r16.fNextState[col] = rowString->charAt(col);
}
class RBBIStateDescriptor : public UMemory {
public:
UBool fMarked;
- int32_t fAccepting;
- int32_t fLookAhead;
+ uint32_t fAccepting;
+ uint32_t fLookAhead;
UVector *fTagVals;
int32_t fTagsIdx;
UVector *fPositions; // Set of parse tree positions associated
UnicodeString s;
RBBIStateTableRow *row = (RBBIStateTableRow *) (fwtbl->fTableData + (fwtbl->fRowLen * r));
if (in8Bits) {
- assertTrue(WHERE, row->r8.fAccepting >= -1);
- s.append(row->r8.fAccepting + 1); // values of -1 are expected.
+ s.append(row->r8.fAccepting);
s.append(row->r8.fLookAhead);
- s.append(row->r8.fTagIdx);
+ s.append(row->r8.fTagsIdx);
for (int32_t column = 0; column < numCharClasses; column++) {
s.append(row->r8.fNextState[column]);
}
} else {
- assertTrue(WHERE, row->r16.fAccepting >= -1);
- s.append(row->r16.fAccepting + 1); // values of -1 are expected.
+ s.append(row->r16.fAccepting);
s.append(row->r16.fLookAhead);
- s.append(row->r16.fTagIdx);
+ s.append(row->r16.fTagsIdx);
for (int32_t column = 0; column < numCharClasses; column++) {
s.append(row->r16.fNextState[column]);
}
This.fTable = new char[lengthOfTable];
for (int i = 0; i < lengthOfTable; i++) {
byte b = bytes.get();
- if (i % This.fRowLen < NEXTSTATES) {
- This.fTable[i] = (char) b; // Treat b as signed.
- } else {
- This.fTable[i] = (char)(0xff & b); // Treat b as unsigned.
- }
+ This.fTable[i] = (char)(0xff & b); // Treat b as unsigned.
}
ICUBinary.skipBytes(bytes, lengthOfTable & 1);
} else {
/**
* offset to the "tagIndex" field in a state table row.
*/
- public final static int TAGIDX = 2;
+ public final static int TAGSIDX = 2;
/**
* offset to the start of the next states array in a state table row.
*/
public final static int NEXTSTATES = 3;
+ /**
+ * value constant for the ACCEPTING field of a state table row.
+ */
+ public final static int ACCEPTING_UNCONDITIONAL = 1;
+
// Bit selectors for the "FLAGS" field of the state table header
// enum RBBIStateTableFlags in the C version.
//
} else {
dest.append(" ");
}
- dest.append(intToString(table.fTable[row+TAGIDX], 5));
+ dest.append(intToString(table.fTable[row+TAGSIDX], 5));
for (int col=0; col<fHeader.fCatCount; col++) {
dest.append(intToString(table.fTable[row+NEXTSTATES+col], 5));
*/
void mapLookAheadRules() {
fLookAheadRuleMap = new int[fRB.fScanner.numRules() + 1];
- int laSlotsInUse = 0;
+
+ // Counter used when assigning lookahead rule numbers.
+ // Contains the last look-ahead number already in use.
+ // The first look-ahead number is 2;
+ // Number 1 is reserved for non-lookahead accepting states.
+ int laSlotsInUse = RBBIDataWrapper.ACCEPTING_UNCONDITIONAL;
for (RBBIStateDescriptor sd: fDStates) {
int laSlotForState = 0;
endMarker = endMarkerNodes.get(i);
for (n=0; n<fDStates.size(); n++) {
RBBIStateDescriptor sd = fDStates.get(n);
- //if (sd.fPositions.indexOf(endMarker) >= 0) {
if (sd.fPositions.contains(endMarker)) {
// Any non-zero value for fAccepting means this is an accepting node.
// The value is what will be returned to the user as the break status.
- // If no other value was specified, force it to -1.
+ // If no other value was specified, force it to ACCEPTING_UNCONDITIONAL (1).
if (sd.fAccepting==0) {
// State hasn't been marked as accepting yet. Do it now.
sd.fAccepting = fLookAheadRuleMap[endMarker.fVal];
if (sd.fAccepting == 0) {
- sd.fAccepting = -1;
+ sd.fAccepting = RBBIDataWrapper.ACCEPTING_UNCONDITIONAL;
}
}
- if (sd.fAccepting==-1 && endMarker.fVal != 0) {
+ if (sd.fAccepting==RBBIDataWrapper.ACCEPTING_UNCONDITIONAL && endMarker.fVal != 0) {
// Both lookahead and non-lookahead accepting for this state.
// Favor the look-ahead, because a look-ahead match needs to
// immediately stop the run-time engine. First match, not longest.
sd.fAccepting = fLookAheadRuleMap[endMarker.fVal];
}
// implicit else:
- // if sd.fAccepting already had a value other than 0 or -1, leave it be.
+ // if sd.fAccepting already had a value other than 0 or 1, leave it be.
}
}
}
RBBIStateDescriptor sd = fDStates.get(state);
int row = state*rowLen;
if (use8Bits) {
- Assert.assrt (-128 < sd.fAccepting && sd.fAccepting <= MAX_STATE_FOR_8BITS_TABLE);
- Assert.assrt (-128 < sd.fLookAhead && sd.fLookAhead <= MAX_STATE_FOR_8BITS_TABLE);
+ Assert.assrt (0 <= sd.fAccepting && sd.fAccepting <= 255);
+ Assert.assrt (0 <= sd.fLookAhead && sd.fLookAhead <= 255);
} else {
- Assert.assrt (-32768 < sd.fAccepting && sd.fAccepting <= 32767);
- Assert.assrt (-32768 < sd.fLookAhead && sd.fLookAhead <= 32767);
+ Assert.assrt (0 <= sd.fAccepting && sd.fAccepting <= 0xffff);
+ Assert.assrt (0 <= sd.fLookAhead && sd.fLookAhead <= 0xffff);
}
table.fTable[row + RBBIDataWrapper.ACCEPTING] = (char)sd.fAccepting;
table.fTable[row + RBBIDataWrapper.LOOKAHEAD] = (char)sd.fLookAhead;
- table.fTable[row + RBBIDataWrapper.TAGIDX] = (char)sd.fTagsIdx;
+ table.fTable[row + RBBIDataWrapper.TAGSIDX] = (char)sd.fTagsIdx;
for (col=0; col<numCharCategories; col++) {
if (use8Bits) {
- Assert.assrt (sd.fDtran[col] <= MAX_STATE_FOR_8BITS_TABLE);
+ Assert.assrt (0 <= sd.fDtran[col] && sd.fDtran[col] <= MAX_STATE_FOR_8BITS_TABLE);
}
table.fTable[row + RBBIDataWrapper.NEXTSTATES + col] = (char)sd.fDtran[col];
}
// look up a state transition in the state table
state = stateTable[row + RBBIDataWrapper.NEXTSTATES + category];
row = fRData.getRowIndex(state);
- if (stateTable[row + RBBIDataWrapper.ACCEPTING] == 0xffff) {
+ int accepting = stateTable[row + RBBIDataWrapper.ACCEPTING];
+ if (accepting == RBBIDataWrapper.ACCEPTING_UNCONDITIONAL) {
// Match found, common case
result = text.getIndex();
if (c >= UTF16.SUPPLEMENTARY_MIN_VALUE && c <= UTF16.CODEPOINT_MAX_VALUE) {
}
// Remember the break status (tag) values.
- fRuleStatusIndex = stateTable[row + RBBIDataWrapper.TAGIDX];
- }
-
- int completedRule = stateTable[row + RBBIDataWrapper.ACCEPTING];
- if (completedRule > 0 && completedRule != 0xffff) {
+ fRuleStatusIndex = stateTable[row + RBBIDataWrapper.TAGSIDX];
+ } else if (accepting > RBBIDataWrapper.ACCEPTING_UNCONDITIONAL) {
// Lookahead match is completed
- int lookaheadResult = fLookAheadMatches.getPosition(completedRule);
+ int lookaheadResult = fLookAheadMatches.getPosition(accepting);
if (lookaheadResult >= 0) {
- fRuleStatusIndex = stateTable[row + RBBIDataWrapper.TAGIDX];
+ fRuleStatusIndex = stateTable[row + RBBIDataWrapper.TAGSIDX];
fPosition = lookaheadResult;
return lookaheadResult;
}
version https://git-lfs.github.com/spec/v1
-oid sha256:8ed7db50765b06c8a35f48048543c5c9a2c2e19993f752bd71a15e6ac89aa3b3
-size 13141781
+oid sha256:bdf00a19b05bc52e17c2aea74e87cc1872a824d5a9cced226078c46a194a8799
+size 13141762
int row = dw.getRowIndex(r);
s.append(fwtbl.fTable[row + RBBIDataWrapper.ACCEPTING]);
s.append(fwtbl.fTable[row + RBBIDataWrapper.LOOKAHEAD]);
- s.append(fwtbl.fTable[row + RBBIDataWrapper.TAGIDX]);
+ s.append(fwtbl.fTable[row + RBBIDataWrapper.TAGSIDX]);
for (int column=0; column<numCharClasses; column++) {
char tableVal = fwtbl.fTable[row + RBBIDataWrapper.NEXTSTATES + column];
s.append(tableVal);