package com.ibm.icu.text;
+import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
-import java.nio.ByteOrder;
+import java.util.Arrays;
import com.ibm.icu.impl.ICUBinary;
import com.ibm.icu.impl.ICUBinary.Authenticate;
/**
* <p>Internal class used for Rule Based Break Iterators.</p>
* <p>This class provides access to the compiled break rule data, as
-* it is stored in a .brk file.
+* it is stored in a .brk file. Refer to the file common/rbbidata.h from
+* ICU4C for further details.
* Not intended for public use; declared public for testing purposes only.
* @internal
* @deprecated This API is ICU internal only.
*/
@Deprecated
public final class RBBIDataWrapper {
+
+    /**
+     * An RBBI State Transition table, the form of the data used at run time in Java.
+     * These can be created from stored ICU data, or built from rules.
+     * The structure corresponds closely to struct RBBIStateTable in ICU4C.
+     * Not intended for public use; declared public for testing purposes only.
+     * @internal
+     * @deprecated This API is ICU internal only.
+     */
+    @Deprecated
+    static public class RBBIStateTable {
+        /**
+         * Number of states (rows) in this table.
+         * @internal
+         * @deprecated This API is ICU internal only.
+         */
+        @Deprecated
+        public int fNumStates;
+        /**
+         * Length of a table row in bytes. Note the mismatch with the table data,
+         * which is short[]: one row occupies fRowLen / 2 elements of fTable.
+         * @internal
+         * @deprecated This API is ICU internal only.
+         */
+        @Deprecated
+        public int fRowLen;
+        /**
+         * Option Flags for this state table.
+         * @internal
+         * @deprecated This API is ICU internal only.
+         */
+        @Deprecated
+        public int fFlags;
+        /**
+         * Reserved header word. It is read and written together with the rest of the
+         * table header and compared by equals(); nothing else in this class interprets it.
+         * @internal
+         * @deprecated This API is ICU internal only.
+         */
+        @Deprecated
+        public int fReserved;
+        /**
+         * Linear array of next state values, accessed as short[state, char_class]
+         * @internal
+         * @deprecated This API is ICU internal only.
+         */
+        @Deprecated
+        public short[] fTable;
+
+        /**
+         * Read one state table from a buffer, starting at the buffer's current position.
+         * The serialized form is a 16 byte header (four ints: fNumStates, fRowLen,
+         * fFlags, fReserved) followed by the row data as 16 bit values.
+         *
+         * @param bytes  the buffer to read from.
+         * @param length the total size in bytes of the serialized table, header included.
+         * @return the table, or null when length is 0.
+         * @throws IOException if length is non-zero but smaller than the 16 byte header.
+         */
+        static RBBIStateTable get(ByteBuffer bytes, int length) throws IOException {
+            if (length == 0) {
+                return null;
+            }
+            if (length < 16) {
+                throw new IOException("Invalid RBBI state table length.");
+            }
+            RBBIStateTable This = new RBBIStateTable();
+            This.fNumStates = bytes.getInt();
+            This.fRowLen    = bytes.getInt();
+            This.fFlags     = bytes.getInt();
+            This.fReserved  = bytes.getInt();
+            int lengthOfShorts = length - 16;   // length in bytes.
+            // An odd byte count cannot be expressed in shorts; the left-over low bit is
+            // handed to getShorts() as its third argument (presumably bytes to skip).
+            This.fTable = ICUBinary.getShorts(bytes, lengthOfShorts / 2, lengthOfShorts & 1);
+            return This;
+        }
+
+        /**
+         * Write this table in serialized form: the four header ints, then
+         * fRowLen * fNumStates bytes of row data taken from fTable, then zero bytes
+         * until the total written is a multiple of 8.
+         *
+         * @param bytes the stream to write to.
+         * @return the total number of bytes written, including header and padding.
+         * @throws IOException if the underlying stream fails.
+         */
+        int put(DataOutputStream bytes) throws IOException {
+            bytes.writeInt(fNumStates);
+            bytes.writeInt(fRowLen);
+            bytes.writeInt(fFlags);
+            bytes.writeInt(fReserved);
+            int rowBytes = fRowLen * fNumStates;    // fRowLen is bytes.
+            int tableLen = rowBytes / 2;            // number of shorts to write.
+            for (int i = 0; i < tableLen; i++) {
+                bytes.writeShort(fTable[i]);
+            }
+            int bytesWritten = 16 + rowBytes;       // total bytes written,
+                                                    // including 16 for the header.
+            while (bytesWritten % 8 != 0) {
+                bytes.writeByte(0);
+                ++bytesWritten;
+            }
+            return bytesWritten;
+        }
+
+        /**
+         * Two state tables are equal when all four header fields match and the
+         * table data arrays have equal contents (or are both null).
+         */
+        @Override
+        public boolean equals (Object other) {
+            if (other == this) {
+                return true;
+            }
+            if (!(other instanceof RBBIStateTable)) {
+                return false;
+            }
+            RBBIStateTable otherST = (RBBIStateTable)other;
+            if (fNumStates != otherST.fNumStates) return false;
+            if (fRowLen    != otherST.fRowLen)    return false;
+            if (fFlags     != otherST.fFlags)     return false;
+            if (fReserved  != otherST.fReserved)  return false;
+            return Arrays.equals(fTable, otherST.fTable);
+        }
+
+        /**
+         * Hash code consistent with equals(): built from the same four header
+         * fields and the contents of fTable. The fields are mutable, so the value
+         * changes if the table is modified after being used as a hash key.
+         */
+        @Override
+        public int hashCode() {
+            int hash = fNumStates;
+            hash = 31 * hash + fRowLen;
+            hash = 31 * hash + fFlags;
+            hash = 31 * hash + fReserved;
+            hash = 31 * hash + Arrays.hashCode(fTable);   // null-safe, content based
+            return hash;
+        }
+    }
+
+    /**
+     * Null-tolerant comparison of two state tables: two null references are
+     * equal, a null and a non-null reference are not, and two non-null tables
+     * are compared with RBBIStateTable.equals().
+     * Not intended for public use; declared public for testing purposes only.
+     * @internal
+     * @deprecated This API is ICU internal only.
+     */
+    @Deprecated
+    static public boolean equals(RBBIStateTable left, RBBIStateTable right) {
+        if (left == null || right == null) {
+            // At least one side is missing; they match only if both are.
+            return left == right;
+        }
+        return left == right || left.equals(right);
+    }
+
+
//
// These fields are the ready-to-use compiled rule data, as
// read from the file.
* @deprecated This API is ICU internal only.
*/
@Deprecated
- public short fFTable[];
+ public RBBIStateTable fFTable;
/**
* @internal
* @deprecated This API is ICU internal only.
*/
@Deprecated
- public short fRTable[];
+ public RBBIStateTable fRTable;
/**
* @internal
* @deprecated This API is ICU internal only.
*/
@Deprecated
- public short fSFTable[];
+ public RBBIStateTable fSFTable;
/**
* @internal
* @deprecated This API is ICU internal only.
*/
@Deprecated
- public short fSRTable[];
+ public RBBIStateTable fSRTable;
+
Trie2 fTrie;
String fRuleSource;
int fStatusTable[];
- private boolean isBigEndian;
-
static final int DATA_FORMAT = 0x42726b20; // "Brk "
static final int FORMAT_VERSION = 0x04000000; // 4.0.0.0
@Deprecated
public final static int NEXTSTATES = 4;
- // Index offsets to header fields of a state table
- // struct RBBIStateTable {... in the C version.
- //
- static final int NUMSTATES = 0;
- static final int ROWLEN = 2;
- static final int FLAGS = 4;
- //ivate static final int RESERVED_2 = 6;
- private static final int ROW_DATA = 8;
-
// Bit selectors for the "FLAGS" field of the state table header
// enum RBBIStateTableFlags in the C version.
//
*/
@Deprecated
public int getRowIndex(int state){
- return ROW_DATA + state * (fHeader.fCatCount + 4);
+ // The index is relative to the start of RBBIStateTable.fTable, which holds row data
+ // only, so the former ROW_DATA header offset is no longer added.
+ // The row stride is fCatCount + 4 table entries.
+ return state * (fHeader.fCatCount + 4);
}
RBBIDataWrapper() {
RBBIDataWrapper This = new RBBIDataWrapper();
ICUBinary.readHeader(bytes, DATA_FORMAT, IS_ACCEPTABLE);
- This.isBigEndian = bytes.order() == ByteOrder.BIG_ENDIAN;
// Read in the RBBI data header...
This.fHeader = new RBBIDataHeader();
ICUBinary.skipBytes(bytes, This.fHeader.fFTable - pos);
pos = This.fHeader.fFTable;
- This.fFTable = ICUBinary.getShorts(
- bytes, This.fHeader.fFTableLen / 2, This.fHeader.fFTableLen & 1);
+ This.fFTable = RBBIStateTable.get(bytes, This.fHeader.fFTableLen);
pos += This.fHeader.fFTableLen;
//
pos = This.fHeader.fRTable;
// Create & fill the table itself.
- This.fRTable = ICUBinary.getShorts(
- bytes, This.fHeader.fRTableLen / 2, This.fHeader.fRTableLen & 1);
+ This.fRTable = RBBIStateTable.get(bytes, This.fHeader.fRTableLen);
pos += This.fHeader.fRTableLen;
//
pos = This.fHeader.fSFTable;
// Create & fill the table itself.
- This.fSFTable = ICUBinary.getShorts(
- bytes, This.fHeader.fSFTableLen / 2, This.fHeader.fSFTableLen & 1);
+ This.fSFTable = RBBIStateTable.get(bytes, This.fHeader.fSFTableLen);
pos += This.fHeader.fSFTableLen;
}
pos = This.fHeader.fSRTable;
// Create & fill the table itself.
- This.fSRTable = ICUBinary.getShorts(
- bytes, This.fHeader.fSRTableLen / 2, This.fHeader.fSRTableLen & 1);
+ This.fSRTable = RBBIStateTable.get(bytes, This.fHeader.fSRTableLen);
pos += This.fHeader.fSRTableLen;
}
return This;
}
- /**
- * Getters for fields from the state table header
- * @internal
- * @deprecated This API is ICU internal only.
- */
- @Deprecated
- public int getStateTableNumStates(short table[]) {
- if (isBigEndian) {
- return (table[NUMSTATES] << 16) | (table[NUMSTATES+1] & 0xffff);
- } else {
- return (table[NUMSTATES+1] << 16) | (table[NUMSTATES] & 0xffff);
- }
- }
-
- int getStateTableFlags(short table[]) {
- // This works for up to 15 flags bits.
- return table[isBigEndian ? FLAGS + 1 : FLAGS];
- }
-
///CLOVER:OFF
/* Debug function to display the break iterator data. */
/**
*/
@Deprecated
public void dump(java.io.PrintStream out) {
- if (fFTable.length == 0) {
+ if (fFTable == null) {
// There is no table. Fail early for testing purposes.
throw new NullPointerException();
}
///CLOVER:OFF
/** Dump a state table. (A full set of RBBI rules has 4 state tables.) */
- private void dumpTable(java.io.PrintStream out, short table[]) {
- if (table == null || table.length == 0) {
+ private void dumpTable(java.io.PrintStream out, RBBIStateTable table) {
+ if (table == null || table.fTable.length == 0) {
out.println(" -- null -- ");
} else {
int n;
out.print("-");
}
out.println();
- for (state=0; state< getStateTableNumStates(table); state++) {
+ for (state=0; state < table.fNumStates; state++) {
dumpRow(out, table, state);
}
out.println();
* @param table
* @param state
*/
- private void dumpRow(java.io.PrintStream out, short table[], int state) {
+ private void dumpRow(java.io.PrintStream out, RBBIStateTable table, int state) {
StringBuilder dest = new StringBuilder(fHeader.fCatCount*5 + 20);
dest.append(intToString(state, 4));
int row = getRowIndex(state);
- if (table[row+ACCEPTING] != 0) {
- dest.append(intToString(table[row+ACCEPTING], 5));
+ if (table.fTable[row+ACCEPTING] != 0) {
+ dest.append(intToString(table.fTable[row+ACCEPTING], 5));
}else {
dest.append(" ");
}
- if (table[row+LOOKAHEAD] != 0) {
- dest.append(intToString(table[row+LOOKAHEAD], 5));
+ if (table.fTable[row+LOOKAHEAD] != 0) {
+ dest.append(intToString(table.fTable[row+LOOKAHEAD], 5));
}else {
dest.append(" ");
}
- dest.append(intToString(table[row+TAGIDX], 5));
+ dest.append(intToString(table.fTable[row+TAGIDX], 5));
for (int col=0; col<fHeader.fCatCount; col++) {
- dest.append(intToString(table[row+NEXTSTATES+col], 5));
+ dest.append(intToString(table.fTable[row+NEXTSTATES+col], 5));
}
out.println(dest);
}
// Write out the actual state tables.
- short[] tableData;
- tableData = fForwardTables.exportTable();
- Assert.assrt(outputPos == header[4]);
- for (i = 0; i < tableData.length; i++) {
- dos.writeShort(tableData[i]);
- outputPos += 2;
- }
+ RBBIDataWrapper.RBBIStateTable table = fForwardTables.exportTable();
+ assert(outputPos == header[4]);
+ outputPos += table.put(dos);
/* do not write the reverse table
tableData = fReverseTables.exportTable();
// Write the safe reverse table.
// If not present, write the plain reverse table (old style rule compatibility)
- Assert.assrt(outputPos == header[10]);
+ assert(outputPos == header[10]);
if (safeRevTableSize > 0) {
- tableData = fSafeRevTables.exportTable();
+ table = fSafeRevTables.exportTable();
} else {
- tableData = fReverseTables.exportTable();
- }
- for (i = 0; i < tableData.length; i++) {
- dos.writeShort(tableData[i]);
- outputPos += 2;
+ table = fReverseTables.exportTable();
}
+ outputPos += table.put(dos);
// write out the Trie table
Assert.assrt(outputPos == header[12]);
}
- //-----------------------------------------------------------------------------
- //
- // getTableSize() Calculate the size in bytes of the runtime form of this
- // state transition table.
- //
- // Note: Refer to common/rbbidata.h from ICU4C for the declarations
- // of the structures being matched by this calculation.
- //
- //-----------------------------------------------------------------------------
+ /**
+ * Calculate the size in bytes of the serialized form of this state transition table,
+ * which is identical to the ICU4C runtime form.
+ * Refer to common/rbbidata.h from ICU4C for the declarations of the structures
+ * being matched by this calculation.
+ *
+ * @return 0 if this table has no tree root; otherwise the size of the 16 byte header
+ * plus all rows, rounded up to a multiple of 8 bytes.
+ */
int getTableSize() {
- int size = 0;
- int numRows;
- int numCols;
- int rowSize;
-
if (fRB.fTreeRoots[fRootIx] == null) {
return 0;
}
-
- size = /*sizeof(RBBIStateTable) - 4 */ 16; // The header, with no rows to the table.
-
- numRows = fDStates.size();
- numCols = fRB.fSetBuilder.getNumCharCategories();
-
- // Note The declaration of RBBIStateTableRow is for a table of two columns.
- // Therefore we subtract two from numCols when determining
- // how much storage to add to a row for the total columns.
- // rowSize = sizeof(RBBIStateTableRow) + sizeof(uint16_t)*(numCols-2);
- rowSize = 8 + 2*numCols;
+ int size = 16; // The header of 4 ints, with no rows to the table.
+ int numRows = fDStates.size();
+ int numCols = fRB.fSetBuilder.getNumCharCategories();
+ // Each row is 8 bytes of row header plus one 16 bit entry per character category.
+ int rowSize = 8 + 2*numCols;
size += numRows * rowSize;
- while (size % 8 > 0) { // Size must be multiple of 8 bytes in size.
- size++;
- }
-
+ size = (size + 7) & ~7; // round up to a multiple of 8 bytes
return size;
}
- //-----------------------------------------------------------------------------
- //
- // exportTable() export the state transition table in the ICU4C format.
- //
- // Most of the table is 16 bit shorts. This function exports
- // the whole thing as an array of shorts.
- //
- // The size of the array must be rounded up to a multiple of
- // 8 bytes.
- //
- // See struct RBBIStateTable in ICU4C, common/rbbidata.h
- //
- //-----------------------------------------------------------------------------
-
- short [] exportTable() {
+ /**
+ * Create a RBBIDataWrapper.RBBIStateTable for a newly compiled table.
+ * RBBIDataWrapper.RBBIStateTable is similar to struct RBBIStateTable in ICU4C,
+ * in common/rbbidata.h
+ */
+ RBBIDataWrapper.RBBIStateTable exportTable() {
int state;
int col;
+ RBBIDataWrapper.RBBIStateTable table = new RBBIDataWrapper.RBBIStateTable();
if (fRB.fTreeRoots[fRootIx] == null) {
- return new short[0];
+ return table;
}
Assert.assrt(fRB.fSetBuilder.getNumCharCategories() < 0x7fff &&
fDStates.size() < 0x7fff);
-
- int numStates = fDStates.size();
+ table.fNumStates = fDStates.size();
// Size of table size in shorts.
// the "4" is the size of struct RBBIStateTableRow, the row header part only.
int rowLen = 4 + fRB.fSetBuilder.getNumCharCategories(); // Row Length in shorts.
- int tableSize = getTableSize() / 2;
-
-
- short [] table = new short[tableSize];
-
- //
- // Fill in the header fields.
- // Note that NUMSTATES, ROWLEN and FLAGS are ints, not shorts.
- // ICU data created from Java is always big endian format, so
- // order the halves of the 32 bit fields into the short[] data accordingly.
- // TODO: ticket 13598 restructure so that ints are represented as ints directly.
- //
- // RBBIStateTable.fNumStates
- table[RBBIDataWrapper.NUMSTATES] = (short)(numStates >>> 16);
- table[RBBIDataWrapper.NUMSTATES+1] = (short)(numStates & 0x0000ffff);
-
- // RBBIStateTable.fRowLen. In bytes.
- int rowLenInBytes = rowLen * 2;
- table[RBBIDataWrapper.ROWLEN] = (short)(rowLenInBytes >>> 16);
- table[RBBIDataWrapper.ROWLEN+1] = (short)(rowLenInBytes & 0x0000ffff);
+ int tableSize = (getTableSize() - 16) / 2; // fTable length in shorts.
+ table.fTable = new short[tableSize];
+ table.fRowLen = rowLen * 2; // Row length in bytes.
- // RBBIStateTable.fFlags
- int flags = 0;
if (fRB.fLookAheadHardBreak) {
- flags |= RBBIDataWrapper.RBBI_LOOKAHEAD_HARD_BREAK;
+ table.fFlags |= RBBIDataWrapper.RBBI_LOOKAHEAD_HARD_BREAK;
}
if (fRB.fSetBuilder.sawBOF()) {
- flags |= RBBIDataWrapper.RBBI_BOF_REQUIRED;
+ table.fFlags |= RBBIDataWrapper.RBBI_BOF_REQUIRED;
}
- table[RBBIDataWrapper.FLAGS] = (short)(flags >>> 16);
- table[RBBIDataWrapper.FLAGS+1] = (short)(flags & 0x0000ffff);
int numCharCategories = fRB.fSetBuilder.getNumCharCategories();
- for (state=0; state<numStates; state++) {
+ for (state=0; state<table.fNumStates; state++) {
RBBIStateDescriptor sd = fDStates.get(state);
- int row = 8 + state*rowLen;
+ int row = state*rowLen;
Assert.assrt (-32768 < sd.fAccepting && sd.fAccepting <= 32767);
Assert.assrt (-32768 < sd.fLookAhead && sd.fLookAhead <= 32767);
- table[row + RBBIDataWrapper.ACCEPTING] = (short)sd.fAccepting;
- table[row + RBBIDataWrapper.LOOKAHEAD] = (short)sd.fLookAhead;
- table[row + RBBIDataWrapper.TAGIDX] = (short)sd.fTagsIdx;
+ table.fTable[row + RBBIDataWrapper.ACCEPTING] = (short)sd.fAccepting;
+ table.fTable[row + RBBIDataWrapper.LOOKAHEAD] = (short)sd.fLookAhead;
+ table.fTable[row + RBBIDataWrapper.TAGIDX] = (short)sd.fTagsIdx;
for (col=0; col<numCharCategories; col++) {
- table[row + RBBIDataWrapper.NEXTSTATES + col] = (short)sd.fDtran[col];
+ table.fTable[row + RBBIDataWrapper.NEXTSTATES + col] = (short)sd.fDtran[col];
}
}
return table;
CharacterIterator text = fText;
Trie2 trie = fRData.fTrie;
- short[] stateTable = fRData.fFTable;
+ short[] stateTable = fRData.fFTable.fTable;
int initialPosition = fPosition;
text.setIndex(initialPosition);
int result = initialPosition;
int state = START_STATE;
int row = fRData.getRowIndex(state);
short category = 3;
- int flagsState = fRData.getStateTableFlags(stateTable);
+ int flagsState = fRData.fFTable.fFlags;
int mode = RBBI_RUN;
if ((flagsState & RBBIDataWrapper.RBBI_BOF_REQUIRED) != 0) {
category = 2;
int result = 0;
int initialPosition = fromPosition;
fLookAheadMatches.reset();
- short[] stateTable = fRData.fSRTable;
+ short[] stateTable = fRData.fSRTable.fTable;
CISetIndex32(fText, fromPosition);
if (fromPosition == fText.getBeginIndex()) {
return BreakIterator.DONE;
row = fRData.getRowIndex(state);
category = 3; // TODO: obsolete? from the old start/run mode scheme?
mode = RBBI_RUN;
- if ((fRData.getStateTableFlags(stateTable) & RBBIDataWrapper.RBBI_BOF_REQUIRED) != 0) {
+ if ((fRData.fSRTable.fFlags & RBBIDataWrapper.RBBI_BOF_REQUIRED) != 0) {
category = 2;
mode = RBBI_START;
}
// or simply retired if it is no longer interesting.
import java.text.CharacterIterator;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.List;
import java.util.Locale;
// Build a break iterator from source rules.
// Want to check the rule builder in Java, not the pre-built rules that are imported from ICU4C.
RBBIDataWrapper dw = bi.fRData;
- short[] fwtbl = dw.fFTable;
+ RBBIDataWrapper.RBBIStateTable fwtbl = dw.fFTable;
int numCharClasses = dw.fHeader.fCatCount;
// Check for duplicate columns (character categories)
List<String> columns = new ArrayList<String>();
for (int column=0; column<numCharClasses; column++) {
StringBuilder s = new StringBuilder();
- for (int r = 1; r < dw.getStateTableNumStates(fwtbl); r++) {
+ for (int r = 1; r < fwtbl.fNumStates; r++) {
int row = dw.getRowIndex(r);
- short tableVal = fwtbl[row + RBBIDataWrapper.NEXTSTATES + column];
+ short tableVal = fwtbl.fTable[row + RBBIDataWrapper.NEXTSTATES + column];
s.append((char)tableVal);
}
columns.add(s.toString());
// Check for duplicate states.
List<String> rows = new ArrayList<String>();
- for (int r=0; r<dw.getStateTableNumStates(fwtbl); r++) {
+ for (int r=0; r<fwtbl.fNumStates; r++) {
StringBuilder s = new StringBuilder();
int row = dw.getRowIndex(r);
- assertTrue("Accepting < -1", fwtbl[row + RBBIDataWrapper.ACCEPTING] >= -1);
- s.append(fwtbl[row + RBBIDataWrapper.ACCEPTING]);
- s.append(fwtbl[row + RBBIDataWrapper.LOOKAHEAD]);
- s.append(fwtbl[row + RBBIDataWrapper.TAGIDX]);
+ assertTrue("Accepting < -1", fwtbl.fTable[row + RBBIDataWrapper.ACCEPTING] >= -1);
+ s.append(fwtbl.fTable[row + RBBIDataWrapper.ACCEPTING]);
+ s.append(fwtbl.fTable[row + RBBIDataWrapper.LOOKAHEAD]);
+ s.append(fwtbl.fTable[row + RBBIDataWrapper.TAGIDX]);
for (int column=0; column<numCharClasses; column++) {
- short tableVal = fwtbl[row + RBBIDataWrapper.NEXTSTATES + column];
+ short tableVal = fwtbl.fTable[row + RBBIDataWrapper.NEXTSTATES + column];
s.append((char)tableVal);
}
rows.add(s.toString());
}
- for (int r1=0; r1 < dw.getStateTableNumStates(fwtbl); r1++) {
- for (int r2= r1+1; r2 < dw.getStateTableNumStates(fwtbl); r2++) {
+ for (int r1=0; r1 < fwtbl.fNumStates; r1++) {
+ for (int r2= r1+1; r2 < fwtbl.fNumStates; r2++) {
assertFalse(String.format("Duplicate states (%d, %d)", r1, r2), rows.get(r1).equals(rows.get(r2)));
// if (rows.get(r1).equals(rows.get(r2))) {
// System.out.printf("Duplicate states (%d, %d)\n", r1, r2);
for (RuleBasedBreakIterator bi: breakIterators) {
String rules = bi.toString();
RuleBasedBreakIterator bi2 = new RuleBasedBreakIterator(rules);
-
- assertTrue("Forward Table", Arrays.equals(bi.fRData.fFTable, bi2.fRData.fFTable));
- assertTrue("Reverse Table", Arrays.equals(bi.fRData.fRTable, bi2.fRData.fRTable));
- assertTrue("Safe Forward Table", Arrays.equals(bi.fRData.fSFTable, bi2.fRData.fSFTable));
- assertTrue("SafeForward Table", Arrays.equals(bi.fRData.fSRTable, bi2.fRData.fSRTable));
+ // bi2 is built from the text returned by bi.toString(); each of its four state
+ // tables must equal the corresponding table of bi. RBBIDataWrapper.equals()
+ // also accepts tables that are null on both sides.
+ assertTrue("Forward Table", RBBIDataWrapper.equals(bi.fRData.fFTable, bi2.fRData.fFTable));
+ assertTrue("Reverse Table", RBBIDataWrapper.equals(bi.fRData.fRTable, bi2.fRData.fRTable));
+ assertTrue("Safe Forward Table", RBBIDataWrapper.equals(bi.fRData.fSFTable, bi2.fRData.fSFTable));
+ assertTrue("Safe Reverse Table", RBBIDataWrapper.equals(bi.fRData.fSRTable, bi2.fRData.fSRTable));
}
}
}