From: Andy Heninger Date: Mon, 5 Mar 2018 19:43:03 +0000 (+0000) Subject: ICU-13598 fix byte order bug in RBBI data wrapper. X-Git-Tag: release-61-rc~12 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=7e1e4e0e348d8698884918193b1870541293f99b;p=icu ICU-13598 fix byte order bug in RBBI data wrapper. X-SVN-Rev: 41068 --- diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIDataWrapper.java b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIDataWrapper.java index 51d96e65870..6b3a9e9e38c 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIDataWrapper.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIDataWrapper.java @@ -9,9 +9,10 @@ package com.ibm.icu.text; +import java.io.DataOutputStream; import java.io.IOException; import java.nio.ByteBuffer; -import java.nio.ByteOrder; +import java.util.Arrays; import com.ibm.icu.impl.ICUBinary; import com.ibm.icu.impl.ICUBinary.Authenticate; @@ -20,13 +21,131 @@ import com.ibm.icu.impl.Trie2; /** *

Internal class used for Rule Based Break Iterators.

*

This class provides access to the compiled break rule data, as -* it is stored in a .brk file. +* it is stored in a .brk file. Refer to the file common/rbbidata.h from +* ICU4C for further details. * Not intended for public use; declared public for testing purposes only. * @internal * @deprecated This API is ICU internal only. */ @Deprecated public final class RBBIDataWrapper { + + /** + * A RBBI State Transition table, the form of the data used at run time in Java. + * These can be created from stored ICU data, or built from rules. + * The structure corresponds closely to struct RBBIStateTable in ICU4C. + * Not intended for public use; declared public for testing purposes only. + * @internal + * @deprecated This API is ICU internal only. + */ + @Deprecated + static public class RBBIStateTable { + /** + * Number of states (rows) in this table. + * @internal + * @deprecated This API is ICU internal only. + */ + @Deprecated + public int fNumStates; + /** + * Length of a table row in bytes. Note mismatch with table data, which is short[]. + * @internal + * @deprecated This API is ICU internal only. + */ + @Deprecated + public int fRowLen; + /** + * Option Flags for this state table. + * @internal + * @deprecated This API is ICU internal only. + */ + @Deprecated + public int fFlags; + /** + * Option Flags for this state table. + * @internal + * @deprecated This API is ICU internal only. + */ + @Deprecated + public int fReserved; + /** + * Linear array of next state values, accessed as short[state, char_class] + * @internal + * @deprecated This API is ICU internal only. + */ + @Deprecated + public short[] fTable; + + static RBBIStateTable get(ByteBuffer bytes, int length) throws IOException { + if (length == 0) { + return null; + } + if (length < 16) { + throw new IOException("Invalid RBBI state table length."); + } + RBBIStateTable This = new RBBIStateTable(); + This.fNumStates = bytes.getInt(); + This.fRowLen = bytes.getInt(); + This.fFlags = bytes.getInt(); + This.fReserved = bytes.getInt(); + int lengthOfShorts = length - 16; // length in bytes. + This.fTable = ICUBinary.getShorts(bytes, lengthOfShorts / 2, lengthOfShorts & 1); + return This; + } + + int put(DataOutputStream bytes) throws IOException { + bytes.writeInt(fNumStates); + bytes.writeInt(fRowLen); + bytes.writeInt(fFlags); + bytes.writeInt(fReserved); + int tableLen = fRowLen * fNumStates / 2; // fRowLen is bytes. + for (int i = 0; i < tableLen; i++) { + bytes.writeShort(fTable[i]); + } + int bytesWritten = 16 + fRowLen * fNumStates; // total bytes written, + // including 16 for the header. + while (bytesWritten % 8 != 0) { + bytes.writeByte(0); + ++bytesWritten; + } + return bytesWritten; + } + + @Override + public boolean equals (Object other) { + if (other == this) { + return true; + } + if (!(other instanceof RBBIStateTable)) { + return false; + } + RBBIStateTable otherST = (RBBIStateTable)other; + if (fNumStates != otherST.fNumStates) return false; + if (fRowLen != otherST.fRowLen) return false; + if (fFlags != otherST.fFlags) return false; + if (fReserved != otherST.fReserved) return false; + return Arrays.equals(fTable, otherST.fTable); + } + } + + /** + * Equals helper for state tables, including null handling. + * Not intended for public use; declared public for testing purposes only. + * @internal + * @deprecated This API is ICU internal only. + */ + @Deprecated + static public boolean equals(RBBIStateTable left, RBBIStateTable right) { + if (left == right) { + return true; + } + if (left == null || right == null) { + return false; + } + return left.equals(right); + } + + // // These fields are the ready-to-use compiled rule data, as // read from the file. @@ -42,31 +161,30 @@ public final class RBBIDataWrapper { * @deprecated This API is ICU internal only. */ @Deprecated - public short fFTable[]; + public RBBIStateTable fFTable; /** * @internal * @deprecated This API is ICU internal only. */ @Deprecated - public short fRTable[]; + public RBBIStateTable fRTable; /** * @internal * @deprecated This API is ICU internal only. */ @Deprecated - public short fSFTable[]; + public RBBIStateTable fSFTable; /** * @internal * @deprecated This API is ICU internal only. */ @Deprecated - public short fSRTable[]; + public RBBIStateTable fSRTable; + Trie2 fTrie; String fRuleSource; int fStatusTable[]; - private boolean isBigEndian; - static final int DATA_FORMAT = 0x42726b20; // "Brk " static final int FORMAT_VERSION = 0x04000000; // 4.0.0.0 @@ -138,15 +256,6 @@ public final class RBBIDataWrapper { @Deprecated public final static int NEXTSTATES = 4; - // Index offsets to header fields of a state table - // struct RBBIStateTable {... in the C version. - // - static final int NUMSTATES = 0; - static final int ROWLEN = 2; - static final int FLAGS = 4; - //ivate static final int RESERVED_2 = 6; - private static final int ROW_DATA = 8; - // Bit selectors for the "FLAGS" field of the state table header // enum RBBIStateTableFlags in the C version. // @@ -212,7 +321,7 @@ public final class RBBIDataWrapper { */ @Deprecated public int getRowIndex(int state){ - return ROW_DATA + state * (fHeader.fCatCount + 4); + return state * (fHeader.fCatCount + 4); } RBBIDataWrapper() { @@ -226,7 +335,6 @@ public final class RBBIDataWrapper { RBBIDataWrapper This = new RBBIDataWrapper(); ICUBinary.readHeader(bytes, DATA_FORMAT, IS_ACCEPTABLE); - This.isBigEndian = bytes.order() == ByteOrder.BIG_ENDIAN; // Read in the RBBI data header... This.fHeader = new RBBIDataHeader(); @@ -274,8 +382,7 @@ public final class RBBIDataWrapper { ICUBinary.skipBytes(bytes, This.fHeader.fFTable - pos); pos = This.fHeader.fFTable; - This.fFTable = ICUBinary.getShorts( - bytes, This.fHeader.fFTableLen / 2, This.fHeader.fFTableLen & 1); + This.fFTable = RBBIStateTable.get(bytes, This.fHeader.fFTableLen); pos += This.fHeader.fFTableLen; // @@ -287,8 +394,7 @@ public final class RBBIDataWrapper { pos = This.fHeader.fRTable; // Create & fill the table itself. - This.fRTable = ICUBinary.getShorts( - bytes, This.fHeader.fRTableLen / 2, This.fHeader.fRTableLen & 1); + This.fRTable = RBBIStateTable.get(bytes, This.fHeader.fRTableLen); pos += This.fHeader.fRTableLen; // @@ -300,8 +406,7 @@ public final class RBBIDataWrapper { pos = This.fHeader.fSFTable; // Create & fill the table itself. - This.fSFTable = ICUBinary.getShorts( - bytes, This.fHeader.fSFTableLen / 2, This.fHeader.fSFTableLen & 1); + This.fSFTable = RBBIStateTable.get(bytes, This.fHeader.fSFTableLen); pos += This.fHeader.fSFTableLen; } @@ -314,8 +419,7 @@ public final class RBBIDataWrapper { pos = This.fHeader.fSRTable; // Create & fill the table itself. - This.fSRTable = ICUBinary.getShorts( - bytes, This.fHeader.fSRTableLen / 2, This.fHeader.fSRTableLen & 1); + This.fSRTable = RBBIStateTable.get(bytes, This.fHeader.fSRTableLen); pos += This.fHeader.fSRTableLen; } @@ -381,25 +485,6 @@ public final class RBBIDataWrapper { return This; } - /** - * Getters for fields from the state table header - * @internal - * @deprecated This API is ICU internal only. - */ - @Deprecated - public int getStateTableNumStates(short table[]) { - if (isBigEndian) { - return (table[NUMSTATES] << 16) | (table[NUMSTATES+1] & 0xffff); - } else { - return (table[NUMSTATES+1] << 16) | (table[NUMSTATES] & 0xffff); - } - } - - int getStateTableFlags(short table[]) { - // This works for up to 15 flags bits. - return table[isBigEndian ? FLAGS + 1 : FLAGS]; - } - ///CLOVER:OFF /* Debug function to display the break iterator data. */ /** @@ -408,7 +493,7 @@ public final class RBBIDataWrapper { */ @Deprecated public void dump(java.io.PrintStream out) { - if (fFTable.length == 0) { + if (fFTable == null) { // There is no table. Fail early for testing purposes. throw new NullPointerException(); } @@ -465,8 +550,8 @@ public final class RBBIDataWrapper { ///CLOVER:OFF /** Dump a state table. (A full set of RBBI rules has 4 state tables.) */ - private void dumpTable(java.io.PrintStream out, short table[]) { - if (table == null || table.length == 0) { + private void dumpTable(java.io.PrintStream out, RBBIStateTable table) { + if (table == null || table.fTable.length == 0) { out.println(" -- null -- "); } else { int n; @@ -480,7 +565,7 @@ public final class RBBIDataWrapper { out.print("-"); } out.println(); - for (state=0; state< getStateTableNumStates(table); state++) { + for (state=0; state < table.fNumStates; state++) { dumpRow(out, table, state); } out.println(); @@ -494,24 +579,24 @@ public final class RBBIDataWrapper { * @param table * @param state */ - private void dumpRow(java.io.PrintStream out, short table[], int state) { + private void dumpRow(java.io.PrintStream out, RBBIStateTable table, int state) { StringBuilder dest = new StringBuilder(fHeader.fCatCount*5 + 20); dest.append(intToString(state, 4)); int row = getRowIndex(state); - if (table[row+ACCEPTING] != 0) { - dest.append(intToString(table[row+ACCEPTING], 5)); + if (table.fTable[row+ACCEPTING] != 0) { + dest.append(intToString(table.fTable[row+ACCEPTING], 5)); }else { dest.append(" "); } - if (table[row+LOOKAHEAD] != 0) { - dest.append(intToString(table[row+LOOKAHEAD], 5)); + if (table.fTable[row+LOOKAHEAD] != 0) { + dest.append(intToString(table.fTable[row+LOOKAHEAD], 5)); }else { dest.append(" "); } - dest.append(intToString(table[row+TAGIDX], 5)); + dest.append(intToString(table.fTable[row+TAGIDX], 5)); for (int col=0; col 0) { - tableData = fSafeRevTables.exportTable(); + table = fSafeRevTables.exportTable(); } else { - tableData = fReverseTables.exportTable(); - } - for (i = 0; i < tableData.length; i++) { - dos.writeShort(tableData[i]); - outputPos += 2; + table = fReverseTables.exportTable(); } + outputPos += table.put(dos); // write out the Trie table Assert.assrt(outputPos == header[12]); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBITableBuilder.java b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBITableBuilder.java index 8954b672bbb..2a4d0582541 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBITableBuilder.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBITableBuilder.java @@ -978,118 +978,70 @@ class RBBITableBuilder { } - //----------------------------------------------------------------------------- - // - // getTableSize() Calculate the size in bytes of the runtime form of this - // state transition table. - // - // Note: Refer to common/rbbidata.h from ICU4C for the declarations - // of the structures being matched by this calculation. - // - //----------------------------------------------------------------------------- + /** + * Calculate the size in bytes of the serialized form of this state transition table, + * which is identical to the ICU4C runtime form. + * Refer to common/rbbidata.h from ICU4C for the declarations of the structures + * being matched by this calculation. + */ int getTableSize() { - int size = 0; - int numRows; - int numCols; - int rowSize; - if (fRB.fTreeRoots[fRootIx] == null) { return 0; } - - size = /*sizeof(RBBIStateTable) - 4 */ 16; // The header, with no rows to the table. - - numRows = fDStates.size(); - numCols = fRB.fSetBuilder.getNumCharCategories(); - - // Note The declaration of RBBIStateTableRow is for a table of two columns. - // Therefore we subtract two from numCols when determining - // how much storage to add to a row for the total columns. - // rowSize = sizeof(RBBIStateTableRow) + sizeof(uint16_t)*(numCols-2); - rowSize = 8 + 2*numCols; + int size = 16; // The header of 4 ints, with no rows to the table. + int numRows = fDStates.size(); + int numCols = fRB.fSetBuilder.getNumCharCategories(); + int rowSize = 8 + 2*numCols; size += numRows * rowSize; - while (size % 8 > 0) { // Size must be multiple of 8 bytes in size. - size++; - } - + size = (size + 7) & ~7; // round up to a multiple of 8 bytes return size; } - //----------------------------------------------------------------------------- - // - // exportTable() export the state transition table in the ICU4C format. - // - // Most of the table is 16 bit shorts. This function exports - // the whole thing as an array of shorts. - // - // The size of the array must be rounded up to a multiple of - // 8 bytes. - // - // See struct RBBIStateTable in ICU4C, common/rbbidata.h - // - //----------------------------------------------------------------------------- - - short [] exportTable() { + /** + * Create a RBBIDataWrapper.RBBIStateTable for a newly compiled table. + * RBBIDataWrapper.RBBIStateTable is similar to struct RBBIStateTable in ICU4C, + * in common/rbbidata.h + */ + RBBIDataWrapper.RBBIStateTable exportTable() { int state; int col; + RBBIDataWrapper.RBBIStateTable table = new RBBIDataWrapper.RBBIStateTable(); if (fRB.fTreeRoots[fRootIx] == null) { - return new short[0]; + return table; } Assert.assrt(fRB.fSetBuilder.getNumCharCategories() < 0x7fff && fDStates.size() < 0x7fff); - - int numStates = fDStates.size(); + table.fNumStates = fDStates.size(); // Size of table size in shorts. // the "4" is the size of struct RBBIStateTableRow, the row header part only. int rowLen = 4 + fRB.fSetBuilder.getNumCharCategories(); // Row Length in shorts. - int tableSize = getTableSize() / 2; - - - short [] table = new short[tableSize]; - - // - // Fill in the header fields. - // Note that NUMSTATES, ROWLEN and FLAGS are ints, not shorts. - // ICU data created from Java is always big endian format, so - // order the halves of the 32 bit fields into the short[] data accordingly. - // TODO: ticket 13598 restructure so that ints are represented as ints directly. - // - // RBBIStateTable.fNumStates - table[RBBIDataWrapper.NUMSTATES] = (short)(numStates >>> 16); - table[RBBIDataWrapper.NUMSTATES+1] = (short)(numStates & 0x0000ffff); - - // RBBIStateTable.fRowLen. In bytes. - int rowLenInBytes = rowLen * 2; - table[RBBIDataWrapper.ROWLEN] = (short)(rowLenInBytes >>> 16); - table[RBBIDataWrapper.ROWLEN+1] = (short)(rowLenInBytes & 0x0000ffff); + int tableSize = (getTableSize() - 16) / 2; // fTable length in shorts. + table.fTable = new short[tableSize]; + table.fRowLen = rowLen * 2; // Row length in bytes. - // RBBIStateTable.fFlags - int flags = 0; if (fRB.fLookAheadHardBreak) { - flags |= RBBIDataWrapper.RBBI_LOOKAHEAD_HARD_BREAK; + table.fFlags |= RBBIDataWrapper.RBBI_LOOKAHEAD_HARD_BREAK; } if (fRB.fSetBuilder.sawBOF()) { - flags |= RBBIDataWrapper.RBBI_BOF_REQUIRED; + table.fFlags |= RBBIDataWrapper.RBBI_BOF_REQUIRED; } - table[RBBIDataWrapper.FLAGS] = (short)(flags >>> 16); - table[RBBIDataWrapper.FLAGS+1] = (short)(flags & 0x0000ffff); int numCharCategories = fRB.fSetBuilder.getNumCharCategories(); - for (state=0; state columns = new ArrayList(); for (int column=0; column rows = new ArrayList(); - for (int r=0; r= -1); - s.append(fwtbl[row + RBBIDataWrapper.ACCEPTING]); - s.append(fwtbl[row + RBBIDataWrapper.LOOKAHEAD]); - s.append(fwtbl[row + RBBIDataWrapper.TAGIDX]); + assertTrue("Accepting < -1", fwtbl.fTable[row + RBBIDataWrapper.ACCEPTING] >= -1); + s.append(fwtbl.fTable[row + RBBIDataWrapper.ACCEPTING]); + s.append(fwtbl.fTable[row + RBBIDataWrapper.LOOKAHEAD]); + s.append(fwtbl.fTable[row + RBBIDataWrapper.TAGIDX]); for (int column=0; column