From: Fredrik Roubert Date: Tue, 15 Jul 2014 20:40:48 +0000 (+0000) Subject: ICU-10944 Add ByteBuffer support for BreakIterator. X-Git-Tag: milestone-59-0-1~1779 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=ffc1584fe738c4799b17b04018a1446ae81f339c;p=icu ICU-10944 Add ByteBuffer support for BreakIterator. R=markus.icu@gmail.com Review URL: https://codereview.appspot.com/108450046 X-SVN-Rev: 36036 --- diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/BreakCTDictionary.java b/icu4j/main/classes/core/src/com/ibm/icu/text/BreakCTDictionary.java index e033881f949..61bb831fb21 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/BreakCTDictionary.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/BreakCTDictionary.java @@ -1,14 +1,14 @@ /* ******************************************************************************* - * Copyright (C) 1996-2011, International Business Machines Corporation and * - * others. All Rights Reserved. * + * Copyright (C) 1996-2014, International Business Machines Corporation and + * others. All Rights Reserved. ******************************************************************************* */ + package com.ibm.icu.text; -import java.io.DataInputStream; import java.io.IOException; -import java.io.InputStream; +import java.nio.ByteBuffer; import java.text.CharacterIterator; import com.ibm.icu.impl.ICUBinary; @@ -103,25 +103,24 @@ class BreakCTDictionary { private CompactTrieNodes[] nodes; // Constructor - public BreakCTDictionary(InputStream is) throws IOException { - ICUBinary.readHeader(is, DATA_FORMAT_ID, null); + public BreakCTDictionary(ByteBuffer bytes) throws IOException { + ICUBinary.readHeader(bytes, DATA_FORMAT_ID, null); - DataInputStream in = new DataInputStream(is); // Get header information fData = new CompactTrieHeader(); - fData.size = in.readInt(); - fData.magic = in.readInt(); - fData.nodeCount = in.readShort(); - fData.root = in.readShort(); + fData.size = bytes.getInt(); + fData.magic = bytes.getInt(); + fData.nodeCount = bytes.getShort(); + fData.root = bytes.getShort(); - loadBreakCTDictionary(in); + loadBreakCTDictionary(bytes); } // Loads the compact trie dictionary file into the CompactTrieNodes - private void loadBreakCTDictionary(DataInputStream in) throws IOException { + private void loadBreakCTDictionary(ByteBuffer bytes) throws IOException { // skip over offset information for (int i = 0; i < fData.nodeCount; i++) { - in.readInt(); + bytes.getInt(); } // Create compact trie dictionary @@ -131,7 +130,7 @@ class BreakCTDictionary { // Load in compact trie dictionary for (int j = 1; j < fData.nodeCount; j++) { nodes[j] = new CompactTrieNodes(); - nodes[j].flagscount = in.readShort(); + nodes[j].flagscount = bytes.getShort(); int count = nodes[j].flagscount & CompactTrieNodeFlags.kCountMask; @@ -141,17 +140,17 @@ class BreakCTDictionary { // Vertical node if (isVerticalNode) { nodes[j].vnode = new CompactTrieVerticalNode(); - nodes[j].vnode.equal = in.readShort(); + nodes[j].vnode.equal = bytes.getShort(); nodes[j].vnode.chars = new char[count]; for (int l = 0; l < count; l++) { - nodes[j].vnode.chars[l] = in.readChar(); + nodes[j].vnode.chars[l] = bytes.getChar(); } } else { // Horizontal node nodes[j].hnode = new CompactTrieHorizontalNode[count]; for (int n = 0; n < count; n++) { - nodes[j].hnode[n] = new CompactTrieHorizontalNode(in - .readChar(), in.readShort()); + nodes[j].hnode[n] = new CompactTrieHorizontalNode( + bytes.getChar(), bytes.getShort()); } } } @@ -250,6 +249,5 @@ class BreakCTDictionary { } // Use for reading the header portion of the file - private static final byte DATA_FORMAT_ID[] = { (byte) 0x54, (byte) 0x72, - (byte) 0x44, (byte) 0x63 }; + private static final int DATA_FORMAT_ID = 0x54724463; } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/BreakDictionary.java b/icu4j/main/classes/core/src/com/ibm/icu/text/BreakDictionary.java index 66068f585b2..aa0ad7dece8 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/BreakDictionary.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/BreakDictionary.java @@ -1,21 +1,22 @@ /* ******************************************************************************* - * Copyright (C) 1996-2012, International Business Machines Corporation and * - * others. All Rights Reserved. * + * Copyright (C) 1996-2014, International Business Machines Corporation and + * others. All Rights Reserved. ******************************************************************************* */ + package com.ibm.icu.text; -import java.io.DataInputStream; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; -import java.io.InputStream; import java.io.OutputStreamWriter; import java.io.PrintWriter; import java.io.UnsupportedEncodingException; +import java.nio.ByteBuffer; +import com.ibm.icu.impl.ICUBinary; import com.ibm.icu.util.CompactByteArray; /** @@ -48,7 +49,8 @@ class BreakDictionary { static void writeToFile(String inFile, String outFile) throws FileNotFoundException, UnsupportedEncodingException, IOException { - BreakDictionary dictionary = new BreakDictionary(new FileInputStream(inFile)); + BreakDictionary dictionary = new BreakDictionary( + ICUBinary.getByteBufferFromInputStream(new FileInputStream(inFile))); PrintWriter out = null; @@ -166,59 +168,59 @@ class BreakDictionary { // deserialization //================================================================================= - /* public */ BreakDictionary(InputStream dictionaryStream) throws IOException { - readDictionaryFile(new DataInputStream(dictionaryStream)); + /* public */ BreakDictionary(ByteBuffer bytes) throws IOException { + readDictionaryFile(bytes); } - /* public */ void readDictionaryFile(DataInputStream in) throws IOException { + /* public */ void readDictionaryFile(ByteBuffer bytes) throws IOException { int l; // read in the version number (right now we just ignore it) - in.readInt(); + bytes.getInt(); // read in the column map (this is serialized in its internal form: // an index array followed by a data array) - l = in.readInt(); + l = bytes.getInt(); char[] temp = new char[l]; for (int i = 0; i < temp.length; i++) - temp[i] = (char)in.readShort(); - l = in.readInt(); + temp[i] = (char)bytes.getShort(); + l = bytes.getInt(); byte[] temp2 = new byte[l]; for (int i = 0; i < temp2.length; i++) - temp2[i] = in.readByte(); + temp2[i] = bytes.get(); columnMap = new CompactByteArray(temp, temp2); // read in numCols and numColGroups - numCols = in.readInt(); - /*numColGroups = */in.readInt(); + numCols = bytes.getInt(); + /*numColGroups = */bytes.getInt(); // read in the row-number index - l = in.readInt(); + l = bytes.getInt(); rowIndex = new short[l]; for (int i = 0; i < rowIndex.length; i++) - rowIndex[i] = in.readShort(); + rowIndex[i] = bytes.getShort(); // load in the populated-cells bitmap: index first, then bitmap list - l = in.readInt(); + l = bytes.getInt(); rowIndexFlagsIndex = new short[l]; for (int i = 0; i < rowIndexFlagsIndex.length; i++) - rowIndexFlagsIndex[i] = in.readShort(); - l = in.readInt(); + rowIndexFlagsIndex[i] = bytes.getShort(); + l = bytes.getInt(); rowIndexFlags = new int[l]; for (int i = 0; i < rowIndexFlags.length; i++) - rowIndexFlags[i] = in.readInt(); + rowIndexFlags[i] = bytes.getInt(); // load in the row-shift index - l = in.readInt(); + l = bytes.getInt(); rowIndexShifts = new byte[l]; for (int i = 0; i < rowIndexShifts.length; i++) - rowIndexShifts[i] = in.readByte(); + rowIndexShifts[i] = bytes.get(); // finally, load in the actual state table - l = in.readInt(); + l = bytes.getInt(); table = new short[l]; for (int i = 0; i < table.length; i++) - table[i] = in.readShort(); + table[i] = bytes.getShort(); // this data structure is only necessary for testing and debugging purposes reverseColumnMap = new char[numCols]; @@ -228,9 +230,6 @@ class BreakDictionary { reverseColumnMap[col] = c; } } - - // close the stream - in.close(); } //================================================================================= diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/BreakIteratorFactory.java b/icu4j/main/classes/core/src/com/ibm/icu/text/BreakIteratorFactory.java index 92f815a4f9a..13c3428b958 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/BreakIteratorFactory.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/BreakIteratorFactory.java @@ -1,17 +1,20 @@ /* ******************************************************************************* - * Copyright (C) 2002-2012, International Business Machines Corporation and * - * others. All Rights Reserved. * + * Copyright (C) 2002-2014, International Business Machines Corporation and + * others. All Rights Reserved. ******************************************************************************* */ + package com.ibm.icu.text; import java.io.IOException; import java.io.InputStream; +import java.nio.ByteBuffer; import java.util.Locale; import java.util.MissingResourceException; import com.ibm.icu.impl.Assert; +import com.ibm.icu.impl.ICUBinary; import com.ibm.icu.impl.ICUData; import com.ibm.icu.impl.ICULocaleService; import com.ibm.icu.impl.ICUResourceBundle; @@ -100,16 +103,17 @@ final class BreakIteratorFactory extends BreakIterator.BreakIteratorServiceShim RuleBasedBreakIterator iter = null; ICUResourceBundle rb = (ICUResourceBundle)UResourceBundle.getBundleInstance(ICUResourceBundle.ICU_BRKITR_BASE_NAME, locale); - + // // Get the binary rules. - // - InputStream ruleStream = null; + // + ByteBuffer bytes = null; try { String typeKey = KIND_NAMES[kind]; String brkfname = rb.getStringWithFallback("boundaries/" + typeKey); String rulesFileName = ICUResourceBundle.ICU_BUNDLE +ICUResourceBundle.ICU_BRKITR_NAME+ "/" + brkfname; - ruleStream = ICUData.getStream(rulesFileName); + InputStream ruleStream = ICUData.getStream(rulesFileName); + bytes = ICUBinary.getByteBufferFromInputStream(ruleStream); } catch (Exception e) { throw new MissingResourceException(e.toString(),"",""); @@ -119,7 +123,7 @@ final class BreakIteratorFactory extends BreakIterator.BreakIteratorServiceShim // Create a normal RuleBasedBreakIterator. // try { - iter = RuleBasedBreakIterator.getInstanceFromCompiledRules(ruleStream); + iter = RuleBasedBreakIterator.getInstanceFromCompiledRules(bytes); } catch (IOException e) { // Shouldn't be possible to get here. @@ -130,7 +134,7 @@ final class BreakIteratorFactory extends BreakIterator.BreakIteratorServiceShim ULocale uloc = ULocale.forLocale(rb.getLocale()); iter.setLocale(uloc, uloc); iter.setBreakType(kind); - + return iter; } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIDataWrapper.java b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIDataWrapper.java index f02cd4a9d53..e85ce8f93cd 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIDataWrapper.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIDataWrapper.java @@ -1,18 +1,17 @@ -/** -******************************************************************************* -* Copyright (C) 1996-2012, International Business Machines Corporation and * -* others. All Rights Reserved. * -******************************************************************************* -*/ +/* + ******************************************************************************* + * Copyright (C) 1996-2014, International Business Machines Corporation and + * others. All Rights Reserved. + ******************************************************************************* + */ package com.ibm.icu.text; -import java.io.BufferedInputStream; -import java.io.DataInputStream; import java.io.IOException; -import java.io.InputStream; +import java.nio.ByteBuffer; import com.ibm.icu.impl.CharTrie; +import com.ibm.icu.impl.ICUBinary; import com.ibm.icu.impl.Trie; /** @@ -149,172 +148,171 @@ final class RBBIDataWrapper { * Get an RBBIDataWrapper from an InputStream onto a pre-compiled set * of RBBI rules. */ - static RBBIDataWrapper get(InputStream is) throws IOException { + static RBBIDataWrapper get(ByteBuffer bytes) throws IOException { int i; - - DataInputStream dis = new DataInputStream(new BufferedInputStream(is)); + RBBIDataWrapper This = new RBBIDataWrapper(); - + // Seek past the ICU data header. // TODO: verify that the header looks good. - dis.skip(0x80); - + ICUBinary.skipBytes(bytes, 0x80); + // Read in the RBBI data header... This.fHeader = new RBBIDataHeader(); - This.fHeader.fMagic = dis.readInt(); - This.fHeader.fVersion = dis.readInt(); + This.fHeader.fMagic = bytes.getInt(); + This.fHeader.fVersion = bytes.getInt(); This.fHeader.fFormatVersion[0] = (byte) (This.fHeader.fVersion >> 24); This.fHeader.fFormatVersion[1] = (byte) (This.fHeader.fVersion >> 16); This.fHeader.fFormatVersion[2] = (byte) (This.fHeader.fVersion >> 8); This.fHeader.fFormatVersion[3] = (byte) (This.fHeader.fVersion); - This.fHeader.fLength = dis.readInt(); - This.fHeader.fCatCount = dis.readInt(); - This.fHeader.fFTable = dis.readInt(); - This.fHeader.fFTableLen = dis.readInt(); - This.fHeader.fRTable = dis.readInt(); - This.fHeader.fRTableLen = dis.readInt(); - This.fHeader.fSFTable = dis.readInt(); - This.fHeader.fSFTableLen = dis.readInt(); - This.fHeader.fSRTable = dis.readInt(); - This.fHeader.fSRTableLen = dis.readInt(); - This.fHeader.fTrie = dis.readInt(); - This.fHeader.fTrieLen = dis.readInt(); - This.fHeader.fRuleSource = dis.readInt(); - This.fHeader.fRuleSourceLen = dis.readInt(); - This.fHeader.fStatusTable = dis.readInt(); - This.fHeader.fStatusTableLen = dis.readInt(); - dis.skip(6 * 4); // uint32_t fReserved[6]; - - - if (This.fHeader.fMagic != 0xb1a0 || + This.fHeader.fLength = bytes.getInt(); + This.fHeader.fCatCount = bytes.getInt(); + This.fHeader.fFTable = bytes.getInt(); + This.fHeader.fFTableLen = bytes.getInt(); + This.fHeader.fRTable = bytes.getInt(); + This.fHeader.fRTableLen = bytes.getInt(); + This.fHeader.fSFTable = bytes.getInt(); + This.fHeader.fSFTableLen = bytes.getInt(); + This.fHeader.fSRTable = bytes.getInt(); + This.fHeader.fSRTableLen = bytes.getInt(); + This.fHeader.fTrie = bytes.getInt(); + This.fHeader.fTrieLen = bytes.getInt(); + This.fHeader.fRuleSource = bytes.getInt(); + This.fHeader.fRuleSourceLen = bytes.getInt(); + This.fHeader.fStatusTable = bytes.getInt(); + This.fHeader.fStatusTableLen = bytes.getInt(); + ICUBinary.skipBytes(bytes, 6 * 4); // uint32_t fReserved[6]; + + + if (This.fHeader.fMagic != 0xb1a0 || ! (This.fHeader.fVersion == 1 || // ICU 3.2 and earlier This.fHeader.fFormatVersion[0] == 3) // ICU 3.4 ) { throw new IOException("Break Iterator Rule Data Magic Number Incorrect, or unsupported data version."); } - - // Current position in input stream. + + // Current position in the buffer. int pos = 24 * 4; // offset of end of header, which has 24 fields, all int32_t (4 bytes) - + // // Read in the Forward state transition table as an array of shorts. // - + // Quick Sanity Check if (This.fHeader.fFTable < pos || This.fHeader.fFTable > This.fHeader.fLength) { throw new IOException("Break iterator Rule data corrupt"); } - + // Skip over any padding preceding this table - dis.skip(This.fHeader.fFTable - pos); + ICUBinary.skipBytes(bytes, This.fHeader.fFTable - pos); pos = This.fHeader.fFTable; - + This.fFTable = new short[This.fHeader.fFTableLen / 2]; for ( i=0; i 0) { // Skip over any padding in the file - dis.skip(This.fHeader.fSFTable - pos); + ICUBinary.skipBytes(bytes, This.fHeader.fSFTable - pos); pos = This.fHeader.fSFTable; - + // Create & fill the table itself. This.fSFTable = new short[This.fHeader.fSFTableLen / 2]; for (i=0; i 0) { // Skip over any padding in the file - dis.skip(This.fHeader.fSRTable - pos); + ICUBinary.skipBytes(bytes, This.fHeader.fSRTable - pos); pos = This.fHeader.fSRTable; - + // Create & fill the table itself. This.fSRTable = new short[This.fHeader.fSRTableLen / 2]; for (i=0; i This.fHeader.fStatusTable) { - throw new IOException("Break iterator Rule data corrupt"); + throw new IOException("Break iterator Rule data corrupt"); } - dis.skip(This.fHeader.fStatusTable - pos); + ICUBinary.skipBytes(bytes, This.fHeader.fStatusTable - pos); pos = This.fHeader.fStatusTable; This.fStatusTable = new int[This.fHeader.fStatusTableLen / 4]; for (i=0; i This.fHeader.fRuleSource) { - throw new IOException("Break iterator Rule data corrupt"); + throw new IOException("Break iterator Rule data corrupt"); } - dis.skip(This.fHeader.fRuleSource - pos); + ICUBinary.skipBytes(bytes, This.fHeader.fRuleSource - pos); pos = This.fHeader.fRuleSource; StringBuilder sb = new StringBuilder(This.fHeader.fRuleSourceLen / 2); for (i=0; i=0) { This.dump(); } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedBreakIterator.java b/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedBreakIterator.java index c317846f1ae..b0e08d78895 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedBreakIterator.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedBreakIterator.java @@ -1,9 +1,10 @@ /* ******************************************************************************* - * Copyright (C) 2005-2014 International Business Machines Corporation and * - * others. All Rights Reserved. * + * Copyright (C) 2005-2014 International Business Machines Corporation and + * others. All Rights Reserved. ******************************************************************************* */ + package com.ibm.icu.text; import static com.ibm.icu.impl.CharacterIteration.DONE32; @@ -11,17 +12,18 @@ import static com.ibm.icu.impl.CharacterIteration.next32; import static com.ibm.icu.impl.CharacterIteration.nextTrail32; import static com.ibm.icu.impl.CharacterIteration.previous32; -import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.nio.ByteBuffer; import java.text.CharacterIterator; import java.util.concurrent.ConcurrentHashMap; import com.ibm.icu.impl.Assert; import com.ibm.icu.impl.CharTrie; import com.ibm.icu.impl.CharacterIteration; +import com.ibm.icu.impl.ICUBinary; import com.ibm.icu.impl.ICUDebug; import com.ibm.icu.lang.UCharacter; import com.ibm.icu.lang.UProperty; @@ -64,10 +66,33 @@ public class RuleBasedBreakIterator extends BreakIterator { */ public static RuleBasedBreakIterator getInstanceFromCompiledRules(InputStream is) throws IOException { RuleBasedBreakIterator This = new RuleBasedBreakIterator(); - This.fRData = RBBIDataWrapper.get(is); + This.fRData = RBBIDataWrapper.get(ICUBinary.getByteBufferFromInputStream(is)); return This; } + /** + * Create a break iterator from a precompiled set of break rules. + * + * Creating a break iterator from the binary rules is much faster than + * creating one from source rules. + * + * The binary rules are generated by the RuleBasedBreakIterator.compileRules() function. + * Binary break iterator rules are not guaranteed to be compatible between + * different versions of ICU. + * + * @param bytes a buffer supplying the compiled binary rules. + * @throws IOException if there is an error while reading the rules from the buffer. + * @see #compileRules(String, OutputStream) + * @internal + * @deprecated This API is ICU internal only. + */ + @Deprecated + public static RuleBasedBreakIterator getInstanceFromCompiledRules(ByteBuffer bytes) throws IOException { + RuleBasedBreakIterator This = new RuleBasedBreakIterator(); + This.fRData = RBBIDataWrapper.get(bytes); + return This; + } + /** * Construct a RuleBasedBreakIterator from a set of rules supplied as a string. * @param rules The break rules to be used. @@ -78,9 +103,7 @@ public class RuleBasedBreakIterator extends BreakIterator { try { ByteArrayOutputStream ruleOS = new ByteArrayOutputStream(); compileRules(rules, ruleOS); - byte [] ruleBA = ruleOS.toByteArray(); - InputStream ruleIS = new ByteArrayInputStream(ruleBA); - fRData = RBBIDataWrapper.get(ruleIS); + fRData = RBBIDataWrapper.get(ByteBuffer.wrap(ruleOS.toByteArray())); } catch (IOException e) { ///CLOVER:OFF // An IO exception can only arrive here if there is a bug in the RBBI Rule compiler,