/*
*******************************************************************************
- * Copyright (C) 1996-2011, International Business Machines Corporation and *
- * others. All Rights Reserved. *
+ * Copyright (C) 1996-2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
*******************************************************************************
*/
+
package com.ibm.icu.text;
-import java.io.DataInputStream;
import java.io.IOException;
-import java.io.InputStream;
+import java.nio.ByteBuffer;
import java.text.CharacterIterator;
import com.ibm.icu.impl.ICUBinary;
private CompactTrieNodes[] nodes;
// Constructor
- public BreakCTDictionary(InputStream is) throws IOException {
- ICUBinary.readHeader(is, DATA_FORMAT_ID, null);
+ public BreakCTDictionary(ByteBuffer bytes) throws IOException {
+ ICUBinary.readHeader(bytes, DATA_FORMAT_ID, null);
- DataInputStream in = new DataInputStream(is);
// Get header information
fData = new CompactTrieHeader();
- fData.size = in.readInt();
- fData.magic = in.readInt();
- fData.nodeCount = in.readShort();
- fData.root = in.readShort();
+ fData.size = bytes.getInt();
+ fData.magic = bytes.getInt();
+ fData.nodeCount = bytes.getShort();
+ fData.root = bytes.getShort();
- loadBreakCTDictionary(in);
+ loadBreakCTDictionary(bytes);
}
// Loads the compact trie dictionary file into the CompactTrieNodes
- private void loadBreakCTDictionary(DataInputStream in) throws IOException {
+ private void loadBreakCTDictionary(ByteBuffer bytes) throws IOException {
// skip over offset information
for (int i = 0; i < fData.nodeCount; i++) {
- in.readInt();
+ bytes.getInt();
}
// Create compact trie dictionary
// Load in compact trie dictionary
for (int j = 1; j < fData.nodeCount; j++) {
nodes[j] = new CompactTrieNodes();
- nodes[j].flagscount = in.readShort();
+ nodes[j].flagscount = bytes.getShort();
int count = nodes[j].flagscount & CompactTrieNodeFlags.kCountMask;
// Vertical node
if (isVerticalNode) {
nodes[j].vnode = new CompactTrieVerticalNode();
- nodes[j].vnode.equal = in.readShort();
+ nodes[j].vnode.equal = bytes.getShort();
nodes[j].vnode.chars = new char[count];
for (int l = 0; l < count; l++) {
- nodes[j].vnode.chars[l] = in.readChar();
+ nodes[j].vnode.chars[l] = bytes.getChar();
}
} else { // Horizontal node
nodes[j].hnode = new CompactTrieHorizontalNode[count];
for (int n = 0; n < count; n++) {
- nodes[j].hnode[n] = new CompactTrieHorizontalNode(in
- .readChar(), in.readShort());
+ nodes[j].hnode[n] = new CompactTrieHorizontalNode(
+ bytes.getChar(), bytes.getShort());
}
}
}
}
// Use for reading the header portion of the file
- private static final byte DATA_FORMAT_ID[] = { (byte) 0x54, (byte) 0x72,
- (byte) 0x44, (byte) 0x63 };
+ private static final int DATA_FORMAT_ID = 0x54724463;
}
/*
*******************************************************************************
- * Copyright (C) 1996-2012, International Business Machines Corporation and *
- * others. All Rights Reserved. *
+ * Copyright (C) 1996-2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
*******************************************************************************
*/
+
package com.ibm.icu.text;
-import java.io.DataInputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
-import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
+import java.nio.ByteBuffer;
+import com.ibm.icu.impl.ICUBinary;
import com.ibm.icu.util.CompactByteArray;
/**
static void writeToFile(String inFile, String outFile)
throws FileNotFoundException, UnsupportedEncodingException, IOException {
- BreakDictionary dictionary = new BreakDictionary(new FileInputStream(inFile));
+ BreakDictionary dictionary = new BreakDictionary(
+ ICUBinary.getByteBufferFromInputStream(new FileInputStream(inFile)));
PrintWriter out = null;
// deserialization
//=================================================================================
- /* public */ BreakDictionary(InputStream dictionaryStream) throws IOException {
- readDictionaryFile(new DataInputStream(dictionaryStream));
+ /* public */ BreakDictionary(ByteBuffer bytes) throws IOException {
+ readDictionaryFile(bytes);
}
- /* public */ void readDictionaryFile(DataInputStream in) throws IOException {
+ /* public */ void readDictionaryFile(ByteBuffer bytes) throws IOException {
int l;
// read in the version number (right now we just ignore it)
- in.readInt();
+ bytes.getInt();
// read in the column map (this is serialized in its internal form:
// an index array followed by a data array)
- l = in.readInt();
+ l = bytes.getInt();
char[] temp = new char[l];
for (int i = 0; i < temp.length; i++)
- temp[i] = (char)in.readShort();
- l = in.readInt();
+ temp[i] = (char)bytes.getShort();
+ l = bytes.getInt();
byte[] temp2 = new byte[l];
for (int i = 0; i < temp2.length; i++)
- temp2[i] = in.readByte();
+ temp2[i] = bytes.get();
columnMap = new CompactByteArray(temp, temp2);
// read in numCols and numColGroups
- numCols = in.readInt();
- /*numColGroups = */in.readInt();
+ numCols = bytes.getInt();
+ /*numColGroups = */bytes.getInt();
// read in the row-number index
- l = in.readInt();
+ l = bytes.getInt();
rowIndex = new short[l];
for (int i = 0; i < rowIndex.length; i++)
- rowIndex[i] = in.readShort();
+ rowIndex[i] = bytes.getShort();
// load in the populated-cells bitmap: index first, then bitmap list
- l = in.readInt();
+ l = bytes.getInt();
rowIndexFlagsIndex = new short[l];
for (int i = 0; i < rowIndexFlagsIndex.length; i++)
- rowIndexFlagsIndex[i] = in.readShort();
- l = in.readInt();
+ rowIndexFlagsIndex[i] = bytes.getShort();
+ l = bytes.getInt();
rowIndexFlags = new int[l];
for (int i = 0; i < rowIndexFlags.length; i++)
- rowIndexFlags[i] = in.readInt();
+ rowIndexFlags[i] = bytes.getInt();
// load in the row-shift index
- l = in.readInt();
+ l = bytes.getInt();
rowIndexShifts = new byte[l];
for (int i = 0; i < rowIndexShifts.length; i++)
- rowIndexShifts[i] = in.readByte();
+ rowIndexShifts[i] = bytes.get();
// finally, load in the actual state table
- l = in.readInt();
+ l = bytes.getInt();
table = new short[l];
for (int i = 0; i < table.length; i++)
- table[i] = in.readShort();
+ table[i] = bytes.getShort();
// this data structure is only necessary for testing and debugging purposes
reverseColumnMap = new char[numCols];
reverseColumnMap[col] = c;
}
}
-
- // close the stream
- in.close();
}
//=================================================================================
/*
*******************************************************************************
- * Copyright (C) 2002-2012, International Business Machines Corporation and *
- * others. All Rights Reserved. *
+ * Copyright (C) 2002-2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
*******************************************************************************
*/
+
package com.ibm.icu.text;
import java.io.IOException;
import java.io.InputStream;
+import java.nio.ByteBuffer;
import java.util.Locale;
import java.util.MissingResourceException;
import com.ibm.icu.impl.Assert;
+import com.ibm.icu.impl.ICUBinary;
import com.ibm.icu.impl.ICUData;
import com.ibm.icu.impl.ICULocaleService;
import com.ibm.icu.impl.ICUResourceBundle;
RuleBasedBreakIterator iter = null;
ICUResourceBundle rb = (ICUResourceBundle)UResourceBundle.getBundleInstance(ICUResourceBundle.ICU_BRKITR_BASE_NAME, locale);
-
+
//
// Get the binary rules.
- //
- InputStream ruleStream = null;
+ //
+ ByteBuffer bytes = null;
try {
String typeKey = KIND_NAMES[kind];
String brkfname = rb.getStringWithFallback("boundaries/" + typeKey);
String rulesFileName = ICUResourceBundle.ICU_BUNDLE +ICUResourceBundle.ICU_BRKITR_NAME+ "/" + brkfname;
- ruleStream = ICUData.getStream(rulesFileName);
+ InputStream ruleStream = ICUData.getStream(rulesFileName);
+ bytes = ICUBinary.getByteBufferFromInputStream(ruleStream);
}
catch (Exception e) {
throw new MissingResourceException(e.toString(),"","");
// Create a normal RuleBasedBreakIterator.
//
try {
- iter = RuleBasedBreakIterator.getInstanceFromCompiledRules(ruleStream);
+ iter = RuleBasedBreakIterator.getInstanceFromCompiledRules(bytes);
}
catch (IOException e) {
// Shouldn't be possible to get here.
ULocale uloc = ULocale.forLocale(rb.getLocale());
iter.setLocale(uloc, uloc);
iter.setBreakType(kind);
-
+
return iter;
}
-/**
-*******************************************************************************
-* Copyright (C) 1996-2012, International Business Machines Corporation and *
-* others. All Rights Reserved. *
-*******************************************************************************
-*/
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ *******************************************************************************
+ */
package com.ibm.icu.text;
-import java.io.BufferedInputStream;
-import java.io.DataInputStream;
import java.io.IOException;
-import java.io.InputStream;
+import java.nio.ByteBuffer;
import com.ibm.icu.impl.CharTrie;
+import com.ibm.icu.impl.ICUBinary;
import com.ibm.icu.impl.Trie;
/**
- * Get an RBBIDataWrapper from an InputStream onto a pre-compiled set
+ * Get an RBBIDataWrapper from a ByteBuffer holding a pre-compiled set
* of RBBI rules.
*/
- static RBBIDataWrapper get(InputStream is) throws IOException {
+ static RBBIDataWrapper get(ByteBuffer bytes) throws IOException {
int i;
-
- DataInputStream dis = new DataInputStream(new BufferedInputStream(is));
+
RBBIDataWrapper This = new RBBIDataWrapper();
-
+
// Seek past the ICU data header.
// TODO: verify that the header looks good.
- dis.skip(0x80);
-
+ ICUBinary.skipBytes(bytes, 0x80);
+
// Read in the RBBI data header...
This.fHeader = new RBBIDataHeader();
- This.fHeader.fMagic = dis.readInt();
- This.fHeader.fVersion = dis.readInt();
+ This.fHeader.fMagic = bytes.getInt();
+ This.fHeader.fVersion = bytes.getInt();
This.fHeader.fFormatVersion[0] = (byte) (This.fHeader.fVersion >> 24);
This.fHeader.fFormatVersion[1] = (byte) (This.fHeader.fVersion >> 16);
This.fHeader.fFormatVersion[2] = (byte) (This.fHeader.fVersion >> 8);
This.fHeader.fFormatVersion[3] = (byte) (This.fHeader.fVersion);
- This.fHeader.fLength = dis.readInt();
- This.fHeader.fCatCount = dis.readInt();
- This.fHeader.fFTable = dis.readInt();
- This.fHeader.fFTableLen = dis.readInt();
- This.fHeader.fRTable = dis.readInt();
- This.fHeader.fRTableLen = dis.readInt();
- This.fHeader.fSFTable = dis.readInt();
- This.fHeader.fSFTableLen = dis.readInt();
- This.fHeader.fSRTable = dis.readInt();
- This.fHeader.fSRTableLen = dis.readInt();
- This.fHeader.fTrie = dis.readInt();
- This.fHeader.fTrieLen = dis.readInt();
- This.fHeader.fRuleSource = dis.readInt();
- This.fHeader.fRuleSourceLen = dis.readInt();
- This.fHeader.fStatusTable = dis.readInt();
- This.fHeader.fStatusTableLen = dis.readInt();
- dis.skip(6 * 4); // uint32_t fReserved[6];
-
-
- if (This.fHeader.fMagic != 0xb1a0 ||
+ This.fHeader.fLength = bytes.getInt();
+ This.fHeader.fCatCount = bytes.getInt();
+ This.fHeader.fFTable = bytes.getInt();
+ This.fHeader.fFTableLen = bytes.getInt();
+ This.fHeader.fRTable = bytes.getInt();
+ This.fHeader.fRTableLen = bytes.getInt();
+ This.fHeader.fSFTable = bytes.getInt();
+ This.fHeader.fSFTableLen = bytes.getInt();
+ This.fHeader.fSRTable = bytes.getInt();
+ This.fHeader.fSRTableLen = bytes.getInt();
+ This.fHeader.fTrie = bytes.getInt();
+ This.fHeader.fTrieLen = bytes.getInt();
+ This.fHeader.fRuleSource = bytes.getInt();
+ This.fHeader.fRuleSourceLen = bytes.getInt();
+ This.fHeader.fStatusTable = bytes.getInt();
+ This.fHeader.fStatusTableLen = bytes.getInt();
+ ICUBinary.skipBytes(bytes, 6 * 4); // uint32_t fReserved[6];
+
+
+ if (This.fHeader.fMagic != 0xb1a0 ||
! (This.fHeader.fVersion == 1 || // ICU 3.2 and earlier
This.fHeader.fFormatVersion[0] == 3) // ICU 3.4
) {
throw new IOException("Break Iterator Rule Data Magic Number Incorrect, or unsupported data version.");
}
-
- // Current position in input stream.
+
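+ // Current offset within the RBBI data, counted from the start of the RBBI
+ // data header (i.e. after the 0x80-byte ICU data header skipped above).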
int pos = 24 * 4; // offset of end of header, which has 24 fields, all int32_t (4 bytes)
-
+
//
// Read in the Forward state transition table as an array of shorts.
//
-
+
// Quick Sanity Check
if (This.fHeader.fFTable < pos || This.fHeader.fFTable > This.fHeader.fLength) {
throw new IOException("Break iterator Rule data corrupt");
}
-
+
// Skip over any padding preceding this table
- dis.skip(This.fHeader.fFTable - pos);
+ ICUBinary.skipBytes(bytes, This.fHeader.fFTable - pos);
pos = This.fHeader.fFTable;
-
+
This.fFTable = new short[This.fHeader.fFTableLen / 2];
for ( i=0; i<This.fFTable.length; i++) {
- This.fFTable[i] = dis.readShort();
+ This.fFTable[i] = bytes.getShort();
pos += 2;
}
-
+
//
// Read in the Reverse state table
//
-
+
// Skip over any padding in the file
- dis.skip(This.fHeader.fRTable - pos);
+ ICUBinary.skipBytes(bytes, This.fHeader.fRTable - pos);
pos = This.fHeader.fRTable;
-
+
// Create & fill the table itself.
This.fRTable = new short[This.fHeader.fRTableLen / 2];
for (i=0; i<This.fRTable.length; i++) {
- This.fRTable[i] = dis.readShort();
+ This.fRTable[i] = bytes.getShort();
pos += 2;
}
-
+
//
// Read in the Safe Forward state table
- //
+ //
if (This.fHeader.fSFTableLen > 0) {
// Skip over any padding in the file
- dis.skip(This.fHeader.fSFTable - pos);
+ ICUBinary.skipBytes(bytes, This.fHeader.fSFTable - pos);
pos = This.fHeader.fSFTable;
-
+
// Create & fill the table itself.
This.fSFTable = new short[This.fHeader.fSFTableLen / 2];
for (i=0; i<This.fSFTable.length; i++) {
- This.fSFTable[i] = dis.readShort();
+ This.fSFTable[i] = bytes.getShort();
pos += 2;
- }
+ }
}
-
+
//
// Read in the Safe Reverse state table
- //
+ //
if (This.fHeader.fSRTableLen > 0) {
// Skip over any padding in the file
- dis.skip(This.fHeader.fSRTable - pos);
+ ICUBinary.skipBytes(bytes, This.fHeader.fSRTable - pos);
pos = This.fHeader.fSRTable;
-
+
// Create & fill the table itself.
This.fSRTable = new short[This.fHeader.fSRTableLen / 2];
for (i=0; i<This.fSRTable.length; i++) {
- This.fSRTable[i] = dis.readShort();
+ This.fSRTable[i] = bytes.getShort();
pos += 2;
- }
+ }
}
-
+
//
// Unserialize the Character categories TRIE
// Because we can't be absolutely certain where the Trie deserialize will
- // leave the input stream, leave position unchanged.
+ // leave the buffer, leave position unchanged.
// The seek to the start of the next item following the TRIE will get us
// back in sync.
//
- dis.skip(This.fHeader.fTrie - pos); // seek input stream from end of previous section to
- pos = This.fHeader.fTrie; // to the start of the trie
-
- dis.mark(This.fHeader.fTrieLen+100); // Mark position of start of TRIE in the input
+ ICUBinary.skipBytes(bytes, This.fHeader.fTrie - pos); // seek buffer from end of
+ pos = This.fHeader.fTrie; // previous section to the start of the trie
+
+ bytes.mark(); // Mark position of start of TRIE in the input
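- // and tell Java to keep the mark valid so long
- // as we don't go more than 100 bytes past the
- // past the end of the TRIE.
+ // so that reset() below can rewind to it. Unlike
+ // InputStream.mark(int), ByteBuffer.mark() takes
+ // no read-ahead limit.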
-
- This.fTrie = new CharTrie(dis, fTrieFoldingFunc); // Deserialize the TRIE, leaving input
- // stream at an unknown position, preceding the
+
+ This.fTrie = new CharTrie(bytes, fTrieFoldingFunc); // Deserialize the TRIE, leaving buffer
+ // at an unknown position, preceding the
// padding between TRIE and following section.
-
- dis.reset(); // Move input stream back to marked position at
+
+ bytes.reset(); // Move buffer back to marked position at
// the start of the serialized TRIE. Now our
- // "pos" variable and the input stream are in
+ // "pos" variable and the buffer are in
// agreement.
-
+
//
// Read the Rule Status Table
//
if (pos > This.fHeader.fStatusTable) {
- throw new IOException("Break iterator Rule data corrupt");
+ throw new IOException("Break iterator Rule data corrupt");
}
- dis.skip(This.fHeader.fStatusTable - pos);
+ ICUBinary.skipBytes(bytes, This.fHeader.fStatusTable - pos);
pos = This.fHeader.fStatusTable;
This.fStatusTable = new int[This.fHeader.fStatusTableLen / 4];
for (i=0; i<This.fStatusTable.length; i++) {
- This.fStatusTable[i] = dis.readInt();
+ This.fStatusTable[i] = bytes.getInt();
pos += 4;
}
-
+
//
// Put the break rule source into a String
//
if (pos > This.fHeader.fRuleSource) {
- throw new IOException("Break iterator Rule data corrupt");
+ throw new IOException("Break iterator Rule data corrupt");
}
- dis.skip(This.fHeader.fRuleSource - pos);
+ ICUBinary.skipBytes(bytes, This.fHeader.fRuleSource - pos);
pos = This.fHeader.fRuleSource;
StringBuilder sb = new StringBuilder(This.fHeader.fRuleSourceLen / 2);
for (i=0; i<This.fHeader.fRuleSourceLen; i+=2) {
- sb.append(dis.readChar());
+ sb.append(bytes.getChar());
pos += 2;
}
This.fRuleSource = sb.toString();
-
+
if (RuleBasedBreakIterator.fDebugEnv!=null && RuleBasedBreakIterator.fDebugEnv.indexOf("data")>=0) {
This.dump();
}
/*
*******************************************************************************
- * Copyright (C) 2005-2014 International Business Machines Corporation and *
- * others. All Rights Reserved. *
+ * Copyright (C) 2005-2014 International Business Machines Corporation and
+ * others. All Rights Reserved.
*******************************************************************************
*/
+
package com.ibm.icu.text;
import static com.ibm.icu.impl.CharacterIteration.DONE32;
import static com.ibm.icu.impl.CharacterIteration.nextTrail32;
import static com.ibm.icu.impl.CharacterIteration.previous32;
-import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
+import java.nio.ByteBuffer;
import java.text.CharacterIterator;
import java.util.concurrent.ConcurrentHashMap;
import com.ibm.icu.impl.Assert;
import com.ibm.icu.impl.CharTrie;
import com.ibm.icu.impl.CharacterIteration;
+import com.ibm.icu.impl.ICUBinary;
import com.ibm.icu.impl.ICUDebug;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UProperty;
*/
public static RuleBasedBreakIterator getInstanceFromCompiledRules(InputStream is) throws IOException {
RuleBasedBreakIterator This = new RuleBasedBreakIterator();
- This.fRData = RBBIDataWrapper.get(is);
+ This.fRData = RBBIDataWrapper.get(ICUBinary.getByteBufferFromInputStream(is));
return This;
}
+ /**
+ * Create a break iterator from a precompiled set of break rules.
+ *
+ * Creating a break iterator from the binary rules is much faster than
+ * creating one from source rules.
+ *
+ * The binary rules are generated by the RuleBasedBreakIterator.compileRules() function.
+ * Binary break iterator rules are not guaranteed to be compatible between
+ * different versions of ICU.
+ *
+ * @param bytes a buffer supplying the compiled binary rules. It is passed to
+ * RBBIDataWrapper.get(ByteBuffer), which reads it with relative gets,
+ * so the buffer's position is advanced by this call.
+ * @return a new RuleBasedBreakIterator whose rule data was read from the buffer.
+ * @throws IOException if there is an error while reading the rules from the buffer.
+ * @see #compileRules(String, OutputStream)
+ * @internal
+ * @deprecated This API is ICU internal only.
+ */
+ @Deprecated
+ public static RuleBasedBreakIterator getInstanceFromCompiledRules(ByteBuffer bytes) throws IOException {
+ RuleBasedBreakIterator This = new RuleBasedBreakIterator();
+ This.fRData = RBBIDataWrapper.get(bytes);
+ return This;
+ }
+
/**
* Construct a RuleBasedBreakIterator from a set of rules supplied as a string.
* @param rules The break rules to be used.
try {
ByteArrayOutputStream ruleOS = new ByteArrayOutputStream();
compileRules(rules, ruleOS);
- byte [] ruleBA = ruleOS.toByteArray();
- InputStream ruleIS = new ByteArrayInputStream(ruleBA);
- fRData = RBBIDataWrapper.get(ruleIS);
+ fRData = RBBIDataWrapper.get(ByteBuffer.wrap(ruleOS.toByteArray()));
} catch (IOException e) {
///CLOVER:OFF
// An IO exception can only arrive here if there is a bug in the RBBI Rule compiler,