import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UnicodeSetIterator;
import com.ibm.icu.util.ULocale;
-import com.ibm.icu.util.VersionInfo;
public final class CollationBuilder extends CollationRuleParser.Sink {
private static final boolean DEBUG = false;
// In Java, we do not have a rules version.
// In C++, the genrb build tool reads and supplies one,
// and the rulesVersion is a parameter for this method.
- VersionInfo rulesVersion = VersionInfo.getInstance(0, 0, 0, 0);
- tailoring.setVersion(base.version, rulesVersion);
+ tailoring.setVersion(base.version, 0 /* rulesVersion */);
return tailoring;
}
package com.ibm.icu.impl.coll;
-import java.io.BufferedInputStream;
-import java.io.DataInputStream;
import java.io.IOException;
-import java.io.InputStream;
+import java.nio.ByteBuffer;
import java.util.Arrays;
import com.ibm.icu.impl.ICUBinary;
static final int IX_RESERVED18_OFFSET = 18;
static final int IX_TOTAL_SIZE = 19;
- static void read(CollationTailoring base, InputStream inBytes,
+ static void read(CollationTailoring base, ByteBuffer inBytes,
CollationTailoring tailoring) throws IOException {
- BufferedInputStream bis = new BufferedInputStream(inBytes);
- tailoring.version = ICUBinary.readHeaderAndDataVersion(bis, DATA_FORMAT, IS_ACCEPTABLE);
+ tailoring.version = ICUBinary.readHeader(inBytes, DATA_FORMAT, IS_ACCEPTABLE);
if(base != null && base.getUCAVersion() != tailoring.getUCAVersion()) {
throw new ICUException("Tailoring UCA version differs from base data UCA version");
}
- DataInputStream ds = new DataInputStream(bis);
- int indexesLength = ds.readInt(); // inIndexes[IX_INDEXES_LENGTH]
- if(indexesLength < 2) {
+ int inLength = inBytes.remaining();
+ if(inLength < 8) {
+ throw new ICUException("not enough bytes");
+ }
+ int indexesLength = inBytes.getInt(); // inIndexes[IX_INDEXES_LENGTH]
+ if(indexesLength < 2 || inLength < indexesLength * 4) {
throw new ICUException("not enough indexes");
}
int[] inIndexes = new int[IX_TOTAL_SIZE + 1];
inIndexes[0] = indexesLength;
for(int i = 1; i < indexesLength && i < inIndexes.length; ++i) {
- inIndexes[i] = ds.readInt();
+ inIndexes[i] = inBytes.getInt();
}
for(int i = indexesLength; i < inIndexes.length; ++i) {
inIndexes[i] = -1;
}
if(indexesLength > inIndexes.length) {
- ds.skipBytes((indexesLength - inIndexes.length) * 4);
+ ICUBinary.skipBytes(inBytes, (indexesLength - inIndexes.length) * 4);
}
// Assume that the tailoring data is in initial state,
int offset; // byte offset for the index part
int length; // number of bytes in the index part
+ if(indexesLength > IX_TOTAL_SIZE) {
+ length = inIndexes[IX_TOTAL_SIZE];
+ } else if(indexesLength > IX_REORDER_CODES_OFFSET) {
+ length = inIndexes[indexesLength - 1];
+ } else {
+ length = 0; // only indexes, and inLength was already checked for them
+ }
+ if(inLength < length) {
+ throw new ICUException("not enough bytes");
+ }
+
CollationData baseData = base == null ? null : base.data;
int[] reorderCodes;
index = IX_REORDER_CODES_OFFSET;
}
reorderCodes = new int[length / 4];
for(int i = 0; i < length / 4; ++i) {
- reorderCodes[i] = ds.readInt();
+ reorderCodes[i] = inBytes.getInt();
}
length &= 3;
} else {
reorderCodes = new int[0];
}
- ds.skipBytes(length);
+ ICUBinary.skipBytes(inBytes, length);
// There should be a reorder table only if there are reorder codes.
// However, when there are reorder codes the reorder table may be omitted to reduce
throw new ICUException("Reordering table without reordering codes");
}
reorderTable = new byte[256];
- ds.readFully(reorderTable);
+ inBytes.get(reorderTable);
length -= 256;
} else {
// If we have reorder codes, then build the reorderTable at the end,
// when the CollationData is otherwise complete.
}
- ds.skipBytes(length);
+ ICUBinary.skipBytes(inBytes, length);
if(baseData != null && baseData.numericPrimary != (inIndexes[IX_OPTIONS] & 0xff000000L)) {
throw new ICUException("Tailoring numeric primary weight differs from base data");
data = tailoring.ownedData;
data.base = baseData;
data.numericPrimary = inIndexes[IX_OPTIONS] & 0xff000000L;
- data.trie = tailoring.trie = Trie2_32.createFromSerialized(ds);
+ data.trie = tailoring.trie = Trie2_32.createFromSerialized(inBytes);
int trieLength = data.trie.getSerializedLength();
if(trieLength > length) {
throw new ICUException("Not enough bytes for the mappings trie"); // No mappings.
} else {
throw new ICUException("Missing collation data mappings"); // No mappings.
}
- ds.skipBytes(length);
+ ICUBinary.skipBytes(inBytes, length);
index = IX_RESERVED8_OFFSET;
offset = inIndexes[index];
length = inIndexes[index + 1] - offset;
- ds.skipBytes(length);
+ ICUBinary.skipBytes(inBytes, length);
index = IX_CES_OFFSET;
offset = inIndexes[index];
}
data.ces = new long[length / 8];
for(int i = 0; i < length / 8; ++i) {
- data.ces[i] = ds.readLong();
+ data.ces[i] = inBytes.getLong();
}
length &= 7;
}
- ds.skipBytes(length);
+ ICUBinary.skipBytes(inBytes, length);
index = IX_RESERVED10_OFFSET;
offset = inIndexes[index];
length = inIndexes[index + 1] - offset;
- ds.skipBytes(length);
+ ICUBinary.skipBytes(inBytes, length);
index = IX_CE32S_OFFSET;
offset = inIndexes[index];
}
data.ce32s = new int[length / 4];
for(int i = 0; i < length / 4; ++i) {
- data.ce32s[i] = ds.readInt();
+ data.ce32s[i] = inBytes.getInt();
}
length &= 3;
}
- ds.skipBytes(length);
+ ICUBinary.skipBytes(inBytes, length);
int jamoCE32sStart = inIndexes[IX_JAMO_CE32S_START];
if(jamoCE32sStart >= 0) {
}
data.rootElements = new long[rootElementsLength];
for(int i = 0; i < rootElementsLength; ++i) {
- data.rootElements[i] = ds.readInt() & 0xffffffffL; // unsigned int -> long
+ data.rootElements[i] = inBytes.getInt() & 0xffffffffL; // unsigned int -> long
}
long commonSecTer = data.rootElements[CollationRootElements.IX_COMMON_SEC_AND_TER_CE];
if(commonSecTer != Collation.COMMON_SEC_AND_TER_CE) {
}
length &= 3;
}
- ds.skipBytes(length);
+ ICUBinary.skipBytes(inBytes, length);
index = IX_CONTEXTS_OFFSET;
offset = inIndexes[index];
}
StringBuilder sb = new StringBuilder(length / 2);
for(int i = 0; i < length / 2; ++i) {
- sb.append(ds.readChar());
+ sb.append(inBytes.getChar());
}
data.contexts = sb.toString();
length &= 1;
}
- ds.skipBytes(length);
+ ICUBinary.skipBytes(inBytes, length);
index = IX_UNSAFE_BWD_OFFSET;
offset = inIndexes[index];
USerializedSet sset = new USerializedSet();
char[] unsafeData = new char[length / 2];
for(int i = 0; i < length / 2; ++i) {
- unsafeData[i] = ds.readChar();
+ unsafeData[i] = inBytes.getChar();
}
length &= 1;
sset.getSet(unsafeData, 0);
} else {
throw new ICUException("Missing unsafe-backward-set");
}
- ds.skipBytes(length);
+ ICUBinary.skipBytes(inBytes, length);
// If the fast Latin format version is different,
// or the version is set to 0 for "no fast Latin table",
data.fastLatinTableHeader = null;
if(((inIndexes[IX_OPTIONS] >> 16) & 0xff) == CollationFastLatin.VERSION) {
if(length >= 2) {
- char header0 = ds.readChar();
+ char header0 = inBytes.getChar();
int headerLength = header0 & 0xff;
data.fastLatinTableHeader = new char[headerLength];
data.fastLatinTableHeader[0] = header0;
for(int i = 1; i < headerLength; ++i) {
- data.fastLatinTableHeader[i] = ds.readChar();
+ data.fastLatinTableHeader[i] = inBytes.getChar();
}
int tableLength = length / 2 - headerLength;
data.fastLatinTable = new char[tableLength];
for(int i = 0; i < tableLength; ++i) {
- data.fastLatinTable[i] = ds.readChar();
+ data.fastLatinTable[i] = inBytes.getChar();
}
length &= 1;
if((header0 >> 8) != CollationFastLatin.VERSION) {
}
}
}
- ds.skipBytes(length);
+ ICUBinary.skipBytes(inBytes, length);
index = IX_SCRIPTS_OFFSET;
offset = inIndexes[index];
}
data.scripts = new char[length / 2];
for(int i = 0; i < length / 2; ++i) {
- data.scripts[i] = ds.readChar();
+ data.scripts[i] = inBytes.getChar();
}
length &= 1;
} else if(data == null) {
} else if(baseData != null) {
data.scripts = baseData.scripts;
}
- ds.skipBytes(length);
+ ICUBinary.skipBytes(inBytes, length);
index = IX_COMPRESSIBLE_BYTES_OFFSET;
offset = inIndexes[index];
}
data.compressibleBytes = new boolean[256];
for(int i = 0; i < 256; ++i) {
- data.compressibleBytes[i] = ds.readBoolean();
+ data.compressibleBytes[i] = inBytes.get() != 0;
}
length -= 256;
} else if(data == null) {
} else {
throw new ICUException("Missing data for compressible primary lead bytes");
}
- ds.skipBytes(length);
+ ICUBinary.skipBytes(inBytes, length);
index = IX_RESERVED18_OFFSET;
offset = inIndexes[index];
length = inIndexes[index + 1] - offset;
- ds.skipBytes(length);
-
- ds.close();
+ ICUBinary.skipBytes(inBytes, length);
CollationSettings ts = tailoring.settings.readOnly();
int options = inIndexes[IX_OPTIONS] & 0xffff;
}
}
private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable();
- private static final byte DATA_FORMAT[] = { 0x55, 0x43, 0x6f, 0x6c }; // "UCol"
+ private static final int DATA_FORMAT = 0x55436f6c; // "UCol"
private CollationDataReader() {} // no constructor
}
package com.ibm.icu.impl.coll;
-import java.io.ByteArrayInputStream;
import java.io.IOException;
+import java.nio.ByteBuffer;
import java.util.MissingResourceException;
import com.ibm.icu.impl.ICUResourceBundle;
// deserialize
UResourceBundle binary = ((ICUResourceBundle)data).get("%%CollationBin");
- byte[] inBytes = binary.getBinary(null);
- ByteArrayInputStream inStream = new ByteArrayInputStream(inBytes);
+ ByteBuffer inBytes = binary.getBinary();
try {
- CollationDataReader.read(root, inStream, t);
+ CollationDataReader.read(root, inBytes, t);
} catch (IOException e) {
throw new ICUUncheckedIOException("Failed to load collation tailoring data for locale:"
+ actualLocale + " type:" + type, e);
- } // No need to close BAIS.
+ }
// Try to fetch the optional rules string.
try {
import java.io.InputStream;
import java.util.MissingResourceException;
+import com.ibm.icu.impl.ICUBinary;
import com.ibm.icu.impl.ICUData;
import com.ibm.icu.impl.ICUResourceBundle;
static { // Corresponds to C++ load() function.
CollationTailoring t = new CollationTailoring(null);
+ // TODO: Optionally load from a .dat file or stand-alone .icu file.
String path = ICUResourceBundle.ICU_BUNDLE + "/coll/ucadata.icu";
- InputStream inBytes = ICUData.getRequiredStream(path);
+ InputStream is = ICUData.getRequiredStream(path);
RuntimeException e2 = null;
try {
- CollationDataReader.read(null, inBytes, t);
+ CollationDataReader.read(null, ICUBinary.getByteBufferFromInputStream(is), t);
} catch(IOException e) {
t = null;
e2 = new MissingResourceException(
ucaVersion.getMilli() << 6,
0);
}
- void setVersion(VersionInfo baseVersion, VersionInfo rulesVersion) {
- version = VersionInfo.getInstance(
- VersionInfo.UCOL_BUILDER_VERSION.getMajor(),
- baseVersion.getMinor(),
- (baseVersion.getMilli() & 0xc0) + ((rulesVersion.getMajor() + (rulesVersion.getMajor() >> 6)) & 0x3f),
- (rulesVersion.getMinor() << 3) + (rulesVersion.getMinor() >> 5) + rulesVersion.getMilli() +
- (rulesVersion.getMicro() << 4) + (rulesVersion.getMicro() >> 4));
+ void setVersion(int baseVersion, int rulesVersion) {
+ // See comments for version field. Each int argument is treated as four version bytes, most significant byte first, taking the place of the major/minor/milli/micro getters used by the removed VersionInfo-based code.
+ int r = (rulesVersion >> 16) & 0xff00; // rulesVersion bits 31..24, kept at bits 15..8 (the masking discards any sign extension)
+ int s = (rulesVersion >> 16) & 0xff; // rulesVersion bits 23..16
+ int t = (rulesVersion >> 8) & 0xff; // rulesVersion bits 15..8
+ int q = rulesVersion & 0xff; // rulesVersion bits 7..0
+ version = (VersionInfo.UCOL_BUILDER_VERSION.getMajor() << 24) | // builder major version goes into bits 31..24
+ (baseVersion & 0xffc000) | // UCA version u.v.w: keeps baseVersion bits 23..14 unchanged
+ ((r + (r >> 6)) & 0x3f00) | // bits 13..8: the 6-bit value (major + (major >> 6)) & 0x3f of the removed code, already positioned 8 bits up
+ (((s << 3) + (s >> 5) + t + (q << 4) + (q >> 4)) & 0xff); // bits 7..0: s, t and q folded together and truncated to one byte. NOTE(review): the version field comment writes (s<<5)+(s>>3); the code here and in the removed implementation uses (s<<3)+(s>>5) - confirm which is intended.
}
int getUCAVersion() {
- return (version.getMinor() << 4) | (version.getMilli() >> 6);
+ // Version second byte/bits 23..16 to bits 11..4,
+ // third byte/bits 15..14 to bits 1..0.
+ return ((version >> 12) & 0xff0) | ((version >> 14) & 3); // same value as the removed (minor << 4) | (milli >> 6), computed directly on the packed int
}
// data for sorting etc.
// version[1]: bits 7..3=u, bits 2..0=v
// version[2]: bits 7..6=w, bits 5..0=r
// version[3]= (s<<5)+(s>>3)+t+(q<<4)+(q>>4)
- public VersionInfo version = ZERO_VERSION;
- private static final VersionInfo ZERO_VERSION = VersionInfo.getInstance(0, 0, 0, 0);
+ public int version = 0;
// owned objects
CollationData ownedData;
*/
@Override
public VersionInfo getVersion() {
- VersionInfo version = tailoring.version;
+ int version = tailoring.version; // four version bytes packed into one int; unpacked again below for VersionInfo.getInstance()
int rtVersion = VersionInfo.UCOL_RUNTIME_VERSION.getMajor();
return VersionInfo.getInstance(
- version.getMajor() + (rtVersion << 4) + (rtVersion >> 4),
- version.getMinor(), version.getMilli(), version.getMicro());
+ (version >>> 24) + (rtVersion << 4) + (rtVersion >> 4), // top byte, extracted with an unsigned shift, combined with the runtime version as before
+ ((version >> 16) & 0xff), ((version >> 8) & 0xff), (version & 0xff)); // bits 23..16, 15..8 and 7..0 replace getMinor(), getMilli() and getMicro()
}
/**
/*
*******************************************************************************
- * Copyright (C) 1996-2010, International Business Machines Corporation and *
- * others. All Rights Reserved. *
+ * Copyright (C) 1996-2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
*******************************************************************************
*/
package com.ibm.icu.impl;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
import java.util.Arrays;
import com.ibm.icu.util.VersionInfo;
dataVersion[2], dataVersion[3]);
}
+    /**
+     * Reads an ICU data header, checks the data format, and returns the data version.
+     *
+     * <p>Assumes that the ByteBuffer position is 0 on input.
+     * The buffer byte order is set according to the data.
+     * The buffer position is advanced past the header (including UDataInfo and comment).
+     *
+     * <p>A buffer that is too short for the fixed header fields, or shorter than the
+     * header size that it declares, is rejected with an IOException rather than with
+     * an unchecked exception from the ByteBuffer accessors.
+     *
+     * <p>See C++ ucmndata.h and unicode/udata.h.
+     *
+     * @param bytes the ICU data item, with its header starting at index 0
+     * @param dataFormat the expected data format, compared byte by byte
+     *        (most significant byte first) with the bytes at offsets 12..15
+     * @param authenticate checks the format version bytes at offsets 16..19;
+     *        may be null, in which case the format version is not checked
+     * @return dataVersion: the bytes at offsets 20..23, packed most significant byte first
+     * @throws IOException if this is not a valid ICU data item of the expected dataFormat
+     */
+    public static final int readHeader(ByteBuffer bytes, int dataFormat, Authenticate authenticate)
+            throws IOException {
+        assert bytes.position() == 0;
+        // Everything through dataVersion (offsets 20..23) is read with absolute gets,
+        // which are bounds-checked against the limit and would otherwise fail with
+        // an IndexOutOfBoundsException on truncated input.
+        if (bytes.limit() < 24) {
+            throw new IOException("Internal Error: Header size error");
+        }
+        byte magic1 = bytes.get(2);
+        byte magic2 = bytes.get(3);
+        if (magic1 != MAGIC1 || magic2 != MAGIC2) {
+            throw new IOException(MAGIC_NUMBER_AUTHENTICATION_FAILED_);
+        }
+
+        byte isBigEndian = bytes.get(8);
+        byte charsetFamily = bytes.get(9);
+        byte sizeofUChar = bytes.get(10);
+        if (isBigEndian < 0 || 1 < isBigEndian ||
+                charsetFamily != CHAR_SET_ || sizeofUChar != CHAR_SIZE_) {
+            throw new IOException(HEADER_AUTHENTICATION_FAILED_);
+        }
+        // The multi-byte fields below, and everything the caller reads afterwards,
+        // must be interpreted in the byte order that the data declares.
+        bytes.order(isBigEndian != 0 ? ByteOrder.BIG_ENDIAN : ByteOrder.LITTLE_ENDIAN);
+
+        int headerSize = bytes.getChar(0);
+        int sizeofUDataInfo = bytes.getChar(4);
+        // The last condition keeps bytes.position(headerSize) below from throwing
+        // an IllegalArgumentException when the data ends inside the header.
+        if (sizeofUDataInfo < 20 || headerSize < (sizeofUDataInfo + 4) ||
+                headerSize > bytes.limit()) {
+            throw new IOException("Internal Error: Header size error");
+        }
+        // TODO: Change Authenticate to take int major, int minor, int milli, int micro
+        // to avoid array allocation.
+        byte[] formatVersion = new byte[] {
+            bytes.get(16), bytes.get(17), bytes.get(18), bytes.get(19)
+        };
+        if (bytes.get(12) != (byte)(dataFormat >> 24) ||
+                bytes.get(13) != (byte)(dataFormat >> 16) ||
+                bytes.get(14) != (byte)(dataFormat >> 8) ||
+                bytes.get(15) != (byte)dataFormat ||
+                (authenticate != null && !authenticate.isDataVersionAcceptable(formatVersion))) {
+            throw new IOException(HEADER_AUTHENTICATION_FAILED_);
+        }
+
+        bytes.position(headerSize);
+        return  // dataVersion
+                (bytes.get(20) << 24) |
+                ((bytes.get(21) & 0xff) << 16) |
+                ((bytes.get(22) & 0xff) << 8) |
+                (bytes.get(23) & 0xff);
+    }
+
+    /**
+     * Advances the buffer position by skipLength bytes.
+     * A zero or negative skipLength leaves the position unchanged.
+     */
+    public static final void skipBytes(ByteBuffer bytes, int skipLength) {
+        if (skipLength <= 0) {
+            return;
+        }
+        int newPosition = bytes.position() + skipLength;
+        bytes.position(newPosition);
+    }
+
+    /**
+     * Reads the entire contents from the stream into a byte array
+     * and wraps it into a ByteBuffer. Closes the InputStream at the end.
+     *
+     * <p>The stream is read until a read call reports the end of the stream.
+     * {@code is.available()} is used only as a hint for the initial array size:
+     * it may return 0 before the end of the stream, or less than the total length.
+     *
+     * @param is the stream to read; it is closed whether or not reading succeeds
+     * @return a ByteBuffer at position 0 whose backing array holds exactly the stream contents
+     * @throws IOException if reading from or closing the stream fails
+     */
+    public static final ByteBuffer getByteBufferFromInputStream(InputStream is) throws IOException {
+        try {
+            int avail = is.available();
+            byte[] bytes = new byte[avail > 0 ? avail : 128];
+            int length = 0;
+            for (;;) {
+                if (length < bytes.length) {
+                    // The read must not be the operand of an assert statement:
+                    // it would not be executed at all when assertions are disabled.
+                    // A read may also deliver fewer bytes than requested.
+                    int numRead = is.read(bytes, length, bytes.length - length);
+                    if (numRead < 0) {
+                        break;  // end of stream
+                    }
+                    length += numRead;
+                } else {
+                    // The array is full. Check for the end of the stream before growing,
+                    // so that an exact size hint from available() costs no extra copy.
+                    int nextByte = is.read();
+                    if (nextByte < 0) {
+                        break;  // end of stream
+                    }
+                    int newCapacity = bytes.length <= Integer.MAX_VALUE / 2 ?
+                            Math.max(2 * bytes.length, 128) : Integer.MAX_VALUE;
+                    // TODO Java 6 replace new byte[] and arraycopy(): Arrays.copyOf(bytes, newCapacity)
+                    byte[] newBytes = new byte[newCapacity];
+                    System.arraycopy(bytes, 0, newBytes, 0, length);
+                    newBytes[length++] = (byte)nextByte;
+                    bytes = newBytes;
+                }
+            }
+            if (length < bytes.length) {
+                // Trim, so that the buffer capacity equals the number of bytes read,
+                // as with the previous implementation.
+                byte[] exactBytes = new byte[length];
+                System.arraycopy(bytes, 0, exactBytes, 0, length);
+                bytes = exactBytes;
+            }
+            return ByteBuffer.wrap(bytes);
+        } finally {
+            is.close();
+        }
+    }
+
// private variables -------------------------------------------------
/**
/*
*******************************************************************************
- * Copyright (C) 2009-2011, International Business Machines Corporation and
+ * Copyright (C) 2009-2014, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*/
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
import java.util.Iterator;
import java.util.NoSuchElementException;
}
- /**
+ /**
* Get the UTrie version from an InputStream containing the serialized form
* of either a Trie (version 1) or a Trie2 (version 2).
*
}
return 0;
}
-
-
+
+    /**
+     * Deserializes a Trie2 from a ByteBuffer.
+     * Reads from the current position and leaves the buffer after the end of the trie.
+     *
+     * <p>The signature is read in the buffer's current byte order. If it reads as
+     * 0x54726932, the buffer already has the byte order of the trie data and is left as is.
+     * If it reads as the byte-swapped value 0x32697254, the byte order is reversed
+     * while the trie is read. In all cases, including when an exception is thrown,
+     * the byte order that the buffer had on entry is restored before returning.
+     *
+     * @param bytes the buffer to read from, starting at its current position
+     * @return a Trie2_16 or a Trie2_32, according to the value-width bits in the header options
+     * @throws IllegalArgumentException if the signature is not recognized
+     *         or the header declares an unsupported value width
+     * @throws IOException declared in the signature; not thrown directly by this method body
+     */
+    public static Trie2 createFromSerialized(ByteBuffer bytes) throws IOException {
+        ByteOrder outerByteOrder = bytes.order();
+        try {
+            UTrie2Header header = new UTrie2Header();
+
+            /* check the signature */
+            header.signature = bytes.getInt();
+            switch (header.signature) {
+            case 0x54726932:
+                // The signature matches as read: the buffer is already set to the
+                // byte order of the trie data, whichever one that is.
+                break;
+            case 0x32697254:
+                // The signature is byte-swapped relative to the buffer's byte order:
+                // temporarily reverse the byte order, whichever one it currently is.
+                bytes.order(outerByteOrder == ByteOrder.BIG_ENDIAN ?
+                        ByteOrder.LITTLE_ENDIAN : ByteOrder.BIG_ENDIAN);
+                header.signature = 0x54726932;
+                break;
+            default:
+                throw new IllegalArgumentException("Buffer does not contain a serialized UTrie2");
+            }
+
+            header.options = bytes.getChar();
+            header.indexLength = bytes.getChar();
+            header.shiftedDataLength = bytes.getChar();
+            header.index2NullOffset = bytes.getChar();
+            header.dataNullOffset = bytes.getChar();
+            header.shiftedHighStart = bytes.getChar();
+
+            // Trie2 data width - 0: 16 bits
+            //                    1: 32 bits
+            if ((header.options & UTRIE2_OPTIONS_VALUE_BITS_MASK) > 1) {
+                throw new IllegalArgumentException("UTrie2 serialized format error.");
+            }
+            ValueWidth width;
+            Trie2 This;
+            if ((header.options & UTRIE2_OPTIONS_VALUE_BITS_MASK) == 0) {
+                width = ValueWidth.BITS_16;
+                This = new Trie2_16();
+            } else {
+                width = ValueWidth.BITS_32;
+                This = new Trie2_32();
+            }
+            This.header = header;
+
+            /* get the length values and offsets */
+            This.indexLength = header.indexLength;
+            This.dataLength = header.shiftedDataLength << UTRIE2_INDEX_SHIFT;
+            This.index2NullOffset = header.index2NullOffset;
+            This.dataNullOffset = header.dataNullOffset;
+            This.highStart = header.shiftedHighStart << UTRIE2_SHIFT_1;
+            This.highValueIndex = This.dataLength - UTRIE2_DATA_GRANULARITY;
+            if (width == ValueWidth.BITS_16) {
+                This.highValueIndex += This.indexLength;
+            }
+
+            // Allocate the Trie2 index array. If the data width is 16 bits, the array also
+            // includes the space for the data.
+
+            int indexArraySize = This.indexLength;
+            if (width == ValueWidth.BITS_16) {
+                indexArraySize += This.dataLength;
+            }
+            This.index = new char[indexArraySize];
+
+            /* Read in the index */
+            int i;
+            for (i = 0; i < This.indexLength; i++) {
+                This.index[i] = bytes.getChar();
+            }
+
+            /* Read in the data. 16 bit data goes in the same array as the index.
+             * 32 bit data goes in its own separate data array.
+             */
+            if (width == ValueWidth.BITS_16) {
+                This.data16 = This.indexLength;
+                for (i = 0; i < This.dataLength; i++) {
+                    This.index[This.data16 + i] = bytes.getChar();
+                }
+            } else {
+                This.data32 = new int[This.dataLength];
+                for (i = 0; i < This.dataLength; i++) {
+                    This.data32[i] = bytes.getInt();
+                }
+            }
+
+            switch (width) {
+            case BITS_16:
+                This.data32 = null;
+                This.initialValue = This.index[This.dataNullOffset];
+                This.errorValue = This.index[This.data16 + UTRIE2_BAD_UTF8_DATA_OFFSET];
+                break;
+            case BITS_32:
+                This.data16 = 0;
+                This.initialValue = This.data32[This.dataNullOffset];
+                This.errorValue = This.data32[UTRIE2_BAD_UTF8_DATA_OFFSET];
+                break;
+            default:
+                throw new IllegalArgumentException("UTrie2 serialized format error.");
+            }
+
+            return This;
+        } finally {
+            // Callers continue reading their own data from the same buffer.
+            bytes.order(outerByteOrder);
+        }
+    }
+
/**
* Get the value for a code point as stored in the Trie2.
*
/*
*******************************************************************************
- * Copyright (C) 2009-2010, International Business Machines Corporation and
+ * Copyright (C) 2009-2014, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*/
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
+import java.nio.ByteBuffer;
/**
* @author aheninger
return (Trie2_32) Trie2.createFromSerialized(is);
}
+    /**
+     * Deserializes a trie from the ByteBuffer via Trie2.createFromSerialized(ByteBuffer)
+     * and casts the result to Trie2_32.
+     */
+    public static Trie2_32 createFromSerialized(ByteBuffer bytes) throws IOException {
+        Trie2 trie = Trie2.createFromSerialized(bytes);
+        return (Trie2_32) trie;
+    }
+
/**
* Get the value for a code point as stored in the Trie2.
*