From 3cd4f315136e6ad534cfacda6895c2763574c793 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Sun, 1 Jun 2014 00:15:06 +0000 Subject: [PATCH] ICU-7057 make ICU4J CollationDataReader work directly with ByteBuffer rather than DataInputStream X-SVN-Rev: 35786 --- .../ibm/icu/impl/coll/CollationBuilder.java | 4 +- .../icu/impl/coll/CollationDataReader.java | 91 ++++++++------ .../ibm/icu/impl/coll/CollationLoader.java | 9 +- .../com/ibm/icu/impl/coll/CollationRoot.java | 6 +- .../ibm/icu/impl/coll/CollationTailoring.java | 24 ++-- .../com/ibm/icu/text/RuleBasedCollator.java | 6 +- .../core/src/com/ibm/icu/impl/ICUBinary.java | 90 +++++++++++++- .../core/src/com/ibm/icu/impl/Trie2.java | 117 +++++++++++++++++- .../core/src/com/ibm/icu/impl/Trie2_32.java | 7 +- 9 files changed, 283 insertions(+), 71 deletions(-) diff --git a/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationBuilder.java b/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationBuilder.java index 81d108ebc55..d7e64976007 100644 --- a/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationBuilder.java +++ b/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationBuilder.java @@ -23,7 +23,6 @@ import com.ibm.icu.text.Normalizer2; import com.ibm.icu.text.UnicodeSet; import com.ibm.icu.text.UnicodeSetIterator; import com.ibm.icu.util.ULocale; -import com.ibm.icu.util.VersionInfo; public final class CollationBuilder extends CollationRuleParser.Sink { private static final boolean DEBUG = false; @@ -97,8 +96,7 @@ public final class CollationBuilder extends CollationRuleParser.Sink { // In Java, we do not have a rules version. // In C++, the genrb build tool reads and supplies one, // and the rulesVersion is a parameter for this method. 
- VersionInfo rulesVersion = VersionInfo.getInstance(0, 0, 0, 0); - tailoring.setVersion(base.version, rulesVersion); + tailoring.setVersion(base.version, 0 /* rulesVersion */); return tailoring; } diff --git a/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationDataReader.java b/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationDataReader.java index 60d10660ae0..527d1923652 100644 --- a/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationDataReader.java +++ b/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationDataReader.java @@ -11,10 +11,8 @@ package com.ibm.icu.impl.coll; -import java.io.BufferedInputStream; -import java.io.DataInputStream; import java.io.IOException; -import java.io.InputStream; +import java.nio.ByteBuffer; import java.util.Arrays; import com.ibm.icu.impl.ICUBinary; @@ -95,29 +93,31 @@ final class CollationDataReader /* all static */ { static final int IX_RESERVED18_OFFSET = 18; static final int IX_TOTAL_SIZE = 19; - static void read(CollationTailoring base, InputStream inBytes, + static void read(CollationTailoring base, ByteBuffer inBytes, CollationTailoring tailoring) throws IOException { - BufferedInputStream bis = new BufferedInputStream(inBytes); - tailoring.version = ICUBinary.readHeaderAndDataVersion(bis, DATA_FORMAT, IS_ACCEPTABLE); + tailoring.version = ICUBinary.readHeader(inBytes, DATA_FORMAT, IS_ACCEPTABLE); if(base != null && base.getUCAVersion() != tailoring.getUCAVersion()) { throw new ICUException("Tailoring UCA version differs from base data UCA version"); } - DataInputStream ds = new DataInputStream(bis); - int indexesLength = ds.readInt(); // inIndexes[IX_INDEXES_LENGTH] - if(indexesLength < 2) { + int inLength = inBytes.remaining(); + if(inLength < 8) { + throw new ICUException("not enough bytes"); + } + int indexesLength = inBytes.getInt(); // inIndexes[IX_INDEXES_LENGTH] + if(indexesLength < 2 || inLength < indexesLength * 4) { throw new ICUException("not enough indexes"); } 
int[] inIndexes = new int[IX_TOTAL_SIZE + 1]; inIndexes[0] = indexesLength; for(int i = 1; i < indexesLength && i < inIndexes.length; ++i) { - inIndexes[i] = ds.readInt(); + inIndexes[i] = inBytes.getInt(); } for(int i = indexesLength; i < inIndexes.length; ++i) { inIndexes[i] = -1; } if(indexesLength > inIndexes.length) { - ds.skipBytes((indexesLength - inIndexes.length) * 4); + ICUBinary.skipBytes(inBytes, (indexesLength - inIndexes.length) * 4); } // Assume that the tailoring data is in initial state, @@ -130,6 +130,17 @@ final class CollationDataReader /* all static */ { int offset; // byte offset for the index part int length; // number of bytes in the index part + if(indexesLength > IX_TOTAL_SIZE) { + length = inIndexes[IX_TOTAL_SIZE]; + } else if(indexesLength > IX_REORDER_CODES_OFFSET) { + length = inIndexes[indexesLength - 1]; + } else { + length = 0; // only indexes, and inLength was already checked for them + } + if(inLength < length) { + throw new ICUException("not enough bytes"); + } + CollationData baseData = base == null ? null : base.data; int[] reorderCodes; index = IX_REORDER_CODES_OFFSET; @@ -143,13 +154,13 @@ final class CollationDataReader /* all static */ { } reorderCodes = new int[length / 4]; for(int i = 0; i < length / 4; ++i) { - reorderCodes[i] = ds.readInt(); + reorderCodes[i] = inBytes.getInt(); } length &= 3; } else { reorderCodes = new int[0]; } - ds.skipBytes(length); + ICUBinary.skipBytes(inBytes, length); // There should be a reorder table only if there are reorder codes. // However, when there are reorder codes the reorder table may be omitted to reduce @@ -163,13 +174,13 @@ final class CollationDataReader /* all static */ { throw new ICUException("Reordering table without reordering codes"); } reorderTable = new byte[256]; - ds.readFully(reorderTable); + inBytes.get(reorderTable); length -= 256; } else { // If we have reorder codes, then build the reorderTable at the end, // when the CollationData is otherwise complete. 
} - ds.skipBytes(length); + ICUBinary.skipBytes(inBytes, length); if(baseData != null && baseData.numericPrimary != (inIndexes[IX_OPTIONS] & 0xff000000L)) { throw new ICUException("Tailoring numeric primary weight differs from base data"); @@ -184,7 +195,7 @@ final class CollationDataReader /* all static */ { data = tailoring.ownedData; data.base = baseData; data.numericPrimary = inIndexes[IX_OPTIONS] & 0xff000000L; - data.trie = tailoring.trie = Trie2_32.createFromSerialized(ds); + data.trie = tailoring.trie = Trie2_32.createFromSerialized(inBytes); int trieLength = data.trie.getSerializedLength(); if(trieLength > length) { throw new ICUException("Not enough bytes for the mappings trie"); // No mappings. @@ -196,12 +207,12 @@ final class CollationDataReader /* all static */ { } else { throw new ICUException("Missing collation data mappings"); // No mappings. } - ds.skipBytes(length); + ICUBinary.skipBytes(inBytes, length); index = IX_RESERVED8_OFFSET; offset = inIndexes[index]; length = inIndexes[index + 1] - offset; - ds.skipBytes(length); + ICUBinary.skipBytes(inBytes, length); index = IX_CES_OFFSET; offset = inIndexes[index]; @@ -212,16 +223,16 @@ final class CollationDataReader /* all static */ { } data.ces = new long[length / 8]; for(int i = 0; i < length / 8; ++i) { - data.ces[i] = ds.readLong(); + data.ces[i] = inBytes.getLong(); } length &= 7; } - ds.skipBytes(length); + ICUBinary.skipBytes(inBytes, length); index = IX_RESERVED10_OFFSET; offset = inIndexes[index]; length = inIndexes[index + 1] - offset; - ds.skipBytes(length); + ICUBinary.skipBytes(inBytes, length); index = IX_CE32S_OFFSET; offset = inIndexes[index]; @@ -232,11 +243,11 @@ final class CollationDataReader /* all static */ { } data.ce32s = new int[length / 4]; for(int i = 0; i < length / 4; ++i) { - data.ce32s[i] = ds.readInt(); + data.ce32s[i] = inBytes.getInt(); } length &= 3; } - ds.skipBytes(length); + ICUBinary.skipBytes(inBytes, length); int jamoCE32sStart = 
inIndexes[IX_JAMO_CE32S_START]; if(jamoCE32sStart >= 0) { @@ -266,7 +277,7 @@ final class CollationDataReader /* all static */ { } data.rootElements = new long[rootElementsLength]; for(int i = 0; i < rootElementsLength; ++i) { - data.rootElements[i] = ds.readInt() & 0xffffffffL; // unsigned int -> long + data.rootElements[i] = inBytes.getInt() & 0xffffffffL; // unsigned int -> long } long commonSecTer = data.rootElements[CollationRootElements.IX_COMMON_SEC_AND_TER_CE]; if(commonSecTer != Collation.COMMON_SEC_AND_TER_CE) { @@ -280,7 +291,7 @@ final class CollationDataReader /* all static */ { } length &= 3; } - ds.skipBytes(length); + ICUBinary.skipBytes(inBytes, length); index = IX_CONTEXTS_OFFSET; offset = inIndexes[index]; @@ -291,12 +302,12 @@ final class CollationDataReader /* all static */ { } StringBuilder sb = new StringBuilder(length / 2); for(int i = 0; i < length / 2; ++i) { - sb.append(ds.readChar()); + sb.append(inBytes.getChar()); } data.contexts = sb.toString(); length &= 1; } - ds.skipBytes(length); + ICUBinary.skipBytes(inBytes, length); index = IX_UNSAFE_BWD_OFFSET; offset = inIndexes[index]; @@ -327,7 +338,7 @@ final class CollationDataReader /* all static */ { USerializedSet sset = new USerializedSet(); char[] unsafeData = new char[length / 2]; for(int i = 0; i < length / 2; ++i) { - unsafeData[i] = ds.readChar(); + unsafeData[i] = inBytes.getChar(); } length &= 1; sset.getSet(unsafeData, 0); @@ -355,7 +366,7 @@ final class CollationDataReader /* all static */ { } else { throw new ICUException("Missing unsafe-backward-set"); } - ds.skipBytes(length); + ICUBinary.skipBytes(inBytes, length); // If the fast Latin format version is different, // or the version is set to 0 for "no fast Latin table", @@ -368,17 +379,17 @@ final class CollationDataReader /* all static */ { data.fastLatinTableHeader = null; if(((inIndexes[IX_OPTIONS] >> 16) & 0xff) == CollationFastLatin.VERSION) { if(length >= 2) { - char header0 = ds.readChar(); + char header0 = 
inBytes.getChar(); int headerLength = header0 & 0xff; data.fastLatinTableHeader = new char[headerLength]; data.fastLatinTableHeader[0] = header0; for(int i = 1; i < headerLength; ++i) { - data.fastLatinTableHeader[i] = ds.readChar(); + data.fastLatinTableHeader[i] = inBytes.getChar(); } int tableLength = length / 2 - headerLength; data.fastLatinTable = new char[tableLength]; for(int i = 0; i < tableLength; ++i) { - data.fastLatinTable[i] = ds.readChar(); + data.fastLatinTable[i] = inBytes.getChar(); } length &= 1; if((header0 >> 8) != CollationFastLatin.VERSION) { @@ -390,7 +401,7 @@ final class CollationDataReader /* all static */ { } } } - ds.skipBytes(length); + ICUBinary.skipBytes(inBytes, length); index = IX_SCRIPTS_OFFSET; offset = inIndexes[index]; @@ -401,7 +412,7 @@ final class CollationDataReader /* all static */ { } data.scripts = new char[length / 2]; for(int i = 0; i < length / 2; ++i) { - data.scripts[i] = ds.readChar(); + data.scripts[i] = inBytes.getChar(); } length &= 1; } else if(data == null) { @@ -409,7 +420,7 @@ final class CollationDataReader /* all static */ { } else if(baseData != null) { data.scripts = baseData.scripts; } - ds.skipBytes(length); + ICUBinary.skipBytes(inBytes, length); index = IX_COMPRESSIBLE_BYTES_OFFSET; offset = inIndexes[index]; @@ -420,7 +431,7 @@ final class CollationDataReader /* all static */ { } data.compressibleBytes = new boolean[256]; for(int i = 0; i < 256; ++i) { - data.compressibleBytes[i] = ds.readBoolean(); + data.compressibleBytes[i] = inBytes.get() != 0; } length -= 256; } else if(data == null) { @@ -430,14 +441,12 @@ final class CollationDataReader /* all static */ { } else { throw new ICUException("Missing data for compressible primary lead bytes"); } - ds.skipBytes(length); + ICUBinary.skipBytes(inBytes, length); index = IX_RESERVED18_OFFSET; offset = inIndexes[index]; length = inIndexes[index + 1] - offset; - ds.skipBytes(length); - - ds.close(); + ICUBinary.skipBytes(inBytes, length); 
CollationSettings ts = tailoring.settings.readOnly(); int options = inIndexes[IX_OPTIONS] & 0xffff; @@ -481,7 +490,7 @@ final class CollationDataReader /* all static */ { } } private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable(); - private static final byte DATA_FORMAT[] = { 0x55, 0x43, 0x6f, 0x6c }; // "UCol" + private static final int DATA_FORMAT = 0x55436f6c; // "UCol" private CollationDataReader() {} // no constructor } diff --git a/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationLoader.java b/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationLoader.java index ded3e9bccf3..8ec297fa6fa 100644 --- a/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationLoader.java +++ b/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationLoader.java @@ -13,8 +13,8 @@ package com.ibm.icu.impl.coll; -import java.io.ByteArrayInputStream; import java.io.IOException; +import java.nio.ByteBuffer; import java.util.MissingResourceException; import com.ibm.icu.impl.ICUResourceBundle; @@ -203,14 +203,13 @@ public final class CollationLoader { // deserialize UResourceBundle binary = ((ICUResourceBundle)data).get("%%CollationBin"); - byte[] inBytes = binary.getBinary(null); - ByteArrayInputStream inStream = new ByteArrayInputStream(inBytes); + ByteBuffer inBytes = binary.getBinary(); try { - CollationDataReader.read(root, inStream, t); + CollationDataReader.read(root, inBytes, t); } catch (IOException e) { throw new ICUUncheckedIOException("Failed to load collation tailoring data for locale:" + actualLocale + " type:" + type, e); - } // No need to close BAIS. + } // Try to fetch the optional rules string. 
try { diff --git a/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationRoot.java b/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationRoot.java index b75949bedc8..13c0071f736 100644 --- a/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationRoot.java +++ b/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationRoot.java @@ -15,6 +15,7 @@ import java.io.IOException; import java.io.InputStream; import java.util.MissingResourceException; +import com.ibm.icu.impl.ICUBinary; import com.ibm.icu.impl.ICUData; import com.ibm.icu.impl.ICUResourceBundle; @@ -42,11 +43,12 @@ public final class CollationRoot { // purely static static { // Corresponds to C++ load() function. CollationTailoring t = new CollationTailoring(null); + // TODO: Optionally load from a .dat file or stand-alone .icu file. String path = ICUResourceBundle.ICU_BUNDLE + "/coll/ucadata.icu"; - InputStream inBytes = ICUData.getRequiredStream(path); + InputStream is = ICUData.getRequiredStream(path); RuntimeException e2 = null; try { - CollationDataReader.read(null, inBytes, t); + CollationDataReader.read(null, ICUBinary.getByteBufferFromInputStream(is), t); } catch(IOException e) { t = null; e2 = new MissingResourceException( diff --git a/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationTailoring.java b/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationTailoring.java index b1db1019243..b49aafbfd89 100644 --- a/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationTailoring.java +++ b/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationTailoring.java @@ -54,16 +54,21 @@ public final class CollationTailoring { ucaVersion.getMilli() << 6, 0); } - void setVersion(VersionInfo baseVersion, VersionInfo rulesVersion) { - version = VersionInfo.getInstance( - VersionInfo.UCOL_BUILDER_VERSION.getMajor(), - baseVersion.getMinor(), - (baseVersion.getMilli() & 0xc0) + ((rulesVersion.getMajor() + (rulesVersion.getMajor() >> 6)) & 0x3f), - 
(rulesVersion.getMinor() << 3) + (rulesVersion.getMinor() >> 5) + rulesVersion.getMilli() + - (rulesVersion.getMicro() << 4) + (rulesVersion.getMicro() >> 4)); + void setVersion(int baseVersion, int rulesVersion) { + // See comments for version field. + int r = (rulesVersion >> 16) & 0xff00; + int s = (rulesVersion >> 16) & 0xff; + int t = (rulesVersion >> 8) & 0xff; + int q = rulesVersion & 0xff; + version = (VersionInfo.UCOL_BUILDER_VERSION.getMajor() << 24) | + (baseVersion & 0xffc000) | // UCA version u.v.w + ((r + (r >> 6)) & 0x3f00) | + (((s << 3) + (s >> 5) + t + (q << 4) + (q >> 4)) & 0xff); } int getUCAVersion() { - return (version.getMinor() << 4) | (version.getMilli() >> 6); + // Version second byte/bits 23..16 to bits 11..4, + // third byte/bits 15..14 to bits 1..0. + return ((version >> 12) & 0xff0) | ((version >> 14) & 3); } // data for sorting etc. @@ -78,8 +83,7 @@ public final class CollationTailoring { // version[1]: bits 7..3=u, bits 2..0=v // version[2]: bits 7..6=w, bits 5..0=r // version[3]= (s<<5)+(s>>3)+t+(q<<4)+(q>>4) - public VersionInfo version = ZERO_VERSION; - private static final VersionInfo ZERO_VERSION = VersionInfo.getInstance(0, 0, 0, 0); + public int version = 0; // owned objects CollationData ownedData; diff --git a/icu4j/main/classes/collate/src/com/ibm/icu/text/RuleBasedCollator.java b/icu4j/main/classes/collate/src/com/ibm/icu/text/RuleBasedCollator.java index 1ee5ba2b447..519840e631f 100644 --- a/icu4j/main/classes/collate/src/com/ibm/icu/text/RuleBasedCollator.java +++ b/icu4j/main/classes/collate/src/com/ibm/icu/text/RuleBasedCollator.java @@ -1817,11 +1817,11 @@ public final class RuleBasedCollator extends Collator { */ @Override public VersionInfo getVersion() { - VersionInfo version = tailoring.version; + int version = tailoring.version; int rtVersion = VersionInfo.UCOL_RUNTIME_VERSION.getMajor(); return VersionInfo.getInstance( - version.getMajor() + (rtVersion << 4) + (rtVersion >> 4), - version.getMinor(), 
version.getMilli(), version.getMicro()); + (version >>> 24) + (rtVersion << 4) + (rtVersion >> 4), + ((version >> 16) & 0xff), ((version >> 8) & 0xff), (version & 0xff)); } /** diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUBinary.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUBinary.java index 634f418d254..730b89d35a8 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUBinary.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUBinary.java @@ -1,7 +1,7 @@ /* ******************************************************************************* - * Copyright (C) 1996-2010, International Business Machines Corporation and * - * others. All Rights Reserved. * + * Copyright (C) 1996-2014, International Business Machines Corporation and + * others. All Rights Reserved. ******************************************************************************* */ package com.ibm.icu.impl; @@ -9,6 +9,8 @@ package com.ibm.icu.impl; import java.io.DataInputStream; import java.io.IOException; import java.io.InputStream; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; import java.util.Arrays; import com.ibm.icu.util.VersionInfo; @@ -146,6 +148,90 @@ public final class ICUBinary dataVersion[2], dataVersion[3]); } + /** + * Reads an ICU data header, checks the data format, and returns the data version. + * + *

<p>Assumes that the ByteBuffer position is 0 on input. + * The buffer byte order is set according to the data. + * The buffer position is advanced past the header (including UDataInfo and comment). + * + *

<p>See C++ ucmndata.h and unicode/udata.h. + * + * @return dataVersion + * @throws IOException if this is not a valid ICU data item of the expected dataFormat + */ + public static final int readHeader(ByteBuffer bytes, int dataFormat, Authenticate authenticate) + throws IOException { + assert bytes.position() == 0; + byte magic1 = bytes.get(2); + byte magic2 = bytes.get(3); + if (magic1 != MAGIC1 || magic2 != MAGIC2) { + throw new IOException(MAGIC_NUMBER_AUTHENTICATION_FAILED_); + } + + byte isBigEndian = bytes.get(8); + byte charsetFamily = bytes.get(9); + byte sizeofUChar = bytes.get(10); + if (isBigEndian < 0 || 1 < isBigEndian || + charsetFamily != CHAR_SET_ || sizeofUChar != CHAR_SIZE_) { + throw new IOException(HEADER_AUTHENTICATION_FAILED_); + } + bytes.order(isBigEndian != 0 ? ByteOrder.BIG_ENDIAN : ByteOrder.LITTLE_ENDIAN); + + int headerSize = bytes.getChar(0); + int sizeofUDataInfo = bytes.getChar(4); + if (sizeofUDataInfo < 20 || headerSize < (sizeofUDataInfo + 4)) { + throw new IOException("Internal Error: Header size error"); + } + // TODO: Change Authenticate to take int major, int minor, int milli, int micro + // to avoid array allocation. 
+ byte[] formatVersion = new byte[] { + bytes.get(16), bytes.get(17), bytes.get(18), bytes.get(19) + }; + if (bytes.get(12) != (byte)(dataFormat >> 24) || + bytes.get(13) != (byte)(dataFormat >> 16) || + bytes.get(14) != (byte)(dataFormat >> 8) || + bytes.get(15) != (byte)dataFormat || + (authenticate != null && !authenticate.isDataVersionAcceptable(formatVersion))) { + throw new IOException(HEADER_AUTHENTICATION_FAILED_); + } + + bytes.position(headerSize); + return // dataVersion + ((int)bytes.get(20) << 24) | + ((bytes.get(21) & 0xff) << 16) | + ((bytes.get(22) & 0xff) << 8) | + (bytes.get(23) & 0xff); + } + + public static final void skipBytes(ByteBuffer bytes, int skipLength) { + if (skipLength > 0) { + bytes.position(bytes.position() + skipLength); + } + } + + /** + * Reads the entire contents from the stream into a byte array + * and wraps it into a ByteBuffer. Closes the InputStream at the end. + */ + public static final ByteBuffer getByteBufferFromInputStream(InputStream is) throws IOException { + try { + int avail = is.available(); + byte[] bytes = new byte[avail]; + assert avail == is.read(bytes); + while((avail = is.available()) != 0) { + // TODO Java 6 replace new byte[] and arraycopy(): byte[] newBytes = Arrays.copyOf(bytes, bytes.length + avail); + byte[] newBytes = new byte[bytes.length + avail]; + System.arraycopy(bytes, 0, newBytes, 0, bytes.length); + assert avail == is.read(newBytes, bytes.length, avail); + bytes = newBytes; + } + return ByteBuffer.wrap(bytes); + } finally { + is.close(); + } + } + // private variables ------------------------------------------------- /** diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2.java index 48bb9cc49a3..af8137b63b3 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2.java @@ -1,6 +1,6 @@ /* 
******************************************************************************* - * Copyright (C) 2009-2011, International Business Machines Corporation and + * Copyright (C) 2009-2014, International Business Machines Corporation and * others. All Rights Reserved. ******************************************************************************* */ @@ -10,6 +10,8 @@ import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; import java.io.InputStream; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; import java.util.Iterator; import java.util.NoSuchElementException; @@ -200,7 +202,7 @@ public abstract class Trie2 implements Iterable { } - /** + /** * Get the UTrie version from an InputStream containing the serialized form * of either a Trie (version 1) or a Trie2 (version 2). * @@ -242,8 +244,115 @@ public abstract class Trie2 implements Iterable { } return 0; } - - + + /** + * Deserializes a Trie2 from a ByteBuffer. + * Reads from the current position and leaves the buffer after the end of the trie. 
+ */ + public static Trie2 createFromSerialized(ByteBuffer bytes) throws IOException { + ByteOrder outerByteOrder = bytes.order(); + try { + UTrie2Header header = new UTrie2Header(); + + /* check the signature */ + header.signature = bytes.getInt(); + switch (header.signature) { + case 0x54726932: + bytes.order(ByteOrder.BIG_ENDIAN); + break; + case 0x32697254: + bytes.order(ByteOrder.LITTLE_ENDIAN); + header.signature = 0x54726932; + break; + default: + throw new IllegalArgumentException("Buffer does not contain a serialized UTrie2"); + } + + header.options = bytes.getChar(); + header.indexLength = bytes.getChar(); + header.shiftedDataLength = bytes.getChar(); + header.index2NullOffset = bytes.getChar(); + header.dataNullOffset = bytes.getChar(); + header.shiftedHighStart = bytes.getChar(); + + // Trie2 data width - 0: 16 bits + // 1: 32 bits + if ((header.options & UTRIE2_OPTIONS_VALUE_BITS_MASK) > 1) { + throw new IllegalArgumentException("UTrie2 serialized format error."); + } + ValueWidth width; + Trie2 This; + if ((header.options & UTRIE2_OPTIONS_VALUE_BITS_MASK) == 0) { + width = ValueWidth.BITS_16; + This = new Trie2_16(); + } else { + width = ValueWidth.BITS_32; + This = new Trie2_32(); + } + This.header = header; + + /* get the length values and offsets */ + This.indexLength = header.indexLength; + This.dataLength = header.shiftedDataLength << UTRIE2_INDEX_SHIFT; + This.index2NullOffset = header.index2NullOffset; + This.dataNullOffset = header.dataNullOffset; + This.highStart = header.shiftedHighStart << UTRIE2_SHIFT_1; + This.highValueIndex = This.dataLength - UTRIE2_DATA_GRANULARITY; + if (width == ValueWidth.BITS_16) { + This.highValueIndex += This.indexLength; + } + + // Allocate the Trie2 index array. If the data width is 16 bits, the array also + // includes the space for the data. 
+ + int indexArraySize = This.indexLength; + if (width == ValueWidth.BITS_16) { + indexArraySize += This.dataLength; + } + This.index = new char[indexArraySize]; + + /* Read in the index */ + int i; + for (i=0; i