From 7183e03abe8b21b082e6d7f6023dcd50805099a1 Mon Sep 17 00:00:00 2001 From: Fredrik Roubert Date: Tue, 15 Jul 2014 21:28:31 +0000 Subject: [PATCH] ICU-10944 Delete InputStream code that now no longer is used. R=markus.icu@gmail.com Review URL: https://codereview.appspot.com/109640044 X-SVN-Rev: 36043 --- .../core/src/com/ibm/icu/impl/CharTrie.java | 43 ---- .../core/src/com/ibm/icu/impl/ICUBinary.java | 116 --------- .../core/src/com/ibm/icu/impl/IntTrie.java | 33 +-- .../core/src/com/ibm/icu/impl/Trie.java | 52 ---- .../core/src/com/ibm/icu/impl/Trie2.java | 239 +++++------------- .../core/src/com/ibm/icu/impl/Trie2_16.java | 20 -- .../core/src/com/ibm/icu/impl/Trie2_32.java | 18 +- 7 files changed, 82 insertions(+), 439 deletions(-) diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/CharTrie.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/CharTrie.java index c08d0231884..ce01b0bbd12 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/CharTrie.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/CharTrie.java @@ -7,9 +7,6 @@ package com.ibm.icu.impl; -import java.io.DataInputStream; -import java.io.IOException; -import java.io.InputStream; import java.nio.ByteBuffer; import com.ibm.icu.text.UTF16; @@ -27,27 +24,6 @@ public class CharTrie extends Trie { // public constructors --------------------------------------------- - /** - *

Creates a new Trie with the settings for the trie data.

- *

Unserialize the 32-bit-aligned input stream and use the data for the - * trie.

- * @param inputStream file input stream to a ICU data file, containing - * the trie - * @param dataManipulate object which provides methods to parse the char - * data - * @throws IOException thrown when data reading fails - */ - public CharTrie(InputStream inputStream, - DataManipulate dataManipulate) throws IOException - { - super(inputStream, dataManipulate); - - if (!isCharTrie()) { - throw new IllegalArgumentException( - "Data given does not belong to a char trie."); - } - } - /** *

Creates a new Trie with the settings for the trie data.

*

Unserialize the 32-bit-aligned input buffer and use the data for the @@ -253,25 +229,6 @@ public class CharTrie extends Trie // protected methods ----------------------------------------------- - /** - *

Parses the input stream and stores its trie content into a index and - * data array

- * @param inputStream data input stream containing trie data - * @exception IOException thrown when data reading fails - */ - protected final void unserialize(InputStream inputStream) - throws IOException - { - DataInputStream input = new DataInputStream(inputStream); - int indexDataLength = m_dataOffset_ + m_dataLength_; - m_index_ = new char[indexDataLength]; - for (int i = 0; i < indexDataLength; i ++) { - m_index_[i] = input.readChar(); - } - m_data_ = m_index_; - m_initialValue_ = m_data_[m_dataOffset_]; - } - /** *

Parses the byte buffer and stores its trie content into a index and * data array

diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUBinary.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUBinary.java index 51ef8681be9..b345d74f038 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUBinary.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUBinary.java @@ -7,12 +7,10 @@ package com.ibm.icu.impl; -import java.io.DataInputStream; import java.io.IOException; import java.io.InputStream; import java.nio.ByteBuffer; import java.nio.ByteOrder; -import java.util.Arrays; import com.ibm.icu.util.VersionInfo; @@ -35,119 +33,6 @@ public final class ICUBinary } // public methods -------------------------------------------------------- - - /** - *

ICU data header reader method. - * Takes a ICU generated big-endian input stream, parse the ICU standard - * file header and authenticates them.

- *

Header format: - *

- *

- *

- * Example of use:
- *

-    * try {
-    *    FileInputStream input = new FileInputStream(filename);
-    *    If (Utility.readICUDataHeader(input, dataformat, dataversion, 
-    *                                  unicode) {
-    *        System.out.println("Verified file header, this is a ICU data file");
-    *    }
-    * } catch (IOException e) {
-    *    System.out.println("This is not a ICU data file");
-    * }
-    * 
- *

- * @param inputStream input stream that contains the ICU data header - * @param dataFormatIDExpected Data format expected. An array of 4 bytes - * information about the data format. - * E.g. data format ID 1.2.3.4. will became an array of - * {1, 2, 3, 4} - * @param authenticate user defined extra data authentication. This value - * can be null, if no extra authentication is needed. - * @exception IOException thrown if there is a read error or - * when header authentication fails. - */ - public static final byte[] readHeader(InputStream inputStream, - byte dataFormatIDExpected[], - Authenticate authenticate) - throws IOException - { - DataInputStream input = new DataInputStream(inputStream); - char headersize = input.readChar(); - int readcount = 2; - //reading the header format - byte magic1 = input.readByte(); - readcount ++; - byte magic2 = input.readByte(); - readcount ++; - if (magic1 != MAGIC1 || magic2 != MAGIC2) { - throw new IOException(MAGIC_NUMBER_AUTHENTICATION_FAILED_); - } - - input.readChar(); // reading size - readcount += 2; - input.readChar(); // reading reserved word - readcount += 2; - byte bigendian = input.readByte(); - readcount ++; - byte charset = input.readByte(); - readcount ++; - byte charsize = input.readByte(); - readcount ++; - input.readByte(); // reading reserved byte - readcount ++; - - byte dataFormatID[] = new byte[4]; - input.readFully(dataFormatID); - readcount += 4; - byte dataVersion[] = new byte[4]; - input.readFully(dataVersion); - readcount += 4; - byte unicodeVersion[] = new byte[4]; - input.readFully(unicodeVersion); - readcount += 4; - if (headersize < readcount) { - throw new IOException("Internal Error: Header size error"); - } - input.skipBytes(headersize - readcount); - - if (bigendian != BIG_ENDIAN_ || charset != CHAR_SET_ - || charsize != CHAR_SIZE_ - || !Arrays.equals(dataFormatIDExpected, dataFormatID) - || (authenticate != null - && !authenticate.isDataVersionAcceptable(dataVersion))) { - throw new IOException(HEADER_AUTHENTICATION_FAILED_); - } - return unicodeVersion; - } - - /** - * Same as readHeader(), but returns a VersionInfo rather than a byte[]. - */ - public static final VersionInfo readHeaderAndDataVersion(InputStream inputStream, - byte dataFormatIDExpected[], - Authenticate authenticate) - throws IOException { - byte[] dataVersion = readHeader(inputStream, dataFormatIDExpected, authenticate); - return VersionInfo.getInstance(dataVersion[0], dataVersion[1], - dataVersion[2], dataVersion[3]); - } /** * Same as readHeader(), but returns a VersionInfo rather than a compact int. @@ -284,7 +169,6 @@ public final class ICUBinary /** * File format authentication values */ - private static final byte BIG_ENDIAN_ = 1; private static final byte CHAR_SET_ = 0; private static final byte CHAR_SIZE_ = 2; diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/IntTrie.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/IntTrie.java index 3b2ecfad572..475892adbe8 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/IntTrie.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/IntTrie.java @@ -1,15 +1,14 @@ /* -****************************************************************************** -* Copyright (C) 1996-2011, International Business Machines Corporation and * -* others. All Rights Reserved. * -****************************************************************************** -*/ + ****************************************************************************** + * Copyright (C) 1996-2014, International Business Machines Corporation and + * others. All Rights Reserved. + ****************************************************************************** + */ package com.ibm.icu.impl; -import java.io.DataInputStream; import java.io.IOException; -import java.io.InputStream; +import java.nio.ByteBuffer; import java.util.Arrays; import com.ibm.icu.text.UTF16; @@ -28,16 +27,15 @@ public class IntTrie extends Trie *

Creates a new Trie with the settings for the trie data.

*

Unserialize the 32-bit-aligned input stream and use the data for the * trie.

- * @param inputStream file input stream to a ICU data file, containing - * the trie + * @param bytes file buffer to a ICU data file, containing the trie * @param dataManipulate object which provides methods to parse the char * data * @throws IOException thrown when data reading fails */ - public IntTrie(InputStream inputStream, DataManipulate dataManipulate) + public IntTrie(ByteBuffer bytes, DataManipulate dataManipulate) throws IOException { - super(inputStream, dataManipulate); + super(bytes, dataManipulate); if (!isIntTrie()) { throw new IllegalArgumentException( "Data given does not belong to a int trie."); @@ -244,22 +242,19 @@ public class IntTrie extends Trie /** *

Parses the input stream and stores its trie content into a index and * data array

- * @param inputStream data input stream containing trie data - * @exception IOException thrown when data reading fails + * @param bytes data buffer containing trie data */ - protected final void unserialize(InputStream inputStream) - throws IOException + protected final void unserialize(ByteBuffer bytes) { - super.unserialize(inputStream); + super.unserialize(bytes); // one used for initial value m_data_ = new int[m_dataLength_]; - DataInputStream input = new DataInputStream(inputStream); for (int i = 0; i < m_dataLength_; i ++) { - m_data_[i] = input.readInt(); + m_data_[i] = bytes.getInt(); } m_initialValue_ = m_data_[0]; } - + /** * Gets the offset to the data which the surrogate pair points to. * @param lead lead surrogate diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie.java index a8343618dda..b02d55cc197 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie.java @@ -7,9 +7,6 @@ package com.ibm.icu.impl; -import java.io.DataInputStream; -import java.io.IOException; -import java.io.InputStream; import java.nio.ByteBuffer; import java.util.Arrays; @@ -138,39 +135,6 @@ public abstract class Trie // protected constructor ------------------------------------------- - /** - * Trie constructor for CharTrie use. - * @param inputStream ICU data file input stream which contains the - * trie - * @param dataManipulate object containing the information to parse the - * trie data - * @throws IOException thrown when input stream does not have the - * right header. - */ - protected Trie(InputStream inputStream, - DataManipulate dataManipulate) throws IOException - { - DataInputStream input = new DataInputStream(inputStream); - // Magic number to authenticate the data. - int signature = input.readInt(); - m_options_ = input.readInt(); - - if (!checkHeader(signature)) { - throw new IllegalArgumentException("ICU data file error: Trie header authentication failed, please check if you have the most updated ICU data file"); - } - - if(dataManipulate != null) { - m_dataManipulate_ = dataManipulate; - } else { - m_dataManipulate_ = new DefaultGetFoldingOffset(); - } - m_isLatin1Linear_ = (m_options_ & - HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_) != 0; - m_dataOffset_ = input.readInt(); - m_dataLength_ = input.readInt(); - unserialize(inputStream); - } - /** * Trie constructor for CharTrie use. * @param bytes data of an ICU data file, containing the trie @@ -389,22 +353,6 @@ public abstract class Trie } } - /** - *

Parses the inputstream and creates the trie index with it.

- *

This is overwritten by the child classes. - * @param inputStream input stream containing the trie information - * @exception IOException thrown when data reading fails. - */ - protected void unserialize(InputStream inputStream) throws IOException - { - //indexLength is a multiple of 1024 >> INDEX_STAGE_2_SHIFT_ - m_index_ = new char[m_dataOffset_]; - DataInputStream input = new DataInputStream(inputStream); - for (int i = 0; i < m_dataOffset_; i ++) { - m_index_[i] = input.readChar(); - } - } - /** *

Parses the byte buffer and creates the trie index with it.

*

The position of the input ByteBuffer must be right after the trie header.

diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2.java index af8137b63b3..495fe3ff581 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2.java @@ -4,9 +4,9 @@ * others. All Rights Reserved. ******************************************************************************* */ + package com.ibm.icu.impl; -import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; import java.io.InputStream; @@ -28,30 +28,33 @@ import java.util.NoSuchElementException; */ public abstract class Trie2 implements Iterable { - + /** * Create a Trie2 from its serialized form. Inverse of utrie2_serialize(). + * + * Reads from the current position and leaves the buffer after the end of the trie. + * * The serialized format is identical between ICU4C and ICU4J, so this function * will work with serialized Trie2s from either. - * + * * The actual type of the returned Trie2 will be either Trie2_16 or Trie2_32, depending - * on the width of the data. - * + * on the width of the data. + * * To obtain the width of the Trie2, check the actual class type of the returned Trie2. * Or use the createFromSerialized() function of Trie2_16 or Trie2_32, which will * return only Tries of their specific type/size. - * + * * The serialized Trie2 on the stream may be in either little or big endian byte order. * This allows using serialized Tries from ICU4C without needing to consider the * byte order of the system that created them. * - * @param is an input stream to the serialized form of a UTrie2. + * @param bytes a byte buffer to the serialized form of a UTrie2. * @return An unserialized Trie2, ready for use. * @throws IllegalArgumentException if the stream does not contain a serialized Trie2. - * @throws IOException if a read error occurs on the InputStream. - * + * @throws IOException if a read error occurs in the buffer. + * */ - public static Trie2 createFromSerialized(InputStream is) throws IOException { + public static Trie2 createFromSerialized(ByteBuffer bytes) throws IOException { // From ICU4C utrie2_impl.h // * Trie2 data structure in serialized form: // * @@ -63,193 +66,30 @@ public abstract class Trie2 implements Iterable { // typedef struct UTrie2Header { // /** "Tri2" in big-endian US-ASCII (0x54726932) */ // uint32_t signature; - + // /** // * options bit field: // * 15.. 4 reserved (0) // * 3.. 0 UTrie2ValueBits valueBits // */ // uint16_t options; - // + // // /** UTRIE2_INDEX_1_OFFSET..UTRIE2_MAX_INDEX_LENGTH */ // uint16_t indexLength; - // + // // /** (UTRIE2_DATA_START_OFFSET..UTRIE2_MAX_DATA_LENGTH)>>UTRIE2_INDEX_SHIFT */ // uint16_t shiftedDataLength; - // + // // /** Null index and data blocks, not shifted. */ // uint16_t index2NullOffset, dataNullOffset; - // + // // /** // * First code point of the single-value range ending with U+10ffff, // * rounded up and then shifted right by UTRIE2_SHIFT_1. // */ // uint16_t shiftedHighStart; // } UTrie2Header; - - DataInputStream dis = new DataInputStream(is); - boolean needByteSwap = false; - - UTrie2Header header = new UTrie2Header(); - - /* check the signature */ - header.signature = dis.readInt(); - switch (header.signature) { - case 0x54726932: - needByteSwap = false; - break; - case 0x32697254: - needByteSwap = true; - header.signature = Integer.reverseBytes(header.signature); - break; - default: - throw new IllegalArgumentException("Stream does not contain a serialized UTrie2"); - } - - header.options = swapShort(needByteSwap, dis.readUnsignedShort()); - header.indexLength = swapShort(needByteSwap, dis.readUnsignedShort()); - header.shiftedDataLength = swapShort(needByteSwap, dis.readUnsignedShort()); - header.index2NullOffset = swapShort(needByteSwap, dis.readUnsignedShort()); - header.dataNullOffset = swapShort(needByteSwap, dis.readUnsignedShort()); - header.shiftedHighStart = swapShort(needByteSwap, dis.readUnsignedShort()); - - // Trie2 data width - 0: 16 bits - // 1: 32 bits - if ((header.options & UTRIE2_OPTIONS_VALUE_BITS_MASK) > 1) { - throw new IllegalArgumentException("UTrie2 serialized format error."); - } - ValueWidth width; - Trie2 This; - if ((header.options & UTRIE2_OPTIONS_VALUE_BITS_MASK) == 0) { - width = ValueWidth.BITS_16; - This = new Trie2_16(); - } else { - width = ValueWidth.BITS_32; - This = new Trie2_32(); - } - This.header = header; - - /* get the length values and offsets */ - This.indexLength = header.indexLength; - This.dataLength = header.shiftedDataLength << UTRIE2_INDEX_SHIFT; - This.index2NullOffset = header.index2NullOffset; - This.dataNullOffset = header.dataNullOffset; - This.highStart = header.shiftedHighStart << UTRIE2_SHIFT_1; - This.highValueIndex = This.dataLength - UTRIE2_DATA_GRANULARITY; - if (width == ValueWidth.BITS_16) { - This.highValueIndex += This.indexLength; - } - - // Allocate the Trie2 index array. If the data width is 16 bits, the array also - // includes the space for the data. - - int indexArraySize = This.indexLength; - if (width == ValueWidth.BITS_16) { - indexArraySize += This.dataLength; - } - This.index = new char[indexArraySize]; - - /* Read in the index */ - int i; - for (i=0; i { } } + /** + * Get the UTrie version from an InputStream containing the serialized form + * of either a Trie (version 1) or a Trie2 (version 2). + * + * @param is an InputStream containing the serialized form + * of a UTrie, version 1 or 2. The stream must support mark() and reset(). + * The position of the input stream will be left unchanged. + * @param littleEndianOk If FALSE, only big-endian (Java native) serialized forms are recognized. + * If TRUE, little-endian serialized forms are recognized as well. + * @return the Trie version of the serialized form, or 0 if it is not + * recognized as a serialized UTrie + * @throws IOException on errors in reading from the input stream. + */ + public static int getVersion(InputStream is, boolean littleEndianOk) throws IOException { + if (! is.markSupported()) { + throw new IllegalArgumentException("Input stream must support mark()."); + } + is.mark(4); + byte sig[] = new byte[4]; + int read = is.read(sig); + is.reset(); + + if (read != sig.length) { + return 0; + } + + if (sig[0]=='T' && sig[1]=='r' && sig[2]=='i' && sig[3]=='e') { + return 1; + } + if (sig[0]=='T' && sig[1]=='r' && sig[2]=='i' && sig[3]=='2') { + return 2; + } + if (littleEndianOk) { + if (sig[0]=='e' && sig[1]=='i' && sig[2]=='r' && sig[3]=='T') { + return 1; + } + if (sig[0]=='2' && sig[1]=='i' && sig[2]=='r' && sig[3]=='T') { + return 2; + } + } + return 0; + } + /** * Get the value for a code point as stored in the Trie2. * diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2_16.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2_16.java index 8e68f0966b8..a12e7002c86 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2_16.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2_16.java @@ -9,7 +9,6 @@ package com.ibm.icu.impl; import java.io.DataOutputStream; import java.io.IOException; -import java.io.InputStream; import java.io.OutputStream; import java.nio.ByteBuffer; @@ -38,25 +37,6 @@ public final class Trie2_16 extends Trie2 { } - /** - * Create a Trie2 from its serialized form. Inverse of utrie2_serialize(). - * The serialized format is identical between ICU4C and ICU4J, so this function - * will work with serialized Trie2s from either. - * - * The serialized Trie2 on the stream may be in either little or big endian byte order. - * This allows using serialized Tries from ICU4C without needing to consider the - * byte order of the system that created them. - * - * @param is an input stream to the serialized form of a UTrie2. - * @return An unserialized Trie_16, ready for use. - * @throws IllegalArgumentException if the stream does not contain a serialized Trie2. - * @throws IOException if a read error occurs on the InputStream. - * @throws ClassCastException if the stream contains a serialized Trie2_32 - */ - public static Trie2_16 createFromSerialized(InputStream is) throws IOException { - return (Trie2_16) Trie2.createFromSerialized(is); - } - /** * Create a Trie2 from its serialized form. Inverse of utrie2_serialize(). * The serialized format is identical between ICU4C and ICU4J, so this function diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2_32.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2_32.java index 96b581a8ee0..da15626d461 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2_32.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2_32.java @@ -4,11 +4,11 @@ * others. All Rights Reserved. ******************************************************************************* */ + package com.ibm.icu.impl; import java.io.DataOutputStream; import java.io.IOException; -import java.io.InputStream; import java.io.OutputStream; import java.nio.ByteBuffer; @@ -40,22 +40,18 @@ public class Trie2_32 extends Trie2 { * Create a Trie2 from its serialized form. Inverse of utrie2_serialize(). * The serialized format is identical between ICU4C and ICU4J, so this function * will work with serialized Trie2s from either. - * - * The serialized Trie2 on the stream may be in either little or big endian byte order. + * + * The serialized Trie2 in the bytes may be in either little or big endian byte order. * This allows using serialized Tries from ICU4C without needing to consider the * byte order of the system that created them. * - * @param is an input stream to the serialized form of a UTrie2. + * @param bytes a byte buffer to the serialized form of a UTrie2. * @return An unserialized Trie_32, ready for use. * @throws IllegalArgumentException if the stream does not contain a serialized Trie2. - * @throws IOException if a read error occurs on the InputStream. - * @throws ClassCastException if the stream contains a serialized Trie2_16 + * @throws IOException if a read error occurs in the buffer. + * @throws ClassCastException if the bytes contains a serialized Trie2_16 */ - public static Trie2_32 createFromSerialized(InputStream is) throws IOException { - return (Trie2_32) Trie2.createFromSerialized(is); - } - - public static Trie2_32 createFromSerialized(ByteBuffer bytes) throws IOException { + public static Trie2_32 createFromSerialized(ByteBuffer bytes) throws IOException { return (Trie2_32) Trie2.createFromSerialized(bytes); } -- 2.40.0