From 08b5fb11b0a984aefa6b174add6f365ec4345f6c Mon Sep 17 00:00:00 2001 From: Fredrik Roubert Date: Tue, 15 Jul 2014 20:19:55 +0000 Subject: [PATCH] ICU-10944 Extend ByteBuffer support in ICUBinary and Trie classes. This new functionality is needed for ByteBuffer support in Normalizer2Impl and BreakIterator. R=markus.icu@gmail.com Review URL: https://codereview.appspot.com/107620044 X-SVN-Rev: 36033 --- .../core/src/com/ibm/icu/impl/CharTrie.java | 46 ++++++++++++++-- .../core/src/com/ibm/icu/impl/ICUBinary.java | 11 ++++ .../core/src/com/ibm/icu/impl/Trie.java | 55 +++++++++++++++++-- .../core/src/com/ibm/icu/impl/Trie2_16.java | 23 +++++++- .../com/ibm/icu/dev/test/util/Trie2Test.java | 25 ++++----- 5 files changed, 133 insertions(+), 27 deletions(-) diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/CharTrie.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/CharTrie.java index 032073a857a..c08d0231884 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/CharTrie.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/CharTrie.java @@ -1,15 +1,16 @@ /* -****************************************************************************** -* Copyright (C) 1996-2011, International Business Machines Corporation and * -* others. All Rights Reserved. * -****************************************************************************** -*/ + ****************************************************************************** + * Copyright (C) 1996-2014, International Business Machines Corporation and + * others. All Rights Reserved. + ****************************************************************************** + */ package com.ibm.icu.impl; import java.io.DataInputStream; import java.io.IOException; import java.io.InputStream; +import java.nio.ByteBuffer; import com.ibm.icu.text.UTF16; @@ -47,6 +48,23 @@ public class CharTrie extends Trie } } + /** + *

Creates a new Trie with the settings for the trie data.

+ *

Unserialize the 32-bit-aligned input buffer and use the data for the + * trie.

+ * @param bytes data of an ICU data file, containing the trie + * @param dataManipulate object which provides methods to parse the char + * data + */ + public CharTrie(ByteBuffer bytes, DataManipulate dataManipulate) { + super(bytes, dataManipulate); + + if (!isCharTrie()) { + throw new IllegalArgumentException( + "Data given does not belong to a char trie."); + } + } + /** * Make a dummy CharTrie. * A dummy trie is an empty runtime trie, used when a real data trie cannot @@ -253,7 +271,23 @@ public class CharTrie extends Trie m_data_ = m_index_; m_initialValue_ = m_data_[m_dataOffset_]; } - + + /** + *

Parses the byte buffer and stores its trie content into an index and + * data array

+ * @param bytes buffer containing trie data + */ + protected final void unserialize(ByteBuffer bytes) + { + int indexDataLength = m_dataOffset_ + m_dataLength_; + m_index_ = new char[indexDataLength]; + for (int i = 0; i < indexDataLength; i ++) { + m_index_[i] = bytes.getChar(); + } + m_data_ = m_index_; + m_initialValue_ = m_data_[m_dataOffset_]; + } + /** * Gets the offset to the data which the surrogate pair points to. * @param lead lead surrogate diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUBinary.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUBinary.java index 155f7712eb5..d0a0bb98122 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUBinary.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUBinary.java @@ -4,6 +4,7 @@ * others. All Rights Reserved. ******************************************************************************* */ + package com.ibm.icu.impl; import java.io.DataInputStream; @@ -148,6 +149,16 @@ public final class ICUBinary dataVersion[2], dataVersion[3]); } + /** + * Same as readHeader(), but returns a VersionInfo rather than a compact int. + */ + public static final VersionInfo readHeaderAndDataVersion(ByteBuffer bytes, + int dataFormat, + Authenticate authenticate) + throws IOException { + return getVersionInfoFromCompactInt(readHeader(bytes, dataFormat, authenticate)); + } + /** * Reads an ICU data header, checks the data format, and returns the data version. * diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie.java index dc49ac4840e..a8343618dda 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie.java @@ -1,15 +1,16 @@ /* -****************************************************************************** -* Copyright (C) 1996-2011, International Business Machines Corporation and * -* others. All Rights Reserved. 
* -****************************************************************************** -*/ + ****************************************************************************** + * Copyright (C) 1996-2014, International Business Machines Corporation and + * others. All Rights Reserved. + ****************************************************************************** + */ package com.ibm.icu.impl; import java.io.DataInputStream; import java.io.IOException; import java.io.InputStream; +import java.nio.ByteBuffer; import java.util.Arrays; import com.ibm.icu.lang.UCharacter; @@ -169,7 +170,35 @@ public abstract class Trie m_dataLength_ = input.readInt(); unserialize(inputStream); } - + + /** + * Trie constructor for CharTrie use. + * @param bytes data of an ICU data file, containing the trie + * @param dataManipulate object containing the information to parse the + * trie data + */ + protected Trie(ByteBuffer bytes, DataManipulate dataManipulate) + { + // Magic number to authenticate the data. + int signature = bytes.getInt(); + m_options_ = bytes.getInt(); + + if (!checkHeader(signature)) { + throw new IllegalArgumentException("ICU data file error: Trie header authentication failed, please check if you have the most updated ICU data file"); + } + + if(dataManipulate != null) { + m_dataManipulate_ = dataManipulate; + } else { + m_dataManipulate_ = new DefaultGetFoldingOffset(); + } + m_isLatin1Linear_ = (m_options_ & + HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_) != 0; + m_dataOffset_ = bytes.getInt(); + m_dataLength_ = bytes.getInt(); + unserialize(bytes); + } + /** * Trie constructor * @param index array to be used for index @@ -376,6 +405,20 @@ public abstract class Trie } } + /** + *

Parses the byte buffer and creates the trie index with it.

+ *

The position of the input ByteBuffer must be right after the trie header.

+ *

This is overwritten by the child classes. + * @param bytes buffer containing trie data + */ + protected void unserialize(ByteBuffer bytes) + { + m_index_ = new char[m_dataOffset_]; + for (int i = 0; i < m_dataOffset_; i ++) { + m_index_[i] = bytes.getChar(); + } + } + /** * Determines if this is a 32 bit trie * @return true if options specifies this is a 32 bit trie diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2_16.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2_16.java index cf4666b6307..8e68f0966b8 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2_16.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2_16.java @@ -1,15 +1,17 @@ /* ******************************************************************************* - * Copyright (C) 2009-2010, International Business Machines Corporation and + * Copyright (C) 2009-2014, International Business Machines Corporation and * others. All Rights Reserved. ******************************************************************************* */ + package com.ibm.icu.impl; import java.io.DataOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.nio.ByteBuffer; /** @@ -55,6 +57,25 @@ public final class Trie2_16 extends Trie2 { return (Trie2_16) Trie2.createFromSerialized(is); } + /** + * Create a Trie2 from its serialized form. Inverse of utrie2_serialize(). + * The serialized format is identical between ICU4C and ICU4J, so this function + * will work with serialized Trie2s from either. + * + * The serialized Trie2 in the bytes may be in either little or big endian byte order. + * This allows using serialized Tries from ICU4C without needing to consider the + * byte order of the system that created them. + * + * @param bytes a byte buffer to the serialized form of a UTrie2. + * @return An unserialized Trie2_16, ready for use. + * @throws IllegalArgumentException if the buffer does not contain a serialized Trie2. 
+ * @throws IOException if a read error occurs in the buffer. + * @throws ClassCastException if the bytes contain a serialized Trie2_32 + */ + public static Trie2_16 createFromSerialized(ByteBuffer bytes) throws IOException { + return (Trie2_16) Trie2.createFromSerialized(bytes); + } + /** * Get the value for a code point as stored in the Trie2. * diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/Trie2Test.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/Trie2Test.java index 3bb973d892b..e2d8d40ed34 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/Trie2Test.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/Trie2Test.java @@ -1,18 +1,21 @@ /* ******************************************************************************* - * Copyright (C) 2009, International Business Machines Corporation and * - * others. All Rights Reserved. * + * Copyright (C) 2009-2014, International Business Machines Corporation and + * others. All Rights Reserved. 
******************************************************************************* */ + package com.ibm.icu.dev.test.util; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; +import java.nio.ByteBuffer; import java.util.Iterator; import com.ibm.icu.dev.test.TestFmwk; +import com.ibm.icu.impl.ICUBinary; import com.ibm.icu.impl.Trie2; import com.ibm.icu.impl.Trie2Writable; import com.ibm.icu.impl.Trie2_16; @@ -212,15 +215,13 @@ public class Trie2Test extends TestFmwk { ByteArrayOutputStream os = new ByteArrayOutputStream(); try { frozen16.serialize(os); - ByteArrayInputStream is = new ByteArrayInputStream(os.toByteArray()); - Trie2 unserialized16 = Trie2.createFromSerialized(is); + Trie2 unserialized16 = Trie2.createFromSerialized(ByteBuffer.wrap(os.toByteArray())); assertEquals("", trie, unserialized16); assertEquals("", Trie2_16.class, unserialized16.getClass()); os.reset(); frozen32.serialize(os); - is = new ByteArrayInputStream(os.toByteArray()); - Trie2 unserialized32 = Trie2.createFromSerialized(is); + Trie2 unserialized32 = Trie2.createFromSerialized(ByteBuffer.wrap(os.toByteArray())); assertEquals("", trie, unserialized32); assertEquals("", Trie2_32.class, unserialized32.getClass()); } catch (IOException e) { @@ -337,8 +338,7 @@ public class Trie2Test extends TestFmwk { // Fragile test. Serialized length could change with changes to compaction. // But it should not change unexpectedly. assertEquals("", 3508, serializedLen); - ByteArrayInputStream is = new ByteArrayInputStream(os.toByteArray()); - Trie2 t1ws16 = Trie2.createFromSerialized(is); + Trie2 t1ws16 = Trie2.createFromSerialized(ByteBuffer.wrap(os.toByteArray())); assertEquals("", t1ws16.getClass(), Trie2_16.class); assertEquals("", t1w, t1ws16); @@ -348,8 +348,7 @@ public class Trie2Test extends TestFmwk { // Fragile test. Serialized length could change with changes to compaction. // But it should not change unexpectedly. 
assertEquals("", 4332, serializedLen); - is = new ByteArrayInputStream(os.toByteArray()); - Trie2 t1ws32 = Trie2.createFromSerialized(is); + Trie2 t1ws32 = Trie2.createFromSerialized(ByteBuffer.wrap(os.toByteArray())); assertEquals("", t1ws32.getClass(), Trie2_32.class); assertEquals("", t1w, t1ws32); } catch (IOException e) { @@ -716,13 +715,11 @@ public class Trie2Test extends TestFmwk { String fileName32 = "Trie2Test." + serializedName + ".32.tri2"; InputStream is = Trie2Test.class.getResourceAsStream(fileName16); - Trie2 trie16 = Trie2.createFromSerialized(is); - is.close(); + Trie2 trie16 = Trie2.createFromSerialized(ICUBinary.getByteBufferFromInputStream(is)); trieGettersTest(testName, trie16, checkRanges); is = Trie2Test.class.getResourceAsStream(fileName32); - Trie2 trie32 = Trie2.createFromSerialized(is); - is.close(); + Trie2 trie32 = Trie2.createFromSerialized(ICUBinary.getByteBufferFromInputStream(is)); trieGettersTest(testName, trie32, checkRanges); -- 2.40.0