ICU-7057 make ICU4J CollationDataReader work directly with ByteBuffer rather than...

author Markus Scherer <markus.icu@gmail.com>

Sun, 1 Jun 2014 00:15:06 +0000 (00:15 +0000)

committer Markus Scherer <markus.icu@gmail.com>

Sun, 1 Jun 2014 00:15:06 +0000 (00:15 +0000)
author Markus Scherer <markus.icu@gmail.com>
Sun, 1 Jun 2014 00:15:06 +0000 (00:15 +0000)
committer Markus Scherer <markus.icu@gmail.com>
Sun, 1 Jun 2014 00:15:06 +0000 (00:15 +0000)
diff --git a/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationBuilder.java b/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationBuilder.java

index 81d108ebc5557f69596e9eedee8d58555d875552..d7e649760077ae9b60c033cdcce23d102faf65fd 100644 (file)
--- a/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationBuilder.java
+++ b/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationBuilder.java
@@ -23,7 +23,6 @@ import com.ibm.icu.text.Normalizer2;
  import com.ibm.icu.text.UnicodeSet;
  import com.ibm.icu.text.UnicodeSetIterator;
  import com.ibm.icu.util.ULocale;
-import com.ibm.icu.util.VersionInfo;
  
  public final class CollationBuilder extends CollationRuleParser.Sink {
      private static final boolean DEBUG = false;
@@ -97,8 +96,7 @@ public final class CollationBuilder extends CollationRuleParser.Sink {
          // In Java, we do not have a rules version.
          // In C++, the genrb build tool reads and supplies one,
          // and the rulesVersion is a parameter for this method.
-        VersionInfo rulesVersion = VersionInfo.getInstance(0, 0, 0, 0);
-        tailoring.setVersion(base.version, rulesVersion);
+        tailoring.setVersion(base.version, 0 /* rulesVersion */);
          return tailoring;
      }
  
diff --git a/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationDataReader.java b/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationDataReader.java

index 60d10660ae0da028d76bc3353cacdd0dbcc0ced6..527d1923652291fe0a1ffae82321f0f0aa4fb896 100644 (file)
--- a/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationDataReader.java
+++ b/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationDataReader.java
@@ -11,10 +11,8 @@
  
  package com.ibm.icu.impl.coll;
  
-import java.io.BufferedInputStream;
-import java.io.DataInputStream;
  import java.io.IOException;
-import java.io.InputStream;
+import java.nio.ByteBuffer;
  import java.util.Arrays;
  
  import com.ibm.icu.impl.ICUBinary;
@@ -95,29 +93,31 @@ final class CollationDataReader /* all static */ {
      static final int IX_RESERVED18_OFFSET = 18;
      static final int IX_TOTAL_SIZE = 19;
  
-    static void read(CollationTailoring base, InputStream inBytes,
+    static void read(CollationTailoring base, ByteBuffer inBytes,
                       CollationTailoring tailoring) throws IOException {
-        BufferedInputStream bis = new BufferedInputStream(inBytes);
-        tailoring.version = ICUBinary.readHeaderAndDataVersion(bis, DATA_FORMAT, IS_ACCEPTABLE);
+        tailoring.version = ICUBinary.readHeader(inBytes, DATA_FORMAT, IS_ACCEPTABLE);
          if(base != null && base.getUCAVersion() != tailoring.getUCAVersion()) {
              throw new ICUException("Tailoring UCA version differs from base data UCA version");
          }
  
-        DataInputStream ds = new DataInputStream(bis);
-        int indexesLength = ds.readInt();  // inIndexes[IX_INDEXES_LENGTH]
-        if(indexesLength < 2) {
+        int inLength = inBytes.remaining();
+        if(inLength < 8) {
+            throw new ICUException("not enough bytes");
+        }
+        int indexesLength = inBytes.getInt();  // inIndexes[IX_INDEXES_LENGTH]
+        if(indexesLength < 2 || inLength < indexesLength * 4) {
              throw new ICUException("not enough indexes");
          }
          int[] inIndexes = new int[IX_TOTAL_SIZE + 1];
          inIndexes[0] = indexesLength;
          for(int i = 1; i < indexesLength && i < inIndexes.length; ++i) {
-            inIndexes[i] = ds.readInt();
+            inIndexes[i] = inBytes.getInt();
          }
          for(int i = indexesLength; i < inIndexes.length; ++i) {
              inIndexes[i] = -1;
          }
          if(indexesLength > inIndexes.length) {
-            ds.skipBytes((indexesLength - inIndexes.length) * 4);
+            ICUBinary.skipBytes(inBytes, (indexesLength - inIndexes.length) * 4);
          }
  
          // Assume that the tailoring data is in initial state,
@@ -130,6 +130,17 @@ final class CollationDataReader /* all static */ {
          int offset;  // byte offset for the index part
          int length;  // number of bytes in the index part
  
+        if(indexesLength > IX_TOTAL_SIZE) {
+            length = inIndexes[IX_TOTAL_SIZE];
+        } else if(indexesLength > IX_REORDER_CODES_OFFSET) {
+            length = inIndexes[indexesLength - 1];
+        } else {
+            length = 0;  // only indexes, and inLength was already checked for them
+        }
+        if(inLength < length) {
+            throw new ICUException("not enough bytes");
+        }
+
          CollationData baseData = base == null ? null : base.data;
          int[] reorderCodes;
          index = IX_REORDER_CODES_OFFSET;
@@ -143,13 +154,13 @@ final class CollationDataReader /* all static */ {
              }
              reorderCodes = new int[length / 4];
              for(int i = 0; i < length / 4; ++i) {
-                reorderCodes[i] = ds.readInt();
+                reorderCodes[i] = inBytes.getInt();
              }
              length &= 3;
          } else {
              reorderCodes = new int[0];
          }
-        ds.skipBytes(length);
+        ICUBinary.skipBytes(inBytes, length);
  
          // There should be a reorder table only if there are reorder codes.
          // However, when there are reorder codes the reorder table may be omitted to reduce
@@ -163,13 +174,13 @@ final class CollationDataReader /* all static */ {
                  throw new ICUException("Reordering table without reordering codes");
              }
              reorderTable = new byte[256];
-            ds.readFully(reorderTable);
+            inBytes.get(reorderTable);
              length -= 256;
          } else {
              // If we have reorder codes, then build the reorderTable at the end,
              // when the CollationData is otherwise complete.
          }
-        ds.skipBytes(length);
+        ICUBinary.skipBytes(inBytes, length);
  
          if(baseData != null && baseData.numericPrimary != (inIndexes[IX_OPTIONS] & 0xff000000L)) {
              throw new ICUException("Tailoring numeric primary weight differs from base data");
@@ -184,7 +195,7 @@ final class CollationDataReader /* all static */ {
              data = tailoring.ownedData;
              data.base = baseData;
              data.numericPrimary = inIndexes[IX_OPTIONS] & 0xff000000L;
-            data.trie = tailoring.trie = Trie2_32.createFromSerialized(ds);
+            data.trie = tailoring.trie = Trie2_32.createFromSerialized(inBytes);
              int trieLength = data.trie.getSerializedLength();
              if(trieLength > length) {
                  throw new ICUException("Not enough bytes for the mappings trie");  // No mappings.
@@ -196,12 +207,12 @@ final class CollationDataReader /* all static */ {
          } else {
              throw new ICUException("Missing collation data mappings");  // No mappings.
          }
-        ds.skipBytes(length);
+        ICUBinary.skipBytes(inBytes, length);
  
          index = IX_RESERVED8_OFFSET;
          offset = inIndexes[index];
          length = inIndexes[index + 1] - offset;
-        ds.skipBytes(length);
+        ICUBinary.skipBytes(inBytes, length);
  
          index = IX_CES_OFFSET;
          offset = inIndexes[index];
@@ -212,16 +223,16 @@ final class CollationDataReader /* all static */ {
              }
              data.ces = new long[length / 8];
              for(int i = 0; i < length / 8; ++i) {
-                data.ces[i] = ds.readLong();
+                data.ces[i] = inBytes.getLong();
              }
              length &= 7;
          }
-        ds.skipBytes(length);
+        ICUBinary.skipBytes(inBytes, length);
  
          index = IX_RESERVED10_OFFSET;
          offset = inIndexes[index];
          length = inIndexes[index + 1] - offset;
-        ds.skipBytes(length);
+        ICUBinary.skipBytes(inBytes, length);
  
          index = IX_CE32S_OFFSET;
          offset = inIndexes[index];
@@ -232,11 +243,11 @@ final class CollationDataReader /* all static */ {
              }
              data.ce32s = new int[length / 4];
              for(int i = 0; i < length / 4; ++i) {
-                data.ce32s[i] = ds.readInt();
+                data.ce32s[i] = inBytes.getInt();
              }
              length &= 3;
          }
-        ds.skipBytes(length);
+        ICUBinary.skipBytes(inBytes, length);
  
          int jamoCE32sStart = inIndexes[IX_JAMO_CE32S_START];
          if(jamoCE32sStart >= 0) {
@@ -266,7 +277,7 @@ final class CollationDataReader /* all static */ {
              }
              data.rootElements = new long[rootElementsLength];
              for(int i = 0; i < rootElementsLength; ++i) {
-                data.rootElements[i] = ds.readInt() & 0xffffffffL;  // unsigned int -> long
+                data.rootElements[i] = inBytes.getInt() & 0xffffffffL;  // unsigned int -> long
              }
              long commonSecTer = data.rootElements[CollationRootElements.IX_COMMON_SEC_AND_TER_CE];
              if(commonSecTer != Collation.COMMON_SEC_AND_TER_CE) {
@@ -280,7 +291,7 @@ final class CollationDataReader /* all static */ {
              }
              length &= 3;
          }
-        ds.skipBytes(length);
+        ICUBinary.skipBytes(inBytes, length);
  
          index = IX_CONTEXTS_OFFSET;
          offset = inIndexes[index];
@@ -291,12 +302,12 @@ final class CollationDataReader /* all static */ {
              }
              StringBuilder sb = new StringBuilder(length / 2);
              for(int i = 0; i < length / 2; ++i) {
-                sb.append(ds.readChar());
+                sb.append(inBytes.getChar());
              }
              data.contexts = sb.toString();
              length &= 1;
          }
-        ds.skipBytes(length);
+        ICUBinary.skipBytes(inBytes, length);
  
          index = IX_UNSAFE_BWD_OFFSET;
          offset = inIndexes[index];
@@ -327,7 +338,7 @@ final class CollationDataReader /* all static */ {
              USerializedSet sset = new USerializedSet();
              char[] unsafeData = new char[length / 2];
              for(int i = 0; i < length / 2; ++i) {
-                unsafeData[i] = ds.readChar();
+                unsafeData[i] = inBytes.getChar();
              }
              length &= 1;
              sset.getSet(unsafeData, 0);
@@ -355,7 +366,7 @@ final class CollationDataReader /* all static */ {
          } else {
              throw new ICUException("Missing unsafe-backward-set");
          }
-        ds.skipBytes(length);
+        ICUBinary.skipBytes(inBytes, length);
  
          // If the fast Latin format version is different,
          // or the version is set to 0 for "no fast Latin table",
@@ -368,17 +379,17 @@ final class CollationDataReader /* all static */ {
              data.fastLatinTableHeader = null;
              if(((inIndexes[IX_OPTIONS] >> 16) & 0xff) == CollationFastLatin.VERSION) {
                  if(length >= 2) {
-                    char header0 = ds.readChar();
+                    char header0 = inBytes.getChar();
                      int headerLength = header0 & 0xff;
                      data.fastLatinTableHeader = new char[headerLength];
                      data.fastLatinTableHeader[0] = header0;
                      for(int i = 1; i < headerLength; ++i) {
-                        data.fastLatinTableHeader[i] = ds.readChar();
+                        data.fastLatinTableHeader[i] = inBytes.getChar();
                      }
                      int tableLength = length / 2 - headerLength;
                      data.fastLatinTable = new char[tableLength];
                      for(int i = 0; i < tableLength; ++i) {
-                        data.fastLatinTable[i] = ds.readChar();
+                        data.fastLatinTable[i] = inBytes.getChar();
                      }
                      length &= 1;
                      if((header0 >> 8) != CollationFastLatin.VERSION) {
@@ -390,7 +401,7 @@ final class CollationDataReader /* all static */ {
                  }
              }
          }
-        ds.skipBytes(length);
+        ICUBinary.skipBytes(inBytes, length);
  
          index = IX_SCRIPTS_OFFSET;
          offset = inIndexes[index];
@@ -401,7 +412,7 @@ final class CollationDataReader /* all static */ {
              }
              data.scripts = new char[length / 2];
              for(int i = 0; i < length / 2; ++i) {
-                data.scripts[i] = ds.readChar();
+                data.scripts[i] = inBytes.getChar();
              }
              length &= 1;
          } else if(data == null) {
@@ -409,7 +420,7 @@ final class CollationDataReader /* all static */ {
          } else if(baseData != null) {
              data.scripts = baseData.scripts;
          }
-        ds.skipBytes(length);
+        ICUBinary.skipBytes(inBytes, length);
  
          index = IX_COMPRESSIBLE_BYTES_OFFSET;
          offset = inIndexes[index];
@@ -420,7 +431,7 @@ final class CollationDataReader /* all static */ {
              }
              data.compressibleBytes = new boolean[256];
              for(int i = 0; i < 256; ++i) {
-                data.compressibleBytes[i] = ds.readBoolean();
+                data.compressibleBytes[i] = inBytes.get() != 0;
              }
              length -= 256;
          } else if(data == null) {
@@ -430,14 +441,12 @@ final class CollationDataReader /* all static */ {
          } else {
              throw new ICUException("Missing data for compressible primary lead bytes");
          }
-        ds.skipBytes(length);
+        ICUBinary.skipBytes(inBytes, length);
  
          index = IX_RESERVED18_OFFSET;
          offset = inIndexes[index];
          length = inIndexes[index + 1] - offset;
-        ds.skipBytes(length);
-
-        ds.close();
+        ICUBinary.skipBytes(inBytes, length);
  
          CollationSettings ts = tailoring.settings.readOnly();
          int options = inIndexes[IX_OPTIONS] & 0xffff;
@@ -481,7 +490,7 @@ final class CollationDataReader /* all static */ {
          }
      }
      private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable();
-    private static final byte DATA_FORMAT[] = { 0x55, 0x43, 0x6f, 0x6c  };  // "UCol"
+    private static final int DATA_FORMAT = 0x55436f6c;  // "UCol"
  
      private CollationDataReader() {}  // no constructor
  }
diff --git a/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationLoader.java b/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationLoader.java

index ded3e9bccf3f47f7d02d83904e216b7f0731c76f..8ec297fa6fafa0ac559bdb843da36bc5668820e8 100644 (file)
--- a/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationLoader.java
+++ b/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationLoader.java
@@ -13,8 +13,8 @@
  
  package com.ibm.icu.impl.coll;
  
-import java.io.ByteArrayInputStream;
  import java.io.IOException;
+import java.nio.ByteBuffer;
  import java.util.MissingResourceException;
  
  import com.ibm.icu.impl.ICUResourceBundle;
@@ -203,14 +203,13 @@ public final class CollationLoader {
  
          // deserialize
          UResourceBundle binary = ((ICUResourceBundle)data).get("%%CollationBin");
-        byte[] inBytes = binary.getBinary(null);
-        ByteArrayInputStream inStream = new ByteArrayInputStream(inBytes);
+        ByteBuffer inBytes = binary.getBinary();
          try {
-            CollationDataReader.read(root, inStream, t);
+            CollationDataReader.read(root, inBytes, t);
          } catch (IOException e) {
              throw new ICUUncheckedIOException("Failed to load collation tailoring data for locale:"
                      + actualLocale + " type:" + type, e);
-        }   // No need to close BAIS.
+        }
  
          // Try to fetch the optional rules string.
          try {
diff --git a/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationRoot.java b/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationRoot.java

index b75949bedc8f668bb285b3f65429dab83d1f5987..13c0071f736939652735846b2b9fee2d1f37b793 100644 (file)
--- a/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationRoot.java
+++ b/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationRoot.java
@@ -15,6 +15,7 @@ import java.io.IOException;
  import java.io.InputStream;
  import java.util.MissingResourceException;
  
+import com.ibm.icu.impl.ICUBinary;
  import com.ibm.icu.impl.ICUData;
  import com.ibm.icu.impl.ICUResourceBundle;
  
@@ -42,11 +43,12 @@ public final class CollationRoot {  // purely static
  
      static {  // Corresponds to C++ load() function.
          CollationTailoring t = new CollationTailoring(null);
+        // TODO: Optionally load from a .dat file or stand-alone .icu file.
          String path = ICUResourceBundle.ICU_BUNDLE + "/coll/ucadata.icu";
-        InputStream inBytes = ICUData.getRequiredStream(path);
+        InputStream is = ICUData.getRequiredStream(path);
          RuntimeException e2 = null;
          try {
-            CollationDataReader.read(null, inBytes, t);
+            CollationDataReader.read(null, ICUBinary.getByteBufferFromInputStream(is), t);
          } catch(IOException e) {
              t = null;
              e2 = new MissingResourceException(
diff --git a/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationTailoring.java b/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationTailoring.java

index b1db1019243f5578d7b5800c7f5a7ec6c96a2cca..b49aafbfd89108568820ed3cf2af6d91e065515b 100644 (file)
--- a/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationTailoring.java
+++ b/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationTailoring.java
@@ -54,16 +54,21 @@ public final class CollationTailoring {
                  ucaVersion.getMilli() << 6,
                  0);
      }
-    void setVersion(VersionInfo baseVersion, VersionInfo rulesVersion) {
-        version = VersionInfo.getInstance(
-                VersionInfo.UCOL_BUILDER_VERSION.getMajor(),
-                baseVersion.getMinor(),
-                (baseVersion.getMilli() & 0xc0) + ((rulesVersion.getMajor() + (rulesVersion.getMajor() >> 6)) & 0x3f),
-                (rulesVersion.getMinor() << 3) + (rulesVersion.getMinor() >> 5) + rulesVersion.getMilli() +
-                        (rulesVersion.getMicro() << 4) + (rulesVersion.getMicro() >> 4));
+    void setVersion(int baseVersion, int rulesVersion) {
+        // Packs the builder major version, the base UCA version bits and a lossy fold of rulesVersion into one int; see comments for version field.
+        int r = (rulesVersion >> 16) & 0xff00;  // rulesVersion bits 31..24, kept at bits 15..8
+        int s = (rulesVersion >> 16) & 0xff;  // rulesVersion bits 23..16
+        int t = (rulesVersion >> 8) & 0xff;  // rulesVersion bits 15..8
+        int q = rulesVersion & 0xff;  // rulesVersion bits 7..0
+        version = (VersionInfo.UCOL_BUILDER_VERSION.getMajor() << 24) |
+                (baseVersion & 0xffc000) |  // UCA version u.v.w
+                ((r + (r >> 6)) & 0x3f00) |  // r folded into the six bits 13..8
+                (((s << 3) + (s >> 5) + t + (q << 4) + (q >> 4)) & 0xff);  // NOTE(review): the version field comment says (s<<5)+(s>>3); the replaced code also used (s<<3)+(s>>5) - confirm which is intended
      }
      int getUCAVersion() {
-        return (version.getMinor() << 4) | (version.getMilli() >> 6);
+        // Version second byte/bits 23..16 go to result bits 11..4,
+        // third byte/bits 15..14 go to result bits 1..0; result bits 3..2 are always 0.
+        return ((version >> 12) & 0xff0) | ((version >> 14) & 3);
      }
  
      // data for sorting etc.
@@ -78,8 +83,7 @@ public final class CollationTailoring {
      // version[1]: bits 7..3=u, bits 2..0=v
      // version[2]: bits 7..6=w, bits 5..0=r
      // version[3]= (s<<5)+(s>>3)+t+(q<<4)+(q>>4)
-    public VersionInfo version = ZERO_VERSION;
-    private static final VersionInfo ZERO_VERSION = VersionInfo.getInstance(0, 0, 0, 0);
+    public int version = 0;
  
      // owned objects
      CollationData ownedData;
diff --git a/icu4j/main/classes/collate/src/com/ibm/icu/text/RuleBasedCollator.java b/icu4j/main/classes/collate/src/com/ibm/icu/text/RuleBasedCollator.java

index 1ee5ba2b447fcac008ac87a8619553a9b84c25b2..519840e631fa05c350ba6e26b685adbf0ee9292c 100644 (file)
--- a/icu4j/main/classes/collate/src/com/ibm/icu/text/RuleBasedCollator.java
+++ b/icu4j/main/classes/collate/src/com/ibm/icu/text/RuleBasedCollator.java
@@ -1817,11 +1817,11 @@ public final class RuleBasedCollator extends Collator {
       */
      @Override
      public VersionInfo getVersion() {
-        VersionInfo version = tailoring.version;
+        int version = tailoring.version;  // four version bytes packed into one int, first byte in bits 31..24
          int rtVersion = VersionInfo.UCOL_RUNTIME_VERSION.getMajor();
          return VersionInfo.getInstance(
-                version.getMajor() + (rtVersion << 4) + (rtVersion >> 4),
-                version.getMinor(), version.getMilli(), version.getMicro());
+                (version >>> 24) + (rtVersion << 4) + (rtVersion >> 4),  // unsigned shift keeps the top byte in 0..255
+                ((version >> 16) & 0xff), ((version >> 8) & 0xff), (version & 0xff));  // masks strip the sign extension of >>
      }
  
      /**
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUBinary.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUBinary.java

index 634f418d25423f6749b33a0798e45c53cff29fd0..730b89d35a8e2525d3d5afe02c585716a30d5bc9 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUBinary.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUBinary.java
@@ -1,7 +1,7 @@
  /*
   *******************************************************************************
- * Copyright (C) 1996-2010, International Business Machines Corporation and    *
- * others. All Rights Reserved.                                                *
+ * Copyright (C) 1996-2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
   *******************************************************************************
   */
  package com.ibm.icu.impl;
@@ -9,6 +9,8 @@ package com.ibm.icu.impl;
  import java.io.DataInputStream;
  import java.io.IOException;
  import java.io.InputStream;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
  import java.util.Arrays;
  
  import com.ibm.icu.util.VersionInfo;
@@ -146,6 +148,90 @@ public final class ICUBinary
                                         dataVersion[2], dataVersion[3]);
      }
  
+    /**
+     * Reads an ICU data header, checks the data format, and returns the data version.
+     * <p>Assumes that the ByteBuffer position is 0 on input (checked only by an assert).
+     * The buffer byte order is set according to the data, and the buffer position is
+     * advanced past the header (including UDataInfo and comment).
+     * <p>See C++ ucmndata.h and unicode/udata.h.
+     * @param bytes the ICU data item; all header fields are read with absolute gets
+     * @param dataFormat expected format signature, matched big-endian against header bytes 12..15
+     * @param authenticate optional check of the format version (header bytes 16..19); may be null
+     * @return dataVersion, header bytes 20..23 packed big-endian into one int
+     * @throws IOException if this is not a valid ICU data item of the expected dataFormat
+     */
+    public static final int readHeader(ByteBuffer bytes, int dataFormat, Authenticate authenticate)
+            throws IOException {
+        assert bytes.position() == 0;
+        byte magic1 = bytes.get(2);  // NOTE(review): a buffer shorter than the header would fail in these absolute gets with an unchecked exception, not IOException - confirm callers
+        byte magic2 = bytes.get(3);
+        if (magic1 != MAGIC1 || magic2 != MAGIC2) {
+            throw new IOException(MAGIC_NUMBER_AUTHENTICATION_FAILED_);
+        }
+
+        byte isBigEndian = bytes.get(8);
+        byte charsetFamily = bytes.get(9);
+        byte sizeofUChar = bytes.get(10);
+        if (isBigEndian < 0 || 1 < isBigEndian ||
+                charsetFamily != CHAR_SET_ || sizeofUChar != CHAR_SIZE_) {
+            throw new IOException(HEADER_AUTHENTICATION_FAILED_);
+        }
+        bytes.order(isBigEndian != 0 ? ByteOrder.BIG_ENDIAN : ByteOrder.LITTLE_ENDIAN);  // must be set before the 16-bit reads below
+
+        int headerSize = bytes.getChar(0);  // unsigned 16-bit, read in the data's byte order
+        int sizeofUDataInfo = bytes.getChar(4);
+        if (sizeofUDataInfo < 20 || headerSize < (sizeofUDataInfo + 4)) {
+            throw new IOException("Internal Error: Header size error");
+        }
+        // TODO: Change Authenticate to take int major, int minor, int milli, int micro
+        // to avoid array allocation.
+        byte[] formatVersion = new byte[] {
+            bytes.get(16), bytes.get(17), bytes.get(18), bytes.get(19)
+        };
+        if (bytes.get(12) != (byte)(dataFormat >> 24) ||
+                bytes.get(13) != (byte)(dataFormat >> 16) ||
+                bytes.get(14) != (byte)(dataFormat >> 8) ||
+                bytes.get(15) != (byte)dataFormat ||
+                (authenticate != null && !authenticate.isDataVersionAcceptable(formatVersion))) {
+            throw new IOException(HEADER_AUTHENTICATION_FAILED_);
+        }
+
+        bytes.position(headerSize);  // the absolute gets below do not move the position again
+        return  // dataVersion
+                ((int)bytes.get(20) << 24) |  // no mask needed: the shift discards the sign extension
+                ((bytes.get(21) & 0xff) << 16) |
+                ((bytes.get(22) & 0xff) << 8) |
+                (bytes.get(23) & 0xff);
+    }
+
+    public static final void skipBytes(ByteBuffer bytes, int skipLength) {
+        if (skipLength > 0) {  // zero and negative lengths are ignored, not rejected
+            bytes.position(bytes.position() + skipLength);  // relative skip from the current position
+        }
+    }
+
+    /**
+     * Reads the entire contents from the stream (until read() signals the end of the stream) and wraps
+     * them into a ByteBuffer with position 0 and limit = number of bytes read. Closes the InputStream at the end.
+     */
+    public static final ByteBuffer getByteBufferFromInputStream(InputStream is) throws IOException {
+        try {
+            byte[] bytes = new byte[Math.max(is.available() + 1, 128)];  // available() is only a size hint; the spare byte avoids growing at the end
+            int length = 0, numRead;
+            while ((numRead = is.read(bytes, length, bytes.length - length)) >= 0) {  // never read inside an assert: skipped without -ea
+                length += numRead;
+                if (length == bytes.length) {  // full: grow (TODO Java 6: Arrays.copyOf)
+                    byte[] newBytes = new byte[2 * bytes.length];
+                    System.arraycopy(bytes, 0, newBytes, 0, length);
+                    bytes = newBytes;
+                }
+            }
+            return ByteBuffer.wrap(bytes, 0, length);
+        } finally {
+            is.close();
+        }
+    }
+
      // private variables -------------------------------------------------
    
      /**
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2.java

index 48bb9cc49a31ba1ef09c702533651e348377825d..af8137b63b344014d037a3c390b809ae8a3c8092 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2.java
@@ -1,6 +1,6 @@
  /*
   *******************************************************************************
- * Copyright (C) 2009-2011, International Business Machines Corporation and
+ * Copyright (C) 2009-2014, International Business Machines Corporation and
   * others. All Rights Reserved.
   *******************************************************************************
   */
@@ -10,6 +10,8 @@ import java.io.DataInputStream;
  import java.io.DataOutputStream;
  import java.io.IOException;
  import java.io.InputStream;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
  import java.util.Iterator;
  import java.util.NoSuchElementException;
  
@@ -200,7 +202,7 @@ public abstract class Trie2 implements Iterable<Trie2.Range> {
      }
      
      
-     /**
+    /**
       * Get the UTrie version from an InputStream containing the serialized form
       * of either a Trie (version 1) or a Trie2 (version 2).
       *
@@ -242,8 +244,115 @@ public abstract class Trie2 implements Iterable<Trie2.Range> {
          }
          return 0;
      }
-    
-    
+
+    /**
+     * Deserializes a Trie2 from a ByteBuffer; the trie may be stored in the buffer's byte order or in the reverse order.
+     * Reads from the current position, leaves the buffer after the end of the trie, and restores the buffer's byte order; throws IllegalArgumentException for an unknown signature or value width.
+     */
+    public static Trie2 createFromSerialized(ByteBuffer bytes) throws IOException {
+        ByteOrder outerByteOrder = bytes.order();
+        try {
+            UTrie2Header header = new UTrie2Header();
+
+            /* check the signature */
+            header.signature = bytes.getInt();
+            switch (header.signature) {
+            case 0x54726932:
+                // Read correctly in the current order: the buffer is already set to the trie data byte order.
+                break;
+            case 0x32697254:
+                bytes.order(outerByteOrder == ByteOrder.BIG_ENDIAN ? ByteOrder.LITTLE_ENDIAN : ByteOrder.BIG_ENDIAN);  // temporarily reversed
+                header.signature = 0x54726932;
+                break;
+            default:
+                throw new IllegalArgumentException("Buffer does not contain a serialized UTrie2");
+            }
+
+            header.options = bytes.getChar();
+            header.indexLength = bytes.getChar();
+            header.shiftedDataLength = bytes.getChar();
+            header.index2NullOffset = bytes.getChar();
+            header.dataNullOffset   = bytes.getChar();
+            header.shiftedHighStart = bytes.getChar();
+
+            // Trie2 data width - 0: 16 bits
+            //                    1: 32 bits
+            if ((header.options & UTRIE2_OPTIONS_VALUE_BITS_MASK) > 1) {
+                throw new IllegalArgumentException("UTrie2 serialized format error.");
+            }
+            ValueWidth width;
+            Trie2 This;
+            if ((header.options & UTRIE2_OPTIONS_VALUE_BITS_MASK) == 0) {
+                width = ValueWidth.BITS_16;
+                This = new Trie2_16();
+            } else {
+                width = ValueWidth.BITS_32;
+                This = new Trie2_32();
+            }
+            This.header = header;
+
+            /* get the length values and offsets */
+            This.indexLength      = header.indexLength;
+            This.dataLength       = header.shiftedDataLength << UTRIE2_INDEX_SHIFT;
+            This.index2NullOffset = header.index2NullOffset;
+            This.dataNullOffset   = header.dataNullOffset;
+            This.highStart        = header.shiftedHighStart << UTRIE2_SHIFT_1;
+            This.highValueIndex   = This.dataLength - UTRIE2_DATA_GRANULARITY;
+            if (width == ValueWidth.BITS_16) {
+                This.highValueIndex += This.indexLength;
+            }
+
+            // Allocate the Trie2 index array. If the data width is 16 bits, the array also
+            // includes the space for the data.
+
+            int indexArraySize = This.indexLength;
+            if (width == ValueWidth.BITS_16) {
+                indexArraySize += This.dataLength;
+            }
+            This.index = new char[indexArraySize];
+
+            /* Read in the index */
+            int i;
+            for (i=0; i<This.indexLength; i++) {
+                This.index[i] = bytes.getChar();
+            }
+
+            /* Read in the data. 16 bit data goes in the same array as the index.
+             * 32 bit data goes in its own separate data array.
+             */
+            if (width == ValueWidth.BITS_16) {
+                This.data16 = This.indexLength;
+                for (i=0; i<This.dataLength; i++) {
+                    This.index[This.data16 + i] = bytes.getChar();
+                }
+            } else {
+                This.data32 = new int[This.dataLength];
+                for (i=0; i<This.dataLength; i++) {
+                    This.data32[i] = bytes.getInt();
+                }
+            }
+
+            switch(width) {
+            case BITS_16:
+                This.data32 = null;
+                This.initialValue = This.index[This.dataNullOffset];
+                This.errorValue   = This.index[This.data16+UTRIE2_BAD_UTF8_DATA_OFFSET];
+                break;
+            case BITS_32:
+                This.data16=0;
+                This.initialValue = This.data32[This.dataNullOffset];
+                This.errorValue   = This.data32[UTRIE2_BAD_UTF8_DATA_OFFSET];
+                break;
+            default:
+                throw new IllegalArgumentException("UTrie2 serialized format error.");
+            }
+
+            return This;
+        } finally {
+            bytes.order(outerByteOrder);  // undo any temporary reversal, also when an exception is thrown
+        }
+    }
+
      /**
       * Get the value for a code point as stored in the Trie2.
       *
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2_32.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2_32.java

index 0a9b95a17aad6c36663157a66ea23bbbddb48596..96b581a8ee099bc60ed03989a6b78a912bc979e9 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2_32.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2_32.java
@@ -1,6 +1,6 @@
  /*
   *******************************************************************************
- * Copyright (C) 2009-2010, International Business Machines Corporation and
+ * Copyright (C) 2009-2014, International Business Machines Corporation and
   * others. All Rights Reserved.
   *******************************************************************************
   */
@@ -10,6 +10,7 @@ import java.io.DataOutputStream;
  import java.io.IOException;
  import java.io.InputStream;
  import java.io.OutputStream;
+import java.nio.ByteBuffer;
  
  /**
   * @author aheninger
@@ -54,6 +55,10 @@ public class Trie2_32 extends Trie2 {
          return (Trie2_32) Trie2.createFromSerialized(is);
      }
  
+    public static Trie2_32  createFromSerialized(ByteBuffer bytes) throws IOException {
+        return (Trie2_32) Trie2.createFromSerialized(bytes);  // the cast fails with ClassCastException when the serialized trie holds 16-bit values (Trie2 then builds a Trie2_16)
+    }
+
      /**
       * Get the value for a code point as stored in the Trie2.
       *
author	Markus Scherer <markus.icu@gmail.com>
	Sun, 1 Jun 2014 00:15:06 +0000 (00:15 +0000)
committer	Markus Scherer <markus.icu@gmail.com>
	Sun, 1 Jun 2014 00:15:06 +0000 (00:15 +0000)
icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationBuilder.java		patch \| blob \| history
icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationDataReader.java		patch \| blob \| history
icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationLoader.java		patch \| blob \| history
icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationRoot.java		patch \| blob \| history
icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationTailoring.java		patch \| blob \| history
icu4j/main/classes/collate/src/com/ibm/icu/text/RuleBasedCollator.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/impl/ICUBinary.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2_32.java		patch \| blob \| history