From 056237d6db6cb302f252a5199a43767a57bdbd16 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Mon, 24 Aug 2015 20:30:44 +0000 Subject: [PATCH] ICU-11004 bulk-read data from ByteBuffer; read .res key strings up front into byte[] for faster lookup X-SVN-Rev: 37806 --- .../src/com/ibm/icu/charset/CharsetHZ.java | 8 +- .../src/com/ibm/icu/charset/CharsetICU.java | 6 +- .../src/com/ibm/icu/charset/CharsetMBCS.java | 4 +- .../com/ibm/icu/charset/UConverterAlias.java | 50 ++++---- .../charset/UConverterAliasDataReader.java | 41 +----- .../ibm/icu/charset/UConverterDataReader.java | 10 +- .../icu/impl/coll/CollationDataReader.java | 50 +++----- .../core/src/com/ibm/icu/impl/CharTrie.java | 7 +- .../core/src/com/ibm/icu/impl/ICUBinary.java | 54 ++++++++ .../ibm/icu/impl/ICUResourceBundleReader.java | 118 +++++++++++------- .../core/src/com/ibm/icu/impl/IntTrie.java | 7 +- .../ibm/icu/impl/InvalidFormatException.java | 12 +- .../src/com/ibm/icu/impl/Normalizer2Impl.java | 13 +- .../ibm/icu/impl/StringPrepDataReader.java | 8 +- .../core/src/com/ibm/icu/impl/Trie.java | 7 +- .../core/src/com/ibm/icu/impl/Trie2.java | 16 +-- .../core/src/com/ibm/icu/impl/UBiDiProps.java | 15 +-- .../core/src/com/ibm/icu/impl/UCaseProps.java | 17 +-- .../ibm/icu/impl/UCharacterNameReader.java | 21 +--- .../com/ibm/icu/impl/UCharacterProperty.java | 12 +- .../com/ibm/icu/impl/UPropertyAliases.java | 7 +- .../src/com/ibm/icu/text/BreakDictionary.java | 26 ++-- .../src/com/ibm/icu/text/DictionaryData.java | 16 +-- .../src/com/ibm/icu/text/RBBIDataWrapper.java | 52 +++----- .../src/com/ibm/icu/text/SpoofChecker.java | 16 +-- .../core/src/com/ibm/icu/text/StringPrep.java | 3 +- 26 files changed, 259 insertions(+), 337 deletions(-) diff --git a/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetHZ.java b/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetHZ.java index 4fc11086ac0..f39a54d2ad1 100644 --- a/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetHZ.java +++ b/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetHZ.java @@ -1,7 +1,7 @@ /* ******************************************************************************* - * Copyright (C) 2008-2009, International Business Machines Corporation and * - * others. All Rights Reserved. * + * Copyright (C) 2008-2015, International Business Machines Corporation and + * others. All Rights Reserved. ******************************************************************************* */ package com.ibm.icu.charset; @@ -12,6 +12,7 @@ import java.nio.IntBuffer; import java.nio.charset.CharsetDecoder; import java.nio.charset.CharsetEncoder; import java.nio.charset.CoderResult; +import java.nio.charset.UnsupportedCharsetException; import com.ibm.icu.text.UTF16; import com.ibm.icu.text.UnicodeSet; @@ -32,6 +33,9 @@ class CharsetHZ extends CharsetICU { public CharsetHZ(String icuCanonicalName, String canonicalName, String[] aliases) { super(icuCanonicalName, canonicalName, aliases); gbCharset = (CharsetMBCS) new CharsetProviderICU().charsetForName("GBK"); + if (gbCharset == null) { + throw new UnsupportedCharsetException("unable to open ICU GBK Charset, required for HZ"); + } maxBytesPerChar = 4; minBytesPerChar = 1; diff --git a/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetICU.java b/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetICU.java index 7779de9b564..0942663c749 100644 --- a/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetICU.java +++ b/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetICU.java @@ -179,7 +179,11 @@ public abstract class CharsetICU extends Charset{ return conv; } }catch (InvocationTargetException e) { - throw new UnsupportedCharsetException( icuCanonicalName+": "+"Could not load " + className+ ". Exception:" + e.getTargetException()); + Throwable cause = e.getCause(); + UnsupportedCharsetException e2 = new UnsupportedCharsetException( + icuCanonicalName + ": " + "Could not load " + className + ". Exception: " + cause); + e2.initCause(cause); + throw e2; } }catch(ClassNotFoundException ex){ }catch(NoSuchMethodException ex){ diff --git a/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetMBCS.java b/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetMBCS.java index 36ca2f01740..7ca3fc534e3 100644 --- a/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetMBCS.java +++ b/icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetMBCS.java @@ -224,9 +224,9 @@ class CharsetMBCS extends CharsetICU { reader = new UConverterDataReader(b); reader.readStaticData(staticData); } catch (IOException e) { - throw new InvalidFormatException(); + throw new InvalidFormatException(e); } catch (Exception e) { - throw new InvalidFormatException(); + throw new InvalidFormatException(e); } UConverterSharedData data = null; diff --git a/icu4j/main/classes/charset/src/com/ibm/icu/charset/UConverterAlias.java b/icu4j/main/classes/charset/src/com/ibm/icu/charset/UConverterAlias.java index 2d53b887e79..190cee7334e 100644 --- a/icu4j/main/classes/charset/src/com/ibm/icu/charset/UConverterAlias.java +++ b/icu4j/main/classes/charset/src/com/ibm/icu/charset/UConverterAlias.java @@ -1,6 +1,6 @@ /* ******************************************************************************* - * Copyright (C) 2006-2014, International Business Machines Corporation and + * Copyright (C) 2006-2015, International Business Machines Corporation and * others. All Rights Reserved. ******************************************************************************* * @@ -29,19 +29,19 @@ final class UConverterAlias { static final int NUM_HIDDEN_TAGS = 1; - static int[] gConverterList = null; + static char[] gConverterList = null; - static int[] gTagList = null; + static char[] gTagList = null; - static int[] gAliasList = null; + static char[] gAliasList = null; - static int[] gUntaggedConvArray = null; + static char[] gUntaggedConvArray = null; - static int[] gTaggedAliasArray = null; + static char[] gTaggedAliasArray = null; - static int[] gTaggedAliasLists = null; + static char[] gTaggedAliasLists = null; - static int[] gOptionTable = null; + static char[] gOptionTable = null; static byte[] gStringTable = null; @@ -134,20 +134,18 @@ final class UConverterAlias { if (tableStart < minTocLength) { throw new IOException("Invalid data format."); } - gConverterList = new int[tableArray[converterListIndex]]; - gTagList= new int[tableArray[tagListIndex]]; - gAliasList = new int[tableArray[aliasListIndex]]; - gUntaggedConvArray = new int[tableArray[untaggedConvArrayIndex]]; - gTaggedAliasArray = new int[tableArray[taggedAliasArrayIndex]]; - gTaggedAliasLists = new int[tableArray[taggedAliasListsIndex]]; - gOptionTable = new int[tableArray[optionTableIndex]]; + gConverterList = ICUBinary.getChars(b, tableArray[converterListIndex], 0); + gTagList = ICUBinary.getChars(b, tableArray[tagListIndex], 0); + gAliasList = ICUBinary.getChars(b, tableArray[aliasListIndex], 0); + gUntaggedConvArray = ICUBinary.getChars(b, tableArray[untaggedConvArrayIndex], 0); + gTaggedAliasArray = ICUBinary.getChars(b, tableArray[taggedAliasArrayIndex], 0); + gTaggedAliasLists = ICUBinary.getChars(b, tableArray[taggedAliasListsIndex], 0); + gOptionTable = ICUBinary.getChars(b, tableArray[optionTableIndex], 0); gStringTable = new byte[tableArray[stringTableIndex]*2]; + b.get(gStringTable); gNormalizedStringTable = new byte[tableArray[normalizedStringTableIndex]*2]; + b.get(gNormalizedStringTable); - reader.read(gConverterList, gTagList, - gAliasList, gUntaggedConvArray, - gTaggedAliasArray, gTaggedAliasLists, - gOptionTable, gStringTable, gNormalizedStringTable); data = ByteBuffer.allocate(0); // dummy UDataMemory object in absence // of memory mapping @@ -445,11 +443,9 @@ final class UConverterAlias { if (listOffset != 0) { //int listCount = gTaggedAliasListsArray[listOffset]; /* +1 to skip listCount */ - int[] currListArray = gTaggedAliasLists; int currListArrayIndex = listOffset + 1; - return GET_STRING(currListArray[currListArrayIndex + n]); - + return GET_STRING(gTaggedAliasLists[currListArrayIndex + n]); } /* else this shouldn't happen. internal program error */ } @@ -482,10 +478,9 @@ final class UConverterAlias { int listOffset = findTaggedAliasListsOffset(alias, standard); if (0 < listOffset && listOffset < gTaggedAliasLists.length) { - int[] currListArray = gTaggedAliasLists; int currListArrayIndex = listOffset + 1; - if (currListArray[0] != 0) { - return GET_STRING(currListArray[currListArrayIndex]); + if (gTaggedAliasLists[0] != 0) { + return GET_STRING(gTaggedAliasLists[currListArrayIndex]); } } } @@ -686,13 +681,12 @@ final class UConverterAlias { int currAlias; int listCount = gTaggedAliasLists[listOffset]; /* +1 to skip listCount */ - int[] currList = gTaggedAliasLists; int currListArrayIndex = listOffset + 1; for (currAlias = 0; currAlias < listCount; currAlias++) { - if (currList[currAlias + currListArrayIndex] != 0 + if (gTaggedAliasLists[currAlias + currListArrayIndex] != 0 && compareNames( alias, - GET_STRING(currList[currAlias + currListArrayIndex])) == 0) { + GET_STRING(gTaggedAliasLists[currAlias + currListArrayIndex])) == 0) { return true; } } diff --git a/icu4j/main/classes/charset/src/com/ibm/icu/charset/UConverterAliasDataReader.java b/icu4j/main/classes/charset/src/com/ibm/icu/charset/UConverterAliasDataReader.java index ff377ed4544..01242f32c73 100644 --- a/icu4j/main/classes/charset/src/com/ibm/icu/charset/UConverterAliasDataReader.java +++ b/icu4j/main/classes/charset/src/com/ibm/icu/charset/UConverterAliasDataReader.java @@ -1,6 +1,6 @@ /* ******************************************************************************* - * Copyright (C) 2006-2014, International Business Machines Corporation and + * Copyright (C) 2006-2015, International Business Machines Corporation and * others. All Rights Reserved. ******************************************************************************* */ @@ -144,42 +144,8 @@ final class UConverterAliasDataReader implements ICUBinary.Authenticate { protected int[] readToc(int n)throws IOException { - int[] toc = new int[n]; //Read the toc - for (int i = 0; i < n ; ++i) { - toc[i] = byteBuffer.getInt() & UNSIGNED_INT_MASK; - } - return toc; - } - - protected void read(int[] convList, int[] tagList, int[] aliasList, int[]untaggedConvArray, int[] taggedAliasArray, int[] taggedAliasLists, int[] optionTable, byte[] stringTable, byte[] normalizedStringTable) throws IOException{ - int i; - //int listnum = 1; - //long listsize; - - for(i = 0; i < convList.length; ++i) - convList[i] = byteBuffer.getChar(); - - for(i = 0; i < tagList.length; ++i) - tagList[i] = byteBuffer.getChar(); - - for(i = 0; i < aliasList.length; ++i) - aliasList[i] = byteBuffer.getChar(); - - for(i = 0; i < untaggedConvArray.length; ++i) - untaggedConvArray[i] = byteBuffer.getChar(); - - for(i = 0; i < taggedAliasArray.length; ++i) - taggedAliasArray[i] = byteBuffer.getChar(); - - for(i = 0; i < taggedAliasLists.length; ++i) - taggedAliasLists[i] = byteBuffer.getChar(); - - for(i = 0; i < optionTable.length; ++i) - optionTable[i] = byteBuffer.getChar(); - - byteBuffer.get(stringTable); - byteBuffer.get(normalizedStringTable); + return ICUBinary.getInts(byteBuffer, n, 0); } public boolean isDataVersionAcceptable(byte version[]) @@ -211,7 +177,4 @@ final class UConverterAliasDataReader implements ICUBinary.Authenticate { // DATA_FORMAT_ID_ values taken from icu4c isAcceptable (ucnv_io.c) private static final int DATA_FORMAT_ID = 0x4376416c; // dataFormat="CvAl" private static final byte DATA_FORMAT_VERSION[] = {3, 0, 1}; - - //private static final int UNSIGNED_SHORT_MASK = 0xffff; - private static final int UNSIGNED_INT_MASK = 0xffffffff; } diff --git a/icu4j/main/classes/charset/src/com/ibm/icu/charset/UConverterDataReader.java b/icu4j/main/classes/charset/src/com/ibm/icu/charset/UConverterDataReader.java index 0031b0e5899..ae8368fce25 100644 --- a/icu4j/main/classes/charset/src/com/ibm/icu/charset/UConverterDataReader.java +++ b/icu4j/main/classes/charset/src/com/ibm/icu/charset/UConverterDataReader.java @@ -550,9 +550,8 @@ final class UConverterDataReader { case CharsetMBCS.MBCS_OUTPUT_2: case CharsetMBCS.MBCS_OUTPUT_2_SISO: case CharsetMBCS.MBCS_OUTPUT_3_EUC: - mbcsTable.fromUnicodeChars = new char[header.fromUBytesLength / 2]; - byteBuffer.asCharBuffer().get(mbcsTable.fromUnicodeChars); - ICUBinary.skipBytes(byteBuffer, header.fromUBytesLength & ~1); + mbcsTable.fromUnicodeChars = ICUBinary.getChars( + byteBuffer, header.fromUBytesLength / 2, 0); break; case CharsetMBCS.MBCS_OUTPUT_3: case CharsetMBCS.MBCS_OUTPUT_4_EUC: @@ -560,9 +559,8 @@ final class UConverterDataReader { byteBuffer.get(mbcsTable.fromUnicodeBytes); break; case CharsetMBCS.MBCS_OUTPUT_4: - mbcsTable.fromUnicodeInts = new int[header.fromUBytesLength / 4]; - byteBuffer.asIntBuffer().get(mbcsTable.fromUnicodeInts); - ICUBinary.skipBytes(byteBuffer, header.fromUBytesLength & ~3); + mbcsTable.fromUnicodeInts = ICUBinary.getInts( + byteBuffer, header.fromUBytesLength / 4, 0); break; default: // Cannot occur, caller checked already. diff --git a/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationDataReader.java b/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationDataReader.java index 364e05283a8..f90e9aa68be 100644 --- a/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationDataReader.java +++ b/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationDataReader.java @@ -155,11 +155,7 @@ final class CollationDataReader /* all static */ { throw new ICUException("Collation base data must not reorder scripts"); } reorderCodesLength = length / 4; - reorderCodes = new int[reorderCodesLength]; - for(int i = 0; i < reorderCodesLength; ++i) { - reorderCodes[i] = inBytes.getInt(); - } - length &= 3; + reorderCodes = ICUBinary.getInts(inBytes, reorderCodesLength, length & 3); // The reorderRanges (if any) are the trailing reorderCodes entries. // Split the array at the boundary. @@ -175,8 +171,8 @@ final class CollationDataReader /* all static */ { } else { reorderCodes = new int[0]; reorderCodesLength = 0; + ICUBinary.skipBytes(inBytes, length); } - ICUBinary.skipBytes(inBytes, length); // There should be a reorder table only if there are reorder codes. // However, when there are reorder codes the reorder table may be omitted to reduce @@ -237,13 +233,10 @@ final class CollationDataReader /* all static */ { if(data == null) { throw new ICUException("Tailored ces without tailored trie"); } - data.ces = new long[length / 8]; - for(int i = 0; i < length / 8; ++i) { - data.ces[i] = inBytes.getLong(); - } - length &= 7; + data.ces = ICUBinary.getLongs(inBytes, length / 8, length & 7); + } else { + ICUBinary.skipBytes(inBytes, length); } - ICUBinary.skipBytes(inBytes, length); index = IX_RESERVED10_OFFSET; offset = inIndexes[index]; @@ -257,13 +250,10 @@ final class CollationDataReader /* all static */ { if(data == null) { throw new ICUException("Tailored ce32s without tailored trie"); } - data.ce32s = new int[length / 4]; - for(int i = 0; i < length / 4; ++i) { - data.ce32s[i] = inBytes.getInt(); - } - length &= 3; + data.ce32s = ICUBinary.getInts(inBytes, length / 4, length & 3); + } else { + ICUBinary.skipBytes(inBytes, length); } - ICUBinary.skipBytes(inBytes, length); int jamoCE32sStart = inIndexes[IX_JAMO_CE32S_START]; if(jamoCE32sStart >= 0) { @@ -316,14 +306,10 @@ final class CollationDataReader /* all static */ { if(data == null) { throw new ICUException("Tailored contexts without tailored trie"); } - StringBuilder sb = new StringBuilder(length / 2); - for(int i = 0; i < length / 2; ++i) { - sb.append(inBytes.getChar()); - } - data.contexts = sb.toString(); - length &= 1; + data.contexts = ICUBinary.getString(inBytes, length / 2, length & 1); + } else { + ICUBinary.skipBytes(inBytes, length); } - ICUBinary.skipBytes(inBytes, length); index = IX_UNSAFE_BWD_OFFSET; offset = inIndexes[index]; @@ -352,11 +338,8 @@ final class CollationDataReader /* all static */ { } // Add the ranges from the data file to the unsafe-backward set. USerializedSet sset = new USerializedSet(); - char[] unsafeData = new char[length / 2]; - for(int i = 0; i < length / 2; ++i) { - unsafeData[i] = inBytes.getChar(); - } - length &= 1; + char[] unsafeData = ICUBinary.getChars(inBytes, length / 2, length & 1); + length = 0; sset.getSet(unsafeData, 0); int count = sset.countRanges(); int[] range = new int[2]; @@ -403,11 +386,8 @@ final class CollationDataReader /* all static */ { data.fastLatinTableHeader[i] = inBytes.getChar(); } int tableLength = length / 2 - headerLength; - data.fastLatinTable = new char[tableLength]; - for(int i = 0; i < tableLength; ++i) { - data.fastLatinTable[i] = inBytes.getChar(); - } - length &= 1; + data.fastLatinTable = ICUBinary.getChars(inBytes, tableLength, length & 1); + length = 0; if((header0 >> 8) != CollationFastLatin.VERSION) { throw new ICUException("Fast-Latin table version differs from version in data header"); } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/CharTrie.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/CharTrie.java index ce01b0bbd12..6d8a1b198dc 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/CharTrie.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/CharTrie.java @@ -1,6 +1,6 @@ /* ****************************************************************************** - * Copyright (C) 1996-2014, International Business Machines Corporation and + * Copyright (C) 1996-2015, International Business Machines Corporation and * others. All Rights Reserved. ****************************************************************************** */ @@ -237,10 +237,7 @@ public class CharTrie extends Trie protected final void unserialize(ByteBuffer bytes) { int indexDataLength = m_dataOffset_ + m_dataLength_; - m_index_ = new char[indexDataLength]; - for (int i = 0; i < indexDataLength; i ++) { - m_index_[i] = bytes.getChar(); - } + m_index_ = ICUBinary.getChars(bytes, indexDataLength, 0); m_data_ = m_index_; m_initialValue_ = m_data_[m_dataOffset_]; } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUBinary.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUBinary.java index 47ca79e496e..c72241c36e1 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUBinary.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUBinary.java @@ -372,6 +372,25 @@ public final class ICUBinary { } } + static int compareKeys(CharSequence key, byte[] bytes, int offset) { + for (int i = 0;; ++i, ++offset) { + int c2 = bytes[offset]; + if (c2 == 0) { + if (i == key.length()) { + return 0; + } else { + return 1; // key > table key because key is longer. + } + } else if (i == key.length()) { + return -1; // key < table key because key is shorter. + } + int diff = (int)key.charAt(i) - c2; + if (diff != 0) { + return diff; + } + } + } + // public inner interface ------------------------------------------------ /** @@ -630,6 +649,41 @@ public final class ICUBinary { } } + public static String getString(ByteBuffer bytes, int length, int additionalSkipLength) { + CharSequence cs = bytes.asCharBuffer(); + String s = cs.subSequence(0, length).toString(); + skipBytes(bytes, length * 2 + additionalSkipLength); + return s; + } + + public static char[] getChars(ByteBuffer bytes, int length, int additionalSkipLength) { + char[] dest = new char[length]; + bytes.asCharBuffer().get(dest); + skipBytes(bytes, length * 2 + additionalSkipLength); + return dest; + } + + public static short[] getShorts(ByteBuffer bytes, int length, int additionalSkipLength) { + short[] dest = new short[length]; + bytes.asShortBuffer().get(dest); + skipBytes(bytes, length * 2 + additionalSkipLength); + return dest; + } + + public static int[] getInts(ByteBuffer bytes, int length, int additionalSkipLength) { + int[] dest = new int[length]; + bytes.asIntBuffer().get(dest); + skipBytes(bytes, length * 4 + additionalSkipLength); + return dest; + } + + public static long[] getLongs(ByteBuffer bytes, int length, int additionalSkipLength) { + long[] dest = new long[length]; + bytes.asLongBuffer().get(dest); + skipBytes(bytes, length * 8 + additionalSkipLength); + return dest; + } + /** * Same as ByteBuffer.slice() plus preserving the byte order. */ diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUResourceBundleReader.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUResourceBundleReader.java index b7ccbc2bb93..00886221c51 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUResourceBundleReader.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUResourceBundleReader.java @@ -11,6 +11,7 @@ import java.io.InputStream; import java.lang.ref.SoftReference; import java.nio.ByteBuffer; import java.nio.CharBuffer; +import java.nio.IntBuffer; import com.ibm.icu.util.ICUException; import com.ibm.icu.util.ICUUncheckedIOException; @@ -32,7 +33,9 @@ public final class ICUResourceBundleReader { private static final class IsAcceptable implements ICUBinary.Authenticate { // @Override when we switch to Java 6 public boolean isDataVersionAcceptable(byte formatVersion[]) { - return (1 <= formatVersion[0] && formatVersion[0] <= 3); + return + (formatVersion[0] == 1 && (formatVersion[1] & 0xff) >= 1) || + (2 <= formatVersion[0] && formatVersion[0] <= 3); } } private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable(); @@ -123,6 +126,7 @@ public final class ICUResourceBundleReader { * (equivalent of C++ pRoot) */ private ByteBuffer bytes; + private byte[] keyBytes; private CharBuffer b16BitUnits; private ICUResourceBundleReader poolBundleReader; private int rootRes; @@ -234,7 +238,6 @@ public final class ICUResourceBundleReader { private void init(ByteBuffer inBytes) throws IOException { dataVersion = ICUBinary.readHeader(inBytes, DATA_FORMAT, IS_ACCEPTABLE); int majorFormatVersion = inBytes.get(16); - boolean isFormatVersion10 = majorFormatVersion == 1 && inBytes.get(17) == 0; bytes = ICUBinary.sliceWithOrder(inBytes); int dataLength = bytes.remaining(); @@ -243,11 +246,8 @@ public final class ICUResourceBundleReader { rootRes = bytes.getInt(0); - if(isFormatVersion10) { - localKeyLimit = 0x10000; /* greater than any 16-bit key string offset */ - resourceCache = new ResourceCache(dataLength / 4 - 1); - return; - } + // Bundles with formatVersion 1.1 and later contain an indexes[] array. + // We need it so that we can read the key string bytes up front, for lookup performance. // read the variable-length indexes[] array int indexes0 = getIndexesInt(URES_INDEX_LENGTH); @@ -280,9 +280,27 @@ public final class ICUResourceBundleReader { poolStringIndex16Limit = att >>> 16; } + int keysBottom = 1 + indexLength; + int keysTop = getIndexesInt(URES_INDEX_KEYS_TOP); + if(keysTop > keysBottom) { + // Deserialize the key strings up front. + // Faster table item search at the cost of slower startup and some heap memory. + if(isPoolBundle) { + // Shift the key strings down: + // Pool bundle key strings are used with a 0-based index, + // unlike regular bundles' key strings for which indexes + // are based on the start of the bundle data. + keyBytes = new byte[(keysTop - keysBottom) << 2]; + bytes.position(keysBottom << 2); + } else { + localKeyLimit = keysTop << 2; + keyBytes = new byte[localKeyLimit]; + } + bytes.get(keyBytes); + } + // Read the array of 16-bit units. if(indexLength > URES_INDEX_16BIT_TOP) { - int keysTop = getIndexesInt(URES_INDEX_KEYS_TOP); int _16BitTop = getIndexesInt(URES_INDEX_16BIT_TOP); if(_16BitTop > keysTop) { int num16BitUnits = (_16BitTop - keysTop) * 2; @@ -301,25 +319,12 @@ public final class ICUResourceBundleReader { poolCheckSum = getIndexesInt(URES_INDEX_POOL_CHECKSUM); } - // Handle key strings last: - // If this is a pool bundle, then we shift all bytes down, - // and getIndexesInt() will not work any more. - if(getIndexesInt(URES_INDEX_KEYS_TOP) > (1 + indexLength)) { - if(isPoolBundle) { - // Shift the key strings down: - // Pool bundle key strings are used with a 0-based index, - // unlike regular bundles' key strings for which indexes - // are based on the start of the bundle data. - bytes.position((1 + indexLength) << 2); - bytes = ICUBinary.sliceWithOrder(bytes); - } else { - localKeyLimit = getIndexesInt(URES_INDEX_KEYS_TOP) << 2; - } - } - if(!isPoolBundle || b16BitUnits.length() > 1) { resourceCache = new ResourceCache(maxOffset); } + + // Reset the position for future .asCharBuffer() etc. + bytes.position(0); } private int getIndexesInt(int i) { @@ -371,13 +376,16 @@ public final class ICUResourceBundleReader { private static final Container EMPTY_ARRAY = new Container(); private static final Table EMPTY_TABLE = new Table(); - private char getChar(int offset) { - return bytes.getChar(offset); - } private char[] getChars(int offset, int count) { char[] chars = new char[count]; - for(int i = 0; i < count; offset += 2, ++i) { - chars[i] = bytes.getChar(offset); + if (count <= 16) { + for(int i = 0; i < count; offset += 2, ++i) { + chars[i] = bytes.getChar(offset); + } + } else { + CharBuffer temp = bytes.asCharBuffer(); + temp.position(offset / 2); + temp.get(chars); } return chars; } @@ -386,8 +394,14 @@ public final class ICUResourceBundleReader { } private int[] getInts(int offset, int count) { int[] ints = new int[count]; - for(int i = 0; i < count; offset += 4, ++i) { - ints[i] = bytes.getInt(offset); + if (count <= 16) { + for(int i = 0; i < count; offset += 4, ++i) { + ints[i] = bytes.getInt(offset); + } + } else { + IntBuffer temp = bytes.asIntBuffer(); + temp.position(offset / 4); + temp.get(ints); } return ints; } @@ -410,7 +424,7 @@ public final class ICUResourceBundleReader { } } private char[] getTableKeyOffsets(int offset) { - int length = getChar(offset); + int length = bytes.getChar(offset); if(length > 0) { return getChars(offset + 2, length); } else { @@ -426,10 +440,10 @@ public final class ICUResourceBundleReader { } } - private static String makeKeyStringFromBytes(ByteBuffer keyBytes, int keyOffset) { + private static String makeKeyStringFromBytes(byte[] keyBytes, int keyOffset) { StringBuilder sb = new StringBuilder(); byte b; - while((b = keyBytes.get(keyOffset)) != 0) { + while((b = keyBytes[keyOffset]) != 0) { ++keyOffset; sb.append((char)b); } @@ -437,30 +451,30 @@ public final class ICUResourceBundleReader { } private String getKey16String(int keyOffset) { if(keyOffset < localKeyLimit) { - return makeKeyStringFromBytes(bytes, keyOffset); + return makeKeyStringFromBytes(keyBytes, keyOffset); } else { - return makeKeyStringFromBytes(poolBundleReader.bytes, keyOffset - localKeyLimit); + return makeKeyStringFromBytes(poolBundleReader.keyBytes, keyOffset - localKeyLimit); } } private String getKey32String(int keyOffset) { if(keyOffset >= 0) { - return makeKeyStringFromBytes(bytes, keyOffset); + return makeKeyStringFromBytes(keyBytes, keyOffset); } else { - return makeKeyStringFromBytes(poolBundleReader.bytes, keyOffset & 0x7fffffff); + return makeKeyStringFromBytes(poolBundleReader.keyBytes, keyOffset & 0x7fffffff); } } private int compareKeys(CharSequence key, char keyOffset) { if(keyOffset < localKeyLimit) { - return ICUBinary.compareKeys(key, bytes, keyOffset); + return ICUBinary.compareKeys(key, keyBytes, keyOffset); } else { - return ICUBinary.compareKeys(key, poolBundleReader.bytes, keyOffset - localKeyLimit); + return ICUBinary.compareKeys(key, poolBundleReader.keyBytes, keyOffset - localKeyLimit); } } private int compareKeys32(CharSequence key, int keyOffset) { if(keyOffset >= 0) { - return ICUBinary.compareKeys(key, bytes, keyOffset); + return ICUBinary.compareKeys(key, keyBytes, keyOffset); } else { - return ICUBinary.compareKeys(key, poolBundleReader.bytes, keyOffset & 0x7fffffff); + return ICUBinary.compareKeys(key, poolBundleReader.keyBytes, keyOffset & 0x7fffffff); } } @@ -506,12 +520,26 @@ public final class ICUResourceBundleReader { offset+=3; } // Cast up to CharSequence to insulate against the CharBuffer.subSequence() return type change - // which makes code compiled for a newer JDK not run on an older one. + // which makes code compiled for a newer JDK (7 and up) not run on an older one (6 and below). s = ((CharSequence) b16BitUnits).subSequence(offset, offset + length).toString(); } return (String)resourceCache.putIfAbsent(res, s, s.length() * 2); } + private String makeStringFromBytes(int offset, int length) { + if (length <= 16) { + StringBuilder sb = new StringBuilder(length); + for (int i = 0; i < length; offset += 2, ++i) { + sb.append(bytes.getChar(offset)); + } + return sb.toString(); + } else { + CharSequence cs = bytes.asCharBuffer(); + offset /= 2; + return cs.subSequence(offset, offset + length).toString(); + } + } + String getString(int res) { int offset=RES_GET_OFFSET(res); if(res != offset /* RES_GET_TYPE(res) != URES_STRING */ && @@ -534,7 +562,7 @@ public final class ICUResourceBundleReader { } offset=getResourceByteOffset(offset); int length = getInt(offset); - String s = new String(getChars(offset+4, length)); + String s = makeStringFromBytes(offset+4, length); return (String)resourceCache.putIfAbsent(res, s, s.length() * 2); } @@ -551,7 +579,7 @@ public final class ICUResourceBundleReader { } offset=getResourceByteOffset(offset); length=getInt(offset); - String s = new String(getChars(offset + 4, length)); + String s = makeStringFromBytes(offset + 4, length); return (String)resourceCache.putIfAbsent(res, s, length * 2); } } else { diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/IntTrie.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/IntTrie.java index 475892adbe8..e18cb400fef 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/IntTrie.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/IntTrie.java @@ -1,6 +1,6 @@ /* ****************************************************************************** - * Copyright (C) 1996-2014, International Business Machines Corporation and + * Copyright (C) 1996-2015, International Business Machines Corporation and * others. All Rights Reserved. ****************************************************************************** */ @@ -248,10 +248,7 @@ public class IntTrie extends Trie { super.unserialize(bytes); // one used for initial value - m_data_ = new int[m_dataLength_]; - for (int i = 0; i < m_dataLength_; i ++) { - m_data_[i] = bytes.getInt(); - } + m_data_ = ICUBinary.getInts(bytes, m_dataLength_, 0); m_initialValue_ = m_data_[0]; } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/InvalidFormatException.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/InvalidFormatException.java index 6f7c2a93c9b..9e26f3aaaba 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/InvalidFormatException.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/InvalidFormatException.java @@ -1,7 +1,7 @@ /** ******************************************************************************* -* Copyright (C) 2006, International Business Machines Corporation and * -* others. All Rights Reserved. * +* Copyright (C) 2006-2015, International Business Machines Corporation and +* others. All Rights Reserved. ******************************************************************************* * ******************************************************************************* @@ -14,8 +14,12 @@ public class InvalidFormatException extends Exception { static final long serialVersionUID = 8883328905089345791L; public InvalidFormatException(){} - - public InvalidFormatException(String message){ + + public InvalidFormatException(Throwable cause) { + super(cause); + } + + public InvalidFormatException(String message) { super(message); } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/Normalizer2Impl.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/Normalizer2Impl.java index 0397ca199f0..d1af978924f 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/Normalizer2Impl.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/Normalizer2Impl.java @@ -1,6 +1,6 @@ /* ******************************************************************************* - * Copyright (C) 2009-2014, International Business Machines + * Copyright (C) 2009-2015, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************************* */ @@ -454,22 +454,15 @@ public final class Normalizer2Impl { offset=nextOffset; nextOffset=inIndexes[IX_SMALL_FCD_OFFSET]; int numChars=(nextOffset-offset)/2; - char[] chars; if(numChars!=0) { - chars=new char[numChars]; - for(int i=0; i { if (width == ValueWidth.BITS_16) { indexArraySize += This.dataLength; } - This.index = new char[indexArraySize]; /* Read in the index */ - int i; - for (i=0; i0) { - mirrors=new int[count]; - for(i=0; i0) { - exceptions=new char[count]; - for(i=0; i0) { - unfold=new char[count]; - for(i=0; i 0) { - m_scriptExtensions_ = new char[numChars]; - for(int i = 0; i < numChars; ++i) { - m_scriptExtensions_[i] = bytes.getChar(); - } + m_scriptExtensions_ = ICUBinary.getChars(bytes, numChars, 0); } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/UPropertyAliases.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/UPropertyAliases.java index 151dca2d4f4..b4cff0c8d89 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/UPropertyAliases.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/UPropertyAliases.java @@ -1,6 +1,6 @@ /* ********************************************************************** - * Copyright (c) 2002-2014, International Business Machines + * Copyright (c) 2002-2015, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * Author: Alan Liu @@ -91,10 +91,7 @@ public final class UPropertyAliases { int offset=inIndexes[IX_VALUE_MAPS_OFFSET]; int nextOffset=inIndexes[IX_BYTE_TRIES_OFFSET]; int numInts=(nextOffset-offset)/4; - valueMaps=new int[numInts]; - for(int i=0; i=0) { This.dump(); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/SpoofChecker.java b/icu4j/main/classes/core/src/com/ibm/icu/text/SpoofChecker.java index 2e1fba95236..42c2a456657 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/SpoofChecker.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/SpoofChecker.java @@ -2313,25 +2313,15 @@ public class SpoofChecker { bytes.reset(); ICUBinary.skipBytes(bytes, CFUKeysOffset); - fCFUKeys = new int[CFUKeysSize]; - for (i = 0; i < CFUKeysSize; i++) { - fCFUKeys[i] = bytes.getInt(); - } + fCFUKeys = ICUBinary.getInts(bytes, CFUKeysSize, 0); bytes.reset(); ICUBinary.skipBytes(bytes, CFUValuesOffset); - fCFUValues = new short[CFUValuesSize]; - for (i = 0; i < CFUValuesSize; i++) { - fCFUValues[i] = bytes.getShort(); - } + fCFUValues = ICUBinary.getShorts(bytes, CFUValuesSize, 0); bytes.reset(); ICUBinary.skipBytes(bytes, CFUStringTableOffset); - StringBuffer CFUStringB = new StringBuffer(); - for (i = 0; i < CFUStringTableSize; i++) { - CFUStringB.append(bytes.getChar()); - } - fCFUStrings = CFUStringB.toString(); + fCFUStrings = ICUBinary.getString(bytes, CFUStringTableSize, 0); bytes.reset(); ICUBinary.skipBytes(bytes, CFUStringLengthsOffset); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/StringPrep.java b/icu4j/main/classes/core/src/com/ibm/icu/text/StringPrep.java index a6fe703aad6..1eccd4d76b6 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/StringPrep.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/StringPrep.java @@ -282,9 +282,8 @@ public final class StringPrep { sprepTrie = new CharTrie(bytes, null); //indexes[INDEX_MAPPING_DATA_SIZE] store the size of mappingData in bytes - mappingData = new char[indexes[INDEX_MAPPING_DATA_SIZE]/2]; // load the rest of the data data and initialize the data members - reader.read(mappingData); + mappingData = reader.read(indexes[INDEX_MAPPING_DATA_SIZE]/2); // get the data format version /*formatVersion = */reader.getDataFormatVersion(); -- 2.40.0