granicus.if.org Git - icu/commitdiff
ICU-10939 ICU4J optionally read data from .dat and .res, from ICUConfig-ured dataPath...
author: Markus Scherer <markus.icu@gmail.com>
Thu, 31 Jul 2014 18:46:54 +0000 (18:46 +0000)
committer: Markus Scherer <markus.icu@gmail.com>
Thu, 31 Jul 2014 18:46:54 +0000 (18:46 +0000)
X-SVN-Rev: 36106

28 files changed:
icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetCallback.java
icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetISO2022.java
icu4j/main/classes/charset/src/com/ibm/icu/charset/CharsetMBCS.java
icu4j/main/classes/charset/src/com/ibm/icu/charset/UConverterAlias.java
icu4j/main/classes/charset/src/com/ibm/icu/charset/UConverterDataReader.java
icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationRoot.java
icu4j/main/classes/core/src/com/ibm/icu/ICUConfig.properties
icu4j/main/classes/core/src/com/ibm/icu/impl/ICUBinary.java
icu4j/main/classes/core/src/com/ibm/icu/impl/ICUData.java
icu4j/main/classes/core/src/com/ibm/icu/impl/ICUResourceBundle.java
icu4j/main/classes/core/src/com/ibm/icu/impl/ICUResourceBundleReader.java
icu4j/main/classes/core/src/com/ibm/icu/impl/Norm2AllModes.java
icu4j/main/classes/core/src/com/ibm/icu/impl/Normalizer2Impl.java
icu4j/main/classes/core/src/com/ibm/icu/impl/Trie2.java
icu4j/main/classes/core/src/com/ibm/icu/impl/UBiDiProps.java
icu4j/main/classes/core/src/com/ibm/icu/impl/UCaseProps.java
icu4j/main/classes/core/src/com/ibm/icu/impl/UCharacterName.java
icu4j/main/classes/core/src/com/ibm/icu/impl/UCharacterProperty.java
icu4j/main/classes/core/src/com/ibm/icu/impl/UPropertyAliases.java
icu4j/main/classes/core/src/com/ibm/icu/impl/duration/impl/ResourceBasedPeriodFormatterDataService.java
icu4j/main/classes/core/src/com/ibm/icu/text/BreakIteratorFactory.java
icu4j/main/classes/core/src/com/ibm/icu/text/DictionaryData.java
icu4j/main/classes/core/src/com/ibm/icu/text/RBBIDataWrapper.java
icu4j/main/classes/core/src/com/ibm/icu/text/RBBIRuleBuilder.java
icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedBreakIterator.java
icu4j/main/classes/core/src/com/ibm/icu/text/SpoofChecker.java
icu4j/main/classes/core/src/com/ibm/icu/text/StringPrep.java
icu4j/main/tests/charset/src/com/ibm/icu/dev/test/charset/TestConversion.java

index 7363fba08fe3ab128c79123036b7e257267720a2..a76fec0d6e932fe286cb2ece86c1d91d60596648 100644 (file)
@@ -1,11 +1,9 @@
 /**
 *******************************************************************************
-* Copyright (C) 2006-2011, International Business Machines Corporation and    *
-* others. All Rights Reserved.                                                *
+* Copyright (C) 2006-2014, International Business Machines Corporation and
+* others. All Rights Reserved.
 *******************************************************************************
-*
-*******************************************************************************
-*/ 
+*/
 
 package com.ibm.icu.charset;
 
@@ -253,14 +251,14 @@ public class CharsetCallback {
                 while (i < length) {
                     valueString[valueStringLength++] = UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
                     valueString[valueStringLength++] = UNICODE_U_CODEPOINT; /* adding U */
-                    valueStringLength += itou(valueString, valueStringLength, (int)buffer[i++] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 4);
+                    valueStringLength += itou(valueString, valueStringLength, buffer[i++], 16, 4);
                 }
             } else {
                 if (((String)context).equals(ESCAPE_JAVA)) {
                     while (i < length) {
                         valueString[valueStringLength++] = UNICODE_RS_CODEPOINT;    /* adding \ */
                         valueString[valueStringLength++] = UNICODE_U_LOW_CODEPOINT; /* adding u */
-                        valueStringLength += itou(valueString, valueStringLength, (int)buffer[i++] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 4);
+                        valueStringLength += itou(valueString, valueStringLength, buffer[i++], 16, 4);
                     }
                 } else if (((String)context).equals(ESCAPE_C)) {
                     valueString[valueStringLength++] = UNICODE_RS_CODEPOINT;    /* adding \ */
@@ -270,7 +268,7 @@ public class CharsetCallback {
                         valueStringLength = itou(valueString, valueStringLength, cp, 16, 8);
                     } else {
                         valueString[valueStringLength++] = UNICODE_U_LOW_CODEPOINT; /* adding u */
-                        valueStringLength += itou(valueString, valueStringLength, (int)buffer[0] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 4);
+                        valueStringLength += itou(valueString, valueStringLength, buffer[0], 16, 4);
                     }
                 } else if (((String)context).equals(ESCAPE_XML_DEC)) {
                     valueString[valueStringLength++] = UNICODE_AMP_CODEPOINT;   /* adding & */
@@ -278,7 +276,7 @@ public class CharsetCallback {
                     if (length == 2) {
                         valueStringLength += itou(valueString, valueStringLength, cp, 10, 0);
                     } else {
-                        valueStringLength += itou(valueString, valueStringLength, (int)buffer[0] & UConverterConstants.UNSIGNED_SHORT_MASK, 10, 0);
+                        valueStringLength += itou(valueString, valueStringLength, buffer[0], 10, 0);
                     }
                     valueString[valueStringLength++] = UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
                 } else if (((String)context).equals(ESCAPE_XML_HEX)) {
@@ -288,7 +286,7 @@ public class CharsetCallback {
                     if (length == 2) {
                         valueStringLength += itou(valueString, valueStringLength, cp, 16, 0);
                     } else {
-                        valueStringLength += itou(valueString, valueStringLength, (int)buffer[0] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 0);
+                        valueStringLength += itou(valueString, valueStringLength, buffer[0], 16, 0);
                     }
                     valueString[valueStringLength++] = UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
                 } else if (((String)context).equals(ESCAPE_UNICODE)) {
@@ -298,7 +296,7 @@ public class CharsetCallback {
                     if (length == 2) {
                         valueStringLength += itou(valueString, valueStringLength,cp, 16, 4);
                     } else {
-                        valueStringLength += itou(valueString, valueStringLength, (int)buffer[0] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 4);
+                        valueStringLength += itou(valueString, valueStringLength, buffer[0], 16, 4);
                     }
                     valueString[valueStringLength++] = UNICODE_RIGHT_CURLY_CODEPOINT;   /* adding } */
                 } else if (((String)context).equals(ESCAPE_CSS2)) {
@@ -311,7 +309,7 @@ public class CharsetCallback {
                     while (i < length) {
                         valueString[valueStringLength++] = UNICODE_PERCENT_SIGN_CODEPOINT;  /* adding % */
                         valueString[valueStringLength++] = UNICODE_U_CODEPOINT;             /* adding U */
-                        valueStringLength += itou(valueString, valueStringLength, (int)buffer[i++] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 4);
+                        valueStringLength += itou(valueString, valueStringLength, buffer[i++], 16, 4);
                     }
                 }
             }
index d366cbe4180ba29f38ba220931fc81b8c948771e..02424154b38485c7fa9c7df960bb04943c5d80f8 100644 (file)
@@ -1,7 +1,7 @@
 /*
  *******************************************************************************
- * Copyright (C) 2008-2011, International Business Machines Corporation and    *
- * others. All Rights Reserved.                                                *
+ * Copyright (C) 2008-2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
  *******************************************************************************
  */
 package com.ibm.icu.charset;
@@ -271,13 +271,13 @@ class CharsetISO2022 extends CharsetICU {
         char[] table;
         int value;
         /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
-        if (c >= 0x10000 && (sharedData.mbcs.unicodeMask&UConverterConstants.HAS_SUPPLEMENTARY) == 0) {
+        if (c >= 0x10000 && !sharedData.mbcs.hasSupplementary()) {
             return 0;
         }
         /* convert the Unicode code point in c into codepage bytes */
         table = sharedData.mbcs.fromUnicodeTable;
         /* get the byte for the output */
-        value = CharsetMBCS.MBCS_SINGLE_RESULT_FROM_U(table, sharedData.mbcs.fromUnicodeBytes, c);
+        value = CharsetMBCS.MBCS_SINGLE_RESULT_FROM_U(table, sharedData.mbcs.fromUnicodeChars, c);
         /* get the byte for the output */
         retval[0] = value & 0xff;
         if (value >= 0xf00) {
index 06dfa14aa7b1dd8b527a33f656ebdb6b6bd361ef..f4c42f570b056ea5f1ff79d725626ec3e05bdd04 100644 (file)
@@ -54,6 +54,11 @@ class CharsetMBCS extends CharsetICU {
     final static class MBCSToUFallback {
         int offset;
         int codePoint;
+
+        MBCSToUFallback(int off, int cp) {
+            offset = off;
+            codePoint = cp;
+        }
     }
 
     /**
@@ -73,9 +78,14 @@ class CharsetMBCS extends CharsetICU {
         MBCSToUFallback toUFallbacks[/* countToUFallbacks */];
 
         /* fromUnicode */
-        char fromUnicodeTable[];
+        char fromUnicodeTable[];  // stage1, and for MBCS_OUTPUT_1 also contains stage2
+        int fromUnicodeTableInts[];  // stage1 and stage2 together as int[]
+        // Exactly one of the fromUnicode(Type) tables is not null,
+        // depending on the outputType.
         byte fromUnicodeBytes[];
-        byte swapLFNLFromUnicodeBytes[]; /* for swaplfnl */
+        char fromUnicodeChars[];
+        int fromUnicodeInts[];
+        char swapLFNLFromUnicodeChars[]; /* for swaplfnl */
         int fromUBytesLength;
         short outputType, unicodeMask;
 
@@ -88,17 +98,20 @@ class CharsetMBCS extends CharsetICU {
         ByteBuffer extIndexes; // create int[] view etc. as needed
         
         CharBuffer mbcsIndex;                     /* for fast conversion from most of BMP to MBCS (utf8Friendly data) */
-        char sbcsIndex[/* SBCS_FAST_LIMIT>>6 */]; /* for fast conversion from low BMP to SBCS (utf8Friendly data) */
+        // char sbcsIndex[/* SBCS_FAST_LIMIT>>6 */]; /* for fast conversion from low BMP to SBCS (utf8Friendly data) */
         boolean utf8Friendly;                     /* for utf8Friendly data */
         char maxFastUChar;                        /* for utf8Friendly data */
 
         /* roundtrips */
-        long asciiRoundtrips;
+        int asciiRoundtrips;
 
         UConverterMBCSTable() {
             utf8Friendly = false;
             mbcsIndex = null;
-            sbcsIndex = new char[SBCS_FAST_LIMIT>>6];
+        }
+
+        boolean hasSupplementary() {
+            return (unicodeMask & UConverterConstants.HAS_SUPPLEMENTARY) != 0;
         }
 
         /*
@@ -106,7 +119,7 @@ class CharsetMBCS extends CharsetICU {
          * stateTableOwned = t.stateTableOwned; countToUFallbacks = t.countToUFallbacks; stateTable = t.stateTable;
          * swapLFNLStateTable = t.swapLFNLStateTable; unicodeCodeUnits = t.unicodeCodeUnits; toUFallbacks =
          * t.toUFallbacks; fromUnicodeTable = t.fromUnicodeTable; fromUnicodeBytes = t.fromUnicodeBytes;
-         * swapLFNLFromUnicodeBytes = t.swapLFNLFromUnicodeBytes; fromUBytesLength = t.fromUBytesLength; outputType =
+         * swapLFNLFromUnicodeChars = t.swapLFNLFromUnicodeChars; fromUBytesLength = t.fromUBytesLength; outputType =
          * t.outputType; unicodeMask = t.unicodeMask; swapLFNLName = t.swapLFNLName; baseSharedData = t.baseSharedData;
          * extIndexes = t.extIndexes; }
          */
@@ -193,15 +206,19 @@ class CharsetMBCS extends CharsetICU {
         UConverterStaticData staticData = new UConverterStaticData();
         UConverterDataReader reader = null;
         try {
-            String resourceName = classPath + "/" + myName + "." + UConverterSharedData.DATA_TYPE;
-            InputStream i;
+            String itemName = myName + '.' + UConverterSharedData.DATA_TYPE;
+            String resourceName = classPath + '/' + itemName;
+            ByteBuffer b;
 
             if (loader != null) {
-                i = ICUData.getRequiredStream(loader, resourceName);
+                InputStream i = ICUData.getRequiredStream(loader, resourceName);
+                b = ICUBinary.getByteBufferFromInputStream(i);
+            } else if (!classPath.equals(ICUData.ICU_BUNDLE)) {
+                InputStream i = ICUData.getRequiredStream(resourceName);
+                b = ICUBinary.getByteBufferFromInputStream(i);
             } else {
-                i = ICUData.getRequiredStream(resourceName);
+                b = ICUBinary.getRequiredData(itemName);
             }
-            ByteBuffer b = ICUBinary.getByteBufferFromInputStream(i);
             reader = new UConverterDataReader(b);
             reader.readStaticData(staticData);
         } catch (IOException e) {
@@ -235,11 +252,6 @@ class CharsetMBCS extends CharsetICU {
         int offset;
         // int[] extIndexesArray = null;
         String baseNameString = null;
-        int[][] stateTableArray = null;
-        MBCSToUFallback[] toUFallbacksArray = null;
-        char[] unicodeCodeUnitsArray = null;
-        char[] fromUnicodeTableArray = null;
-        byte[] fromUnicodeBytesArray = null;
 
         if (header.version[0] == 5 && header.version[1] >= 3 && (header.options & MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK) == 0) {
             noFromU = ((header.options & MBCS_OPT_NO_FROM_U) != 0);
@@ -258,8 +270,7 @@ class CharsetMBCS extends CharsetICU {
                 if (offset != 0) {
                     // agljport:commment subtract 32 for sizeof(_MBCSHeader) and length of baseNameString and 1 null
                     // terminator byte all already read;
-                    mbcsTable.extIndexes = reader.readExtIndexes(offset
-                            - (reader.bytesRead - reader.staticDataBytesRead));
+                    mbcsTable.extIndexes = reader.readExtIndexes(offset - reader.bytesReadAfterStaticData());
                 }
             } catch (IOException e) {
                 throw new InvalidFormatException();
@@ -329,7 +340,7 @@ class CharsetMBCS extends CharsetICU {
              * for the extension converter separately when it is requested.
              */
             mbcsTable.swapLFNLStateTable = null;
-            mbcsTable.swapLFNLFromUnicodeBytes = null;
+            mbcsTable.swapLFNLFromUnicodeChars = null;
             mbcsTable.swapLFNLName = null;
 
             /*
@@ -412,50 +423,31 @@ class CharsetMBCS extends CharsetICU {
                 throw new InvalidFormatException();
             }
 
-            stateTableArray = new int[header.countStates][256];
-            toUFallbacksArray = new MBCSToUFallback[header.countToUFallbacks];
-            for (int i = 0; i < toUFallbacksArray.length; ++i)
-                toUFallbacksArray[i] = new MBCSToUFallback();
-            unicodeCodeUnitsArray = new char[(header.offsetFromUTable - header.offsetToUCodeUnits) / 2];
-            fromUnicodeTableArray = new char[(header.offsetFromUBytes - header.offsetFromUTable) / 2];
-            fromUnicodeBytesArray = new byte[header.fromUBytesLength];
-            try {
-                reader.readMBCSTable(stateTableArray, toUFallbacksArray, unicodeCodeUnitsArray, fromUnicodeTableArray,
-                        fromUnicodeBytesArray);
-            } catch (IOException e) {
-                throw new InvalidFormatException();
-            }
-
-            mbcsTable.countStates = (byte) header.countStates;
-            mbcsTable.countToUFallbacks = header.countToUFallbacks;
-            mbcsTable.stateTable = stateTableArray;
-            mbcsTable.toUFallbacks = toUFallbacksArray;
-            mbcsTable.unicodeCodeUnits = unicodeCodeUnitsArray;
-
-            mbcsTable.fromUnicodeTable = fromUnicodeTableArray;
-            mbcsTable.fromUnicodeBytes = fromUnicodeBytesArray;
-            mbcsTable.fromUBytesLength = header.fromUBytesLength;
-
             /*
              * converter versions 6.1 and up contain a unicodeMask that is used here to select the most efficient
              * function implementations
              */
             // agljport:fix info.size=sizeof(UDataInfo);
             // agljport:fix udata_getInfo((UDataMemory *)sharedData->dataMemory, &info);
-            // agljport:fix if(info.formatVersion[0]>6 || (info.formatVersion[0]==6 && info.formatVersion[1]>=1)) {
-            /* mask off possible future extensions to be safe */
-            mbcsTable.unicodeMask = (short) (staticData.unicodeMask & 3);
-            // agljport:fix } else {
-            /* for older versions, assume worst case: contains anything possible (prevent over-optimizations) */
-            // agljport:fix mbcsTable->unicodeMask=UCNV_HAS_SUPPLEMENTARY|UCNV_HAS_SURROGATES;
-            // agljport:fix }
+            if (reader.dataFormatHasUnicodeMask()) {
+                /* mask off possible future extensions to be safe */
+                mbcsTable.unicodeMask = (short) (staticData.unicodeMask & 3);
+            } else {
+                /* for older versions, assume worst case: contains anything possible (prevent over-optimizations) */
+                mbcsTable.unicodeMask = UConverterConstants.HAS_SUPPLEMENTARY | UConverterConstants.HAS_SURROGATES;
+            }
+            try {
+                reader.readMBCSTable(header, mbcsTable);
+            } catch (IOException e) {
+                throw new InvalidFormatException();
+            }
+
             if (offset != 0) {
                 try {
                     // agljport:commment subtract 32 for sizeof(_MBCSHeader) and length of baseNameString and 1 null
                     // terminator byte all already read;
                     // int namelen = baseNameString != null? baseNameString.length() + 1: 0;
-                    mbcsTable.extIndexes = reader.readExtIndexes(offset
-                            - (reader.bytesRead - reader.staticDataBytesRead));
+                    mbcsTable.extIndexes = reader.readExtIndexes(offset - reader.bytesReadAfterStaticData());
                 } catch (IOException e) {
                     throw new InvalidFormatException();
                 }
@@ -468,12 +460,13 @@ class CharsetMBCS extends CharsetICU {
                 if (mbcsTable.countStates == 1) {
                     /*
                      * SBCS: Stage 3 is allocated in 64-entry blocks for U+0000..SBCS_FAST_MAX or higher.
-                     * Build a table with indexes to each block, to be used instaed of
+                     * Build a table with indexes to each block, to be used instead of
                      * the regular stage 1/2 table.
                      */
-                    for (int i = 0; i < (SBCS_FAST_LIMIT>>6); ++i) {
-                        mbcsTable.sbcsIndex[i] = mbcsTable.fromUnicodeTable[mbcsTable.fromUnicodeTable[i>>4]+((i<<2)&0x3c)];
-                    }
+//                    sbcsIndex = new char[SBCS_FAST_LIMIT>>6];
+//                    for (int i = 0; i < (SBCS_FAST_LIMIT>>6); ++i) {
+//                        mbcsTable.sbcsIndex[i] = mbcsTable.fromUnicodeTable[mbcsTable.fromUnicodeTable[i>>4]+((i<<2)&0x3c)];
+//                    }
                     /* set SBCS_FAST_MAX to reflect the reach of sbcsIndex[] even if header.version[2]>(SBCS_FAST_MAX>>8) */
                     mbcsTable.maxFastUChar = SBCS_FAST_MAX;
                 } else {
@@ -481,23 +474,21 @@ class CharsetMBCS extends CharsetICU {
                      * MBCS: Stage 3 is allocated in 64-entry blocks for U+0000..MBCS_FAST_MAX or higher.
                      * The .cnv file is prebuilt with an additional stage table with indexes to each block.
                      */
-                    if (noFromU) {
-                        mbcsTable.mbcsIndex = ByteBuffer.wrap(mbcsTable.fromUnicodeBytes).asCharBuffer();
-                    }
                     mbcsTable.maxFastUChar = (char)((header.version[2]<<8) | 0xff);
                 }
             }
             /* calculate a bit set of 4 ASCII characters per bit that round-trip to ASCII bytes */
             {
-                long asciiRoundtrips = 0xffffffff;
+                int asciiRoundtrips = 0xffffffff;
                 for (int i = 0; i < 0x80; ++i) {
                     if (mbcsTable.stateTable[0][i] != MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, i)) {
-                        asciiRoundtrips&=~((long)1<<(i>>2))&UConverterConstants.UNSIGNED_INT_MASK;
+                        asciiRoundtrips &= ~(1 << (i >> 2));
                     }
                 }
-                mbcsTable.asciiRoundtrips = asciiRoundtrips&UConverterConstants.UNSIGNED_INT_MASK;
+                mbcsTable.asciiRoundtrips = asciiRoundtrips;
             }
-            
+            // TODO: Use asciiRoundtrips to speed up conversion, like in ICU4C.
+
             if (noFromU) {
                 int stage1Length = (mbcsTable.unicodeMask&UConverterConstants.HAS_SUPPLEMENTARY) != 0 ? 0x440 : 0x40;
                 int stage2Length = (header.offsetFromUBytes - header.offsetFromUTable)/4 - stage1Length/2;
@@ -511,9 +502,11 @@ class CharsetMBCS extends CharsetICU {
                 mbcsTable.asciiRoundtrips = 0;
             }
         }
+        // TODO: Use mbcsIndex to speed up UTF-16 conversion, like in ICU4C.
+        mbcsTable.mbcsIndex = null;
         return data;
     }
-    
+
     private static boolean writeStage3Roundtrip(UConverterMBCSTable mbcsTable, long value, int codePoints[]) {
         char[] table;
         byte[] bytes;
@@ -524,7 +517,10 @@ class CharsetMBCS extends CharsetICU {
         long temp;
 
         table = mbcsTable.fromUnicodeTable;
+        int[] tableInts = mbcsTable.fromUnicodeTableInts;
         bytes = mbcsTable.fromUnicodeBytes;
+        char[] chars = mbcsTable.fromUnicodeChars;
+        int[] ints = mbcsTable.fromUnicodeInts;
 
         /* for EUC outputTypes, modify the value like genmbcs.c's transformEUC() */
         switch(mbcsTable.outputType) {
@@ -564,7 +560,7 @@ class CharsetMBCS extends CharsetICU {
 
             /* locate the stage 2 & 3 data */
             stage2 = table[c>>10] + ((c>>4)&0x3f);
-            st3 = table[stage2*2]<<16|table[stage2*2 + 1];
+            st3 = tableInts[stage2];
             st3 = (int)(char)(st3 * 16 + (c&0xf));
 
             /* write the codepage bytes into stage 3 */
@@ -577,54 +573,42 @@ class CharsetMBCS extends CharsetICU {
                 bytes[p+2] = (byte)value;
                 break;
             case MBCS_OUTPUT_4:
-                bytes[st3*4] = (byte)(value >> 24);
-                bytes[st3*4 + 1] = (byte)(value >> 16);
-                bytes[st3*4 + 2] = (byte)(value >> 8);
-                bytes[st3*4 + 3] = (byte)value;
+                ints[st3] = (int)value;
                 break;
             default:
                 /* 2 bytes per character */
-                bytes[st3*2] = (byte)(value >> 8);
-                bytes[st3*2 + 1] = (byte)value;
+                chars[st3] = (char)value;
                 break;
             }
 
             /* set the roundtrip flag */
             temp = (1L<<(16+(c&0xf)));
-            table[stage2*2] |= (char)(temp>>16);
-            table[stage2*2 + 1] |= (char)temp;
+            tableInts[stage2] |= temp;
         }
         return true;
      }
-    
-    private static void reconstituteData(UConverterMBCSTable mbcsTable, int stage1Length, int stage2Length, int fullStage2Length) {
-        int datalength = stage1Length*2+fullStage2Length*4+mbcsTable.fromUBytesLength;
-        int offset = 0;
-        byte[] stage = new byte[datalength];
-        
-        for (int i = 0; i < stage1Length; ++i) {
-            stage[i*2]   = (byte)(mbcsTable.fromUnicodeTable[i]>>8);
-            stage[i*2+1] = (byte)(mbcsTable.fromUnicodeTable[i]);
-        }
-        
-        offset = ((fullStage2Length - stage2Length) * 4) + (stage1Length * 2);
-        for (int i = 0; i < stage2Length; ++i) {
-            stage[offset + i*4]   = (byte)(mbcsTable.fromUnicodeTable[stage1Length + i*2]>>8);
-            stage[offset + i*4+1] = (byte)(mbcsTable.fromUnicodeTable[stage1Length + i*2]);
-            stage[offset + i*4+2] = (byte)(mbcsTable.fromUnicodeTable[stage1Length + i*2+1]>>8);
-            stage[offset + i*4+3] = (byte)(mbcsTable.fromUnicodeTable[stage1Length + i*2+1]);
-        }
-        
-        /* indexes into stage 2 count from the bottom of the fromUnicodeTable */
-        
-        /* reconsitute the initial part of stage 2 from the mbcsIndex */
+
+    private static void reconstituteData(UConverterMBCSTable mbcsTable,
+            int stage1Length, int stage2Length, int fullStage2Length) {
+        char[] stage1 = mbcsTable.fromUnicodeTable;
+
+        // stage2 starts with unused stage1 space.
+        // Indexes into stage 2 count from the bottom of the fromUnicodeTable.
+        int numStage1Ints = stage1Length / 2;  // 2 chars = 1 int
+        int[] stage2 = new int[numStage1Ints + fullStage2Length];
+        System.arraycopy(mbcsTable.fromUnicodeTableInts, numStage1Ints,
+                stage2, (fullStage2Length - stage2Length) + numStage1Ints,
+                stage2Length);
+        mbcsTable.fromUnicodeTableInts = stage2;
+
+        /* reconstitute the initial part of stage 2 from the mbcsIndex */
         {
             int stageUTF8Length=(mbcsTable.maxFastUChar+1)>>6;
             int stageUTF8Index=0;
             int st1, st2, st3, i;
-            
+
             for (st1 = 0; stageUTF8Index < stageUTF8Length; ++st1) {
-                st2 = ((char)stage[2*st1]<<8) | (0xff & stage[2*st1+1]);
+                st2 = stage1[st1];
                 if (st2 != stage1Length/2) {
                     /* each stage 2 block has 64 entries corresponding to 16 entries in the mbcsIndex */
                     for (i = 0; i < 16; ++i) {
@@ -636,10 +620,10 @@ class CharsetMBCS extends CharsetICU {
                              * 4 stage 2 entries point to 4 consecutive stage 3 16-blocks which are
                              * allocated together as a single 64-block for access from the mbcsIndex
                              */
-                            stage[4*st2] = (byte)(st3>>24); stage[4*st2+1] = (byte)(st3>>16); stage[4*st2+2] = (byte)(st3>>8); stage[4*st2+3] = (byte)(st3); st2++; st3++;
-                            stage[4*st2] = (byte)(st3>>24); stage[4*st2+1] = (byte)(st3>>16); stage[4*st2+2] = (byte)(st3>>8); stage[4*st2+3] = (byte)(st3); st2++; st3++;
-                            stage[4*st2] = (byte)(st3>>24); stage[4*st2+1] = (byte)(st3>>16); stage[4*st2+2] = (byte)(st3>>8); stage[4*st2+3] = (byte)(st3); st2++; st3++;
-                            stage[4*st2] = (byte)(st3>>24); stage[4*st2+1] = (byte)(st3>>16); stage[4*st2+2] = (byte)(st3>>8); stage[4*st2+3] = (byte)(st3);
+                            stage2[st2++] = st3++;
+                            stage2[st2++] = st3++;
+                            stage2[st2++] = st3++;
+                            stage2[st2++] = st3;
                         } else {
                             /* no stage 3 block, skip */
                             st2+=4;
@@ -651,17 +635,25 @@ class CharsetMBCS extends CharsetICU {
                 }
             }
         }
-        
-        char[] stage1 = new char[stage.length/2];
-        for (int i = 0; i < stage1.length; ++i) {
-            stage1[i] = (char)(((stage[i*2])<<8)|(stage[i*2+1] & UConverterConstants.UNSIGNED_BYTE_MASK));
+
+        switch (mbcsTable.outputType) {
+        case CharsetMBCS.MBCS_OUTPUT_2:
+        case CharsetMBCS.MBCS_OUTPUT_2_SISO:
+        case CharsetMBCS.MBCS_OUTPUT_3_EUC:
+            mbcsTable.fromUnicodeChars = new char[mbcsTable.fromUBytesLength / 2];
+            break;
+        case CharsetMBCS.MBCS_OUTPUT_3:
+        case CharsetMBCS.MBCS_OUTPUT_4_EUC:
+            mbcsTable.fromUnicodeBytes = new byte[mbcsTable.fromUBytesLength];
+            break;
+        case CharsetMBCS.MBCS_OUTPUT_4:
+            mbcsTable.fromUnicodeInts = new int[mbcsTable.fromUBytesLength / 4];
+            break;
+        default:
+            // Cannot occur, caller checked already.
+            assert false;
         }
-        byte[] stage2 = new byte[stage.length - ((stage1Length * 2) + (fullStage2Length * 4))];
-        System.arraycopy(stage, ((stage1Length * 2) + (fullStage2Length * 4)), stage2, 0, stage2.length);
-        
-        mbcsTable.fromUnicodeTable = stage1;
-        mbcsTable.fromUnicodeBytes = stage2;
-        
+
         /* reconstitute fromUnicodeBytes with roundtrips from toUnicode data */
         MBCSEnumToUnicode(mbcsTable);
     }
@@ -786,7 +778,7 @@ class CharsetMBCS extends CharsetICU {
             }
             if (((++b)&0x1f) == 0) {
                 if(anyCodePoints>=0) {
-                    if(!writeStage3Roundtrip(mbcsTable, value|(b-0x20)&UConverterConstants.UNSIGNED_INT_MASK, codePoints)) {
+                    if(!writeStage3Roundtrip(mbcsTable, value|(b-0x20), codePoints)) {
                         return false;
                     }
                     anyCodePoints=-1;
@@ -965,30 +957,26 @@ class CharsetMBCS extends CharsetICU {
     
     private boolean EBCDICSwapLFNL() throws Exception {
         UConverterMBCSTable mbcsTable;
-        
+
         char[] table;
-        byte[] results;
-        byte[] bytes;
-        
+
         int[][] newStateTable;
-        byte[] newResults;
         String newName;
-        
+
         int stage2Entry;
-//        int size;
-        int sizeofFromUBytes;
-        
+
         mbcsTable = sharedData.mbcs;
         
         table = mbcsTable.fromUnicodeTable;
-        bytes = mbcsTable.fromUnicodeBytes;
-        results = bytes;
-        
+        int[] tableInts = sharedData.mbcs.fromUnicodeTableInts;
+        char[] chars = mbcsTable.fromUnicodeChars;
+        char[] results = chars;
+
         /*
          * Check that this is an EBCDIC table with SBCS portion -
          * SBCS or EBCDIC with standard EBCDIC LF and NL mappings.
          * 
-         * If not, ignore the option Options are always ignored if they do not apply.
+         * If not, ignore the option. Options are always ignored if they do not apply.
          */
         if (!((mbcsTable.outputType == MBCS_OUTPUT_1 || mbcsTable.outputType == MBCS_OUTPUT_2_SISO) &&
               mbcsTable.stateTable[0][EBCDIC_LF] == MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_LF) &&
@@ -1002,15 +990,15 @@ class CharsetMBCS extends CharsetICU {
                 return false;
             }
         } else /* MBCS_OUTPUT_2_SISO */ {
-            stage2Entry = MBCS_STAGE_2_FROM_U(table, U_LF);
+            stage2Entry = MBCS_STAGE_2_FROM_U(table, tableInts, U_LF);
             if (!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, U_LF) &&
-                  EBCDIC_LF == MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, U_LF))) {
+                  EBCDIC_LF == MBCS_VALUE_2_FROM_STAGE_2(chars, stage2Entry, U_LF))) {
                 return false;
             }
             
-            stage2Entry = MBCS_STAGE_2_FROM_U(table, U_NL);
+            stage2Entry = MBCS_STAGE_2_FROM_U(table, tableInts, U_NL);
             if (!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, U_NL) &&
-                  EBCDIC_NL == MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, U_NL))) {
+                  EBCDIC_NL == MBCS_VALUE_2_FROM_STAGE_2(chars, stage2Entry, U_NL))) {
                 return false;
             }
         }
@@ -1020,7 +1008,7 @@ class CharsetMBCS extends CharsetICU {
              * We _know_ the number of bytes in the fromUnicodeBytes array
              * starting with header.version 4.1.
              */
-            sizeofFromUBytes = mbcsTable.fromUBytesLength;
+            // sizeofFromUBytes = mbcsTable.fromUBytesLength;
         } else {
             /*
              * Otherwise:
@@ -1050,26 +1038,26 @@ class CharsetMBCS extends CharsetICU {
         newStateTable[0][EBCDIC_NL] = MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_LF);
         
         /* copy and modify the from-Unicode result table */
-        newResults = new byte[sizeofFromUBytes];
-        System.arraycopy(bytes, 0, newResults, 0, sizeofFromUBytes);
+        char[] newResults = new char[chars.length];
+        System.arraycopy(chars, 0, newResults, 0, chars.length);
         /* conveniently, the table access macros work on the left side of expressions */
         if (mbcsTable.outputType == MBCS_OUTPUT_1) {
             MBCS_SINGLE_RESULT_FROM_U_SET(table, newResults, U_LF, EBCDIC_RT_NL);
             MBCS_SINGLE_RESULT_FROM_U_SET(table, newResults, U_NL, EBCDIC_RT_LF);
         } else /* MBCS_OUTPUT_2_SISO */ {
-            stage2Entry = MBCS_STAGE_2_FROM_U(table, U_LF);
+            stage2Entry = MBCS_STAGE_2_FROM_U(table, tableInts, U_LF);
             MBCS_VALUE_2_FROM_STAGE_2_SET(newResults, stage2Entry, U_LF, EBCDIC_NL);
             
-            stage2Entry = MBCS_STAGE_2_FROM_U(table, U_NL);
+            stage2Entry = MBCS_STAGE_2_FROM_U(table, tableInts, U_NL);
             MBCS_VALUE_2_FROM_STAGE_2_SET(newResults, stage2Entry, U_NL, EBCDIC_LF);
         }
         
         /* set the canonical converter name */
         newName = icuCanonicalName.concat(UConverterConstants.OPTION_SWAP_LFNL_STRING);
-        
+
         if (mbcsTable.swapLFNLStateTable == null) {
             mbcsTable.swapLFNLStateTable = newStateTable;
-            mbcsTable.swapLFNLFromUnicodeBytes = newResults;
+            mbcsTable.swapLFNLFromUnicodeChars = newResults;
             mbcsTable.swapLFNLName = newName;
         }
         return true;
@@ -1094,13 +1082,13 @@ class CharsetMBCS extends CharsetICU {
     /* GB 18030 data ------------------------------------------------------------ */
 
     /* helper macros for linear values for GB 18030 four-byte sequences */
-    private static long LINEAR_18030(long a, long b, long c, long d) {
-        return ((((a & 0xff) * 10 + (b & 0xff)) * 126L + (c & 0xff)) * 10L + (d & 0xff));
+    private static int LINEAR_18030(int a, int b, int c, int d) {
+        return ((((a & 0xff) * 10 + (b & 0xff)) * 126 + (c & 0xff)) * 10 + (d & 0xff));
     }
 
-    private static long LINEAR_18030_BASE = LINEAR_18030(0x81, 0x30, 0x81, 0x30);
+    private static int LINEAR_18030_BASE = LINEAR_18030(0x81, 0x30, 0x81, 0x30);
 
-    private static long LINEAR(long x) {
+    private static int LINEAR(int x) {
         return LINEAR_18030(x >>> 24, (x >>> 16) & 0xff, (x >>> 8) & 0xff, x & 0xff);
     }
 
@@ -1111,21 +1099,21 @@ class CharsetMBCS extends CharsetICU {
      * 
      * Note that single surrogates are not mapped by GB 18030 as of the re-released mapping tables from 2000-nov-30.
      */
-    private static final long gb18030Ranges[][] = new long[/* 14 */][/* 4 */] {
-            { 0x10000L, 0x10FFFFL, LINEAR(0x90308130L), LINEAR(0xE3329A35L) },
-            { 0x9FA6L, 0xD7FFL, LINEAR(0x82358F33L), LINEAR(0x8336C738L) },
-            { 0x0452L, 0x1E3EL, LINEAR(0x8130D330L), LINEAR(0x8135F436L) },
-            { 0x1E40L, 0x200FL, LINEAR(0x8135F438L), LINEAR(0x8136A531L) },
-            { 0xE865L, 0xF92BL, LINEAR(0x8336D030L), LINEAR(0x84308534L) },
-            { 0x2643L, 0x2E80L, LINEAR(0x8137A839L), LINEAR(0x8138FD38L) },
-            { 0xFA2AL, 0xFE2FL, LINEAR(0x84309C38L), LINEAR(0x84318537L) },
-            { 0x3CE1L, 0x4055L, LINEAR(0x8231D438L), LINEAR(0x8232AF32L) },
-            { 0x361BL, 0x3917L, LINEAR(0x8230A633L), LINEAR(0x8230F237L) },
-            { 0x49B8L, 0x4C76L, LINEAR(0x8234A131L), LINEAR(0x8234E733L) },
-            { 0x4160L, 0x4336L, LINEAR(0x8232C937L), LINEAR(0x8232F837L) },
-            { 0x478EL, 0x4946L, LINEAR(0x8233E838L), LINEAR(0x82349638L) },
-            { 0x44D7L, 0x464BL, LINEAR(0x8233A339L), LINEAR(0x8233C931L) },
-            { 0xFFE6L, 0xFFFFL, LINEAR(0x8431A234L), LINEAR(0x8431A439L) } };
+    private static final int gb18030Ranges[][] = new int[/* 14 */][/* 4 */] {
+            { 0x10000, 0x10FFFF, LINEAR(0x90308130), LINEAR(0xE3329A35) },
+            { 0x9FA6, 0xD7FF, LINEAR(0x82358F33), LINEAR(0x8336C738) },
+            { 0x0452, 0x1E3E, LINEAR(0x8130D330), LINEAR(0x8135F436) },
+            { 0x1E40, 0x200F, LINEAR(0x8135F438), LINEAR(0x8136A531) },
+            { 0xE865, 0xF92B, LINEAR(0x8336D030), LINEAR(0x84308534) },
+            { 0x2643, 0x2E80, LINEAR(0x8137A839), LINEAR(0x8138FD38) },
+            { 0xFA2A, 0xFE2F, LINEAR(0x84309C38), LINEAR(0x84318537) },
+            { 0x3CE1, 0x4055, LINEAR(0x8231D438), LINEAR(0x8232AF32) },
+            { 0x361B, 0x3917, LINEAR(0x8230A633), LINEAR(0x8230F237) },
+            { 0x49B8, 0x4C76, LINEAR(0x8234A131), LINEAR(0x8234E733) },
+            { 0x4160, 0x4336, LINEAR(0x8232C937), LINEAR(0x8232F837) },
+            { 0x478E, 0x4946, LINEAR(0x8233E838), LINEAR(0x82349638) },
+            { 0x44D7, 0x464B, LINEAR(0x8233A339), LINEAR(0x8233C931) },
+            { 0xFFE6, 0xFFFF, LINEAR(0x8431A234), LINEAR(0x8431A439) } };
 
     /* bit flag for UConverter.options indicating GB 18030 special handling */
     private static final int MBCS_OPTION_GB18030 = 0x8000;
@@ -1270,57 +1258,51 @@ class CharsetMBCS extends CharsetICU {
      * single-state codepages that only map to and from BMP code points, and it always returns fallback values.
      */
     static char MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(UConverterMBCSTable mbcs, final int b) {
-        return MBCS_ENTRY_FINAL_VALUE_16(mbcs.stateTable[0][b & UConverterConstants.UNSIGNED_BYTE_MASK]);
+        assert 0 <= b && b <= 0xff;
+        return MBCS_ENTRY_FINAL_VALUE_16(mbcs.stateTable[0][b]);
     }
 
     /* single-byte fromUnicode: get the 16-bit result word */
-    static char MBCS_SINGLE_RESULT_FROM_U(char[] table, byte[] results, int c) {
+    static char MBCS_SINGLE_RESULT_FROM_U(char[] table, char[] results, int c) {
         int i1 = table[c >>> 10] + ((c >>> 4) & 0x3f);
-        int i = 2 * (table[i1] + (c & 0xf)); // used as index into byte[] array treated as char[] array
-        return (char) (((results[i] & UConverterConstants.UNSIGNED_BYTE_MASK) << 8) | (results[i + 1] & UConverterConstants.UNSIGNED_BYTE_MASK));
+        int i = table[i1] + (c & 0xf);
+        return results[i];
     }
     
     /* single-byte fromUnicode: set the 16-bit result word with newValue*/
-    static void MBCS_SINGLE_RESULT_FROM_U_SET(char[] table, byte[] results, int c, int newValue) {
+    static void MBCS_SINGLE_RESULT_FROM_U_SET(char[] table, char[] results, int c, int newValue) {
         int i1 = table[c >>> 10] + ((c >>> 4) & 0x3f);
-        int i = 2 * (table[i1] + (c & 0xf)); // used as index into byte[] array treated as char[] array
-        results[i] = (byte)((newValue >> 8) & UConverterConstants.UNSIGNED_BYTE_MASK);
-        results[i + 1] =  (byte)(newValue & UConverterConstants.UNSIGNED_BYTE_MASK);
+        int i = table[i1] + (c & 0xf);
+        results[i] = (char) newValue;
     }
 
     /* multi-byte fromUnicode: get the 32-bit stage 2 entry */
-    static int MBCS_STAGE_2_FROM_U(char[] table, int c) {
-        int i = 2 * (table[(c) >>> 10] + ((c >>> 4) & 0x3f)); // 2x because used as index into char[] array treated as
-        // int[] array
-        return ((table[i] & UConverterConstants.UNSIGNED_SHORT_MASK) << 16)
-                | (table[i + 1] & UConverterConstants.UNSIGNED_SHORT_MASK);
+    static int MBCS_STAGE_2_FROM_U(char[] table, int[] tableInts, int c) {
+        int i = table[(c) >>> 10] + ((c >>> 4) & 0x3f);
+        return tableInts[i];
     }
 
     private static boolean MBCS_FROM_U_IS_ROUNDTRIP(int stage2Entry, int c) {
         return (((stage2Entry) & (1 << (16 + ((c) & 0xf)))) != 0);
     }
 
-    static char MBCS_VALUE_2_FROM_STAGE_2(byte[] bytes, int stage2Entry, int c) {
-        int i = 2 * (16 * ((char) stage2Entry & UConverterConstants.UNSIGNED_SHORT_MASK) + (c & 0xf));
-        return (char) (((bytes[i] & UConverterConstants.UNSIGNED_BYTE_MASK) << 8) | (bytes[i + 1] & UConverterConstants.UNSIGNED_BYTE_MASK));
+    static char MBCS_VALUE_2_FROM_STAGE_2(char[] chars, int stage2Entry, int c) {
+        int i = 16 * (stage2Entry & UConverterConstants.UNSIGNED_SHORT_MASK) + (c & 0xf);
+        return chars[i];
     }
-    
-    static void MBCS_VALUE_2_FROM_STAGE_2_SET(byte[] bytes, int stage2Entry, int c, int newValue) {
-        int i = 2 * (16 * ((char) stage2Entry & UConverterConstants.UNSIGNED_SHORT_MASK) + (c & 0xf));
-        bytes[i] = (byte)((newValue >> 8) & UConverterConstants.UNSIGNED_BYTE_MASK);
-        bytes[i + 1] = (byte)(newValue & UConverterConstants.UNSIGNED_BYTE_MASK);
+
+    static void MBCS_VALUE_2_FROM_STAGE_2_SET(char[] chars, int stage2Entry, int c, int newValue) {
+        int i = 16 * (stage2Entry & UConverterConstants.UNSIGNED_SHORT_MASK) + (c & 0xf);
+        chars[i] = (char) newValue;
     }
 
-    private static int MBCS_VALUE_4_FROM_STAGE_2(byte[] bytes, int stage2Entry, int c) {
-        int i = 4 * (16 * ((char) stage2Entry & UConverterConstants.UNSIGNED_SHORT_MASK) + (c & 0xf));
-        return ((bytes[i] & UConverterConstants.UNSIGNED_BYTE_MASK) << 24)
-                | ((bytes[i + 1] & UConverterConstants.UNSIGNED_BYTE_MASK) << 16)
-                | ((bytes[i + 2] & UConverterConstants.UNSIGNED_BYTE_MASK) << 8)
-                | (bytes[i + 3] & UConverterConstants.UNSIGNED_BYTE_MASK);
+    private static int MBCS_VALUE_4_FROM_STAGE_2(int[] ints, int stage2Entry, int c) {
+        int i = 16 * (stage2Entry & UConverterConstants.UNSIGNED_SHORT_MASK) + (c & 0xf);
+        return ints[i];
     }
 
     static int MBCS_POINTER_3_FROM_STAGE_2(byte[] bytes, int stage2Entry, int c) {
-        return ((16 * ((char) (stage2Entry) & UConverterConstants.UNSIGNED_SHORT_MASK) + ((c) & 0xf)) * 3);
+        return ((16 * (stage2Entry & UConverterConstants.UNSIGNED_SHORT_MASK) + ((c) & 0xf)) * 3);
     }
 
     // ------------UConverterExt-------------------------------------------------------
@@ -1384,7 +1366,7 @@ class CharsetMBCS extends CharsetICU {
     }
 
     static boolean TO_U_IS_PARTIAL(int value) {
-        return (value & UConverterConstants.UNSIGNED_INT_MASK) < TO_U_MIN_CODE_POINT;
+        return 0 <= value && value < TO_U_MIN_CODE_POINT;
     }
 
     static int TO_U_GET_PARTIAL_INDEX(int value) {
@@ -1396,16 +1378,19 @@ class CharsetMBCS extends CharsetICU {
     }
 
     private static int TO_U_MAKE_WORD(byte b, int value) {
-        return ((b & UConverterConstants.UNSIGNED_BYTE_MASK) << TO_U_BYTE_SHIFT) | value;
+        // TO_U_BYTE_SHIFT == 24: safe to just shift the signed byte-as-int.
+        return (b << TO_U_BYTE_SHIFT) | value;
     }
 
     /* use after masking off the roundtrip flag */
     static boolean TO_U_IS_CODE_POINT(int value) {
-        return (value & UConverterConstants.UNSIGNED_INT_MASK) <= TO_U_MAX_CODE_POINT;
+        assert value >= 0;
+        return value <= TO_U_MAX_CODE_POINT;
     }
 
     static int TO_U_GET_CODE_POINT(int value) {
-        return (int) ((value & UConverterConstants.UNSIGNED_INT_MASK) - TO_U_MIN_CODE_POINT);
+        assert value >= 0;
+        return value - TO_U_MIN_CODE_POINT;
     }
 
     private static int TO_U_GET_INDEX(int value) {
@@ -1474,6 +1459,9 @@ class CharsetMBCS extends CharsetICU {
         int oldpos = indexes.position();
         Buffer b;
 
+        // TODO: It is very inefficient to create Buffer objects for each array access.
+        // We should create an inner class Extensions (or sibling class CharsetMBCSExtensions)
+        // which has buffers for the arrays, together with the code that works with them.
         indexes.position(indexes.getInt(index << 2));
         if (itemType == int.class)
             b = indexes.asIntBuffer();
@@ -1900,8 +1888,8 @@ class CharsetMBCS extends CharsetICU {
 
             /* GB 18030 */
             if (length == 4 && (options & MBCS_OPTION_GB18030) != 0) {
-                long[] range;
-                long linear;
+                int[] range;
+                int linear;
                 int i;
 
                 linear = LINEAR_18030(toUBytesArray[0], toUBytesArray[1], toUBytesArray[2], toUBytesArray[3]);
@@ -1915,7 +1903,7 @@ class CharsetMBCS extends CharsetICU {
                         linear = range[0] + (linear - range[2]);
 
                         /* output this code point */
-                        cr[0] = toUWriteCodePoint((int) linear, target, offsets, sourceIndex);
+                        cr[0] = toUWriteCodePoint(linear, target, offsets, sourceIndex);
 
                         return 0;
                     }
@@ -2045,7 +2033,7 @@ class CharsetMBCS extends CharsetICU {
             }
 
             if (sharedData.mbcs.countStates == 1) {
-                if ((sharedData.mbcs.unicodeMask & UConverterConstants.HAS_SUPPLEMENTARY) == 0) {
+                if (!sharedData.mbcs.hasSupplementary()) {
                     cr[0] = cnvMBCSSingleToBMPWithOffsets(source, target, offsets, flush);
                 } else {
                     cr[0] = cnvMBCSSingleToUnicodeWithOffsets(source, target, offsets, flush);
@@ -2818,7 +2806,7 @@ class CharsetMBCS extends CharsetICU {
             for (b = 0; b <= 0xff; b++) {
                 entry = row[b];
                 if (MBCS_ENTRY_IS_TRANSITION(entry) && 
-                        hasValidTrailBytes(stateTable, (short)(MBCS_ENTRY_TRANSITION_STATE(entry) & UConverterConstants.UNSIGNED_BYTE_MASK))) {
+                        hasValidTrailBytes(stateTable, (short)MBCS_ENTRY_TRANSITION_STATE(entry))) {
                     return true;
                 }
             }
@@ -2829,9 +2817,9 @@ class CharsetMBCS extends CharsetICU {
             int[] row = stateTable[state];
             int entry = row[b];
             if (MBCS_ENTRY_IS_TRANSITION(entry)) { /* lead byte */
-                return hasValidTrailBytes(stateTable, (short)(MBCS_ENTRY_TRANSITION_STATE(entry) & UConverterConstants.UNSIGNED_BYTE_MASK));
+                return hasValidTrailBytes(stateTable, (short)MBCS_ENTRY_TRANSITION_STATE(entry));
             } else {
-                short action = (short)(MBCS_ENTRY_FINAL_ACTION(entry) & UConverterConstants.UNSIGNED_BYTE_MASK);
+                int action = MBCS_ENTRY_FINAL_ACTION(entry);
                 if (action == MBCS_STATE_CHANGE_ONLY && isDBCSOnly) {
                     return false;   /* SI/SO are illegal for DBCS-only conversion */
                 } else {
@@ -2866,6 +2854,8 @@ class CharsetMBCS extends CharsetICU {
             int sourceArrayIndex;
             char[] table;
             byte[] pArray, bytes;
+            char[] chars;
+            int[] ints;
             int pArrayIndex, outputType, c;
             int prevSourceIndex, sourceIndex, nextSourceIndex;
             int stage2Entry = 0, value = 0, length = 0, prevLength;
@@ -2908,12 +2898,15 @@ class CharsetMBCS extends CharsetICU {
                 }
 
                 table = sharedData.mbcs.fromUnicodeTable;
+                int[] tableInts = sharedData.mbcs.fromUnicodeTableInts;
                 sourceArrayIndex = source.position();
 
+                bytes = sharedData.mbcs.fromUnicodeBytes;
+                ints = sharedData.mbcs.fromUnicodeInts;
                 if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) {
-                    bytes = sharedData.mbcs.swapLFNLFromUnicodeBytes;
+                    chars = sharedData.mbcs.swapLFNLFromUnicodeChars;
                 } else {
-                    bytes = sharedData.mbcs.fromUnicodeBytes;
+                    chars = sharedData.mbcs.fromUnicodeChars;
                 }
 
                 // asciiRoundtrips = sharedData.mbcs.asciiRoundtrips;
@@ -3054,7 +3047,7 @@ class CharsetMBCS extends CharsetICU {
                              * byte may be output if the "assigned" bit in stage 2 was on. The data structure does not
                              * support zero byte output as a fallback, and also does not allow output of leading zeros.
                              */
-                            stage2Entry = MBCS_STAGE_2_FROM_U(table, c);
+                            stage2Entry = MBCS_STAGE_2_FROM_U(table, tableInts, c);
 
                             /* get the bytes and the length for the output */
                             switch (outputType) {
@@ -3078,8 +3071,8 @@ class CharsetMBCS extends CharsetICU {
                                  * callback function changed it for its output.
                                  */
                                 fromUnicodeStatus = prevLength; /* save the old state */
-                                value = MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
-                                if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {
+                                value = MBCS_VALUE_2_FROM_STAGE_2(chars, stage2Entry, c);
+                                if (value <= 0xff) {
                                     if (value == 0 && MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) == false) {
                                         /* no mapping, leave value==0 */
                                         length = 0;
@@ -3116,8 +3109,8 @@ class CharsetMBCS extends CharsetICU {
                                 break;
                             case MBCS_OUTPUT_DBCS_ONLY:
                                 /* table with single-byte results, but only DBCS mappings used */
-                                value = MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
-                                if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {
+                                value = MBCS_VALUE_2_FROM_STAGE_2(chars, stage2Entry, c);
+                                if (value <= 0xff) {
                                     /* no mapping or SBCS result, not taken for DBCS-only */
                                     value = stage2Entry = 0; /* stage2Entry=0 to reset roundtrip flags */
                                     length = 0;
@@ -3131,30 +3124,33 @@ class CharsetMBCS extends CharsetICU {
                                 value = ((pArray[pArrayIndex] & UConverterConstants.UNSIGNED_BYTE_MASK) << 16)
                                         | ((pArray[pArrayIndex + 1] & UConverterConstants.UNSIGNED_BYTE_MASK) << 8)
                                         | (pArray[pArrayIndex + 2] & UConverterConstants.UNSIGNED_BYTE_MASK);
-                                if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {
+                                if (value <= 0xff) {
                                     length = 1;
-                                } else if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xffff) {
+                                } else if (value <= 0xffff) {
                                     length = 2;
                                 } else {
                                     length = 3;
                                 }
                                 break;
                             case MBCS_OUTPUT_4:
-                                value = MBCS_VALUE_4_FROM_STAGE_2(bytes, stage2Entry, c);
-                                if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {
+                                value = MBCS_VALUE_4_FROM_STAGE_2(ints, stage2Entry, c);
+                                if (value < 0) {
+                                    // Half of the 4-byte values look negative in a signed int.
+                                    length = 4;
+                                } else if (value <= 0xff) {
                                     length = 1;
-                                } else if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xffff) {
+                                } else if (value <= 0xffff) {
                                     length = 2;
-                                } else if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xffffff) {
+                                } else if (value <= 0xffffff) {
                                     length = 3;
                                 } else {
                                     length = 4;
                                 }
                                 break;
                             case MBCS_OUTPUT_3_EUC:
-                                value = MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
+                                value = MBCS_VALUE_2_FROM_STAGE_2(chars, stage2Entry, c);
                                 /* EUC 16-bit fixed-length representation */
-                                if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {
+                                if (value <= 0xff) {
                                     length = 1;
                                 } else if ((value & 0x8000) == 0) {
                                     value |= 0x8e8000;
@@ -3173,9 +3169,9 @@ class CharsetMBCS extends CharsetICU {
                                         | ((pArray[pArrayIndex + 1] & UConverterConstants.UNSIGNED_BYTE_MASK) << 8)
                                         | (pArray[pArrayIndex + 2] & UConverterConstants.UNSIGNED_BYTE_MASK);
                                 /* EUC 16-bit fixed-length representation applied to the first two bytes */
-                                if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {
+                                if (value <= 0xff) {
                                     length = 1;
-                                } else if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xffff) {
+                                } else if (value <= 0xffff) {
                                     length = 2;
                                 } else if ((value & 0x800000) == 0) {
                                     value |= 0x8e800000;
@@ -3396,24 +3392,25 @@ class CharsetMBCS extends CharsetICU {
             int p;
 
             /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
-            if (c <= 0xffff || ((sharedData.mbcs.unicodeMask & UConverterConstants.HAS_SUPPLEMENTARY) != 0)) {
+            if (c <= 0xffff || sharedData.mbcs.hasSupplementary()) {
                 table = sharedData.mbcs.fromUnicodeTable;
 
                 /* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */
                 if (sharedData.mbcs.outputType == MBCS_OUTPUT_1) {
-                    value = MBCS_SINGLE_RESULT_FROM_U(table, sharedData.mbcs.fromUnicodeBytes, c);
+                    value = MBCS_SINGLE_RESULT_FROM_U(table, sharedData.mbcs.fromUnicodeChars, c);
                     /* is this code point assigned, or do we use fallbacks? */
                     if (isUseFallback ? value >= 0x800 : value >= 0xc00) {
                         pValue[0] = value & 0xff;
                         return 1;
                     }
                 } else /* outputType!=MBCS_OUTPUT_1 */{
-                    stage2Entry = MBCS_STAGE_2_FROM_U(table, c);
+                    int[] tableInts = sharedData.mbcs.fromUnicodeTableInts;
+                    stage2Entry = MBCS_STAGE_2_FROM_U(table, tableInts, c);
 
                     /* get the bytes and the length for the output */
                     switch (sharedData.mbcs.outputType) {
                     case MBCS_OUTPUT_2:
-                        value = MBCS_VALUE_2_FROM_STAGE_2(sharedData.mbcs.fromUnicodeBytes, stage2Entry, c);
+                        value = MBCS_VALUE_2_FROM_STAGE_2(sharedData.mbcs.fromUnicodeChars, stage2Entry, c);
                         if (value <= 0xff) {
                             length = 1;
                         } else {
@@ -3883,28 +3880,27 @@ class CharsetMBCS extends CharsetICU {
          * @return if(U_FAILURE) return the code point for cnv->fromUChar32 else return 0 after output has been written
          * to the target
          */
-        private int fromU(int cp_, CharBuffer source, ByteBuffer target, IntBuffer offsets, int sourceIndex,
+        private int fromU(int cp, CharBuffer source, ByteBuffer target, IntBuffer offsets, int sourceIndex,
                 int length, boolean flush, CoderResult[] cr) {
             // ByteBuffer cx;
-            long cp = cp_ & UConverterConstants.UNSIGNED_INT_MASK;
 
             useSubChar1 = false;
 
             if (sharedData.mbcs.extIndexes != null
-                    && initialMatchFromU((int) cp, source, target, offsets, sourceIndex, flush, cr)) {
+                    && initialMatchFromU(cp, source, target, offsets, sourceIndex, flush, cr)) {
                 return 0; /* an extension mapping handled the input */
             }
 
             /* GB 18030 */
             if ((options & MBCS_OPTION_GB18030) != 0) {
-                long[] range;
+                int[] range;
                 int i;
 
                 for (i = 0; i < gb18030Ranges.length; ++i) {
                     range = gb18030Ranges[i];
                     if (range[0] <= cp && cp <= range[1]) {
                         /* found the Unicode code point, output the four-byte sequence for it */
-                        long linear;
+                        int linear;
                         byte bytes[] = new byte[4];
 
                         /* get the linear value of the first GB 18030 code in this range */
@@ -3996,7 +3992,7 @@ class CharsetMBCS extends CharsetICU {
             int sourceArrayIndex, lastSource;
             int targetCapacity, length;
             char[] table;
-            byte[] results;
+            char[] results;
 
             int c, sourceIndex;
             char value, minValue;
@@ -4007,12 +4003,9 @@ class CharsetMBCS extends CharsetICU {
             table = sharedData.mbcs.fromUnicodeTable;
 
             if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) {
-                results = sharedData.mbcs.swapLFNLFromUnicodeBytes; // agljport:comment should swapLFNLFromUnicodeBytes
-                // be a ByteBuffer so results can be a 16-bit view
-                // of it?
+                results = sharedData.mbcs.swapLFNLFromUnicodeChars;
             } else {
-                results = sharedData.mbcs.fromUnicodeBytes; // agljport:comment should swapLFNLFromUnicodeBytes be a
-                // ByteBuffer so results can be a 16-bit view of it?
+                results = sharedData.mbcs.fromUnicodeChars;
             }
 
             if (useFallback) {
@@ -4164,7 +4157,7 @@ class CharsetMBCS extends CharsetICU {
             int sourceArrayIndex;
 
             char[] table;
-            byte[] results; // agljport:comment results is used to to get 16-bit values out of byte[] array
+            char[] results;
 
             int c;
             int sourceIndex, nextSourceIndex;
@@ -4178,12 +4171,9 @@ class CharsetMBCS extends CharsetICU {
             table = sharedData.mbcs.fromUnicodeTable;
 
             if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) {
-                results = sharedData.mbcs.swapLFNLFromUnicodeBytes; // agljport:comment should swapLFNLFromUnicodeBytes
-                // be a ByteBuffer so results can be a 16-bit view
-                // of it?
+                results = sharedData.mbcs.swapLFNLFromUnicodeChars;
             } else {
-                results = sharedData.mbcs.fromUnicodeBytes; // agljport:comment should swapLFNLFromUnicodeBytes be a
-                // ByteBuffer so results can be a 16-bit view of it?
+                results = sharedData.mbcs.fromUnicodeChars;
             }
 
             if (useFallback) {
@@ -4316,7 +4306,7 @@ class CharsetMBCS extends CharsetICU {
             int sourceArrayIndex;
 
             char[] table;
-            byte[] bytes;
+            char[] chars;
 
             int c, sourceIndex, nextSourceIndex;
 
@@ -4332,11 +4322,12 @@ class CharsetMBCS extends CharsetICU {
             sourceArrayIndex = source.position();
 
             table = sharedData.mbcs.fromUnicodeTable;
+            int[] tableInts = sharedData.mbcs.fromUnicodeTableInts;
 
             if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) {
-                bytes = sharedData.mbcs.swapLFNLFromUnicodeBytes;
+                chars = sharedData.mbcs.swapLFNLFromUnicodeChars;
             } else {
-                bytes = sharedData.mbcs.fromUnicodeBytes;
+                chars = sharedData.mbcs.fromUnicodeChars;
             }
 
             /* get the converter state from UConverter */
@@ -4413,12 +4404,12 @@ class CharsetMBCS extends CharsetICU {
                         }
 
                         /* convert the Unicode code point in c into codepage bytes */
-                        stage2Entry = MBCS_STAGE_2_FROM_U(table, c);
+                        stage2Entry = MBCS_STAGE_2_FROM_U(table, tableInts, c);
 
                         /* get the bytes and the length for the output */
                         /* MBCS_OUTPUT_2 */
-                        value = MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
-                        if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {
+                        value = MBCS_VALUE_2_FROM_STAGE_2(chars, stage2Entry, c);
+                        if (value <= 0xff) {
                             length = 1;
                         } else {
                             length = 2;
@@ -4780,7 +4771,7 @@ class CharsetMBCS extends CharsetICU {
         
         mbcsTable = data.mbcs;
         table = mbcsTable.fromUnicodeTable; 
-        if((mbcsTable.unicodeMask & UConverterConstants.HAS_SUPPLEMENTARY)!=0){
+        if(mbcsTable.hasSupplementary()){
             maxStage1 = 0x440;
         }
         else{
@@ -4791,9 +4782,8 @@ class CharsetMBCS extends CharsetICU {
         if(mbcsTable.outputType==MBCS_OUTPUT_1){
             char stage2, stage3;
             char minValue;
-            CharBuffer results;
-            results = ByteBuffer.wrap(mbcsTable.fromUnicodeBytes).asCharBuffer();
-                                   
+            char[] results = mbcsTable.fromUnicodeChars;
+
             if(which==ROUNDTRIP_SET) {
                 /* use only roundtrips */
                 minValue=0xf00;
@@ -4811,10 +4801,9 @@ class CharsetMBCS extends CharsetICU {
                             /*read the stage 3 block */
                             stage3 = (char)st3;
                             do {
-                                if(results.get(stage3++)>=minValue){
+                                if(results[stage3++]>=minValue){
                                      setFillIn.add(c);
                                 }
-                               
                             }while((++c&0xf) !=0);
                           } else {
                             c+= 16; /*empty stage 2 block */
@@ -4825,12 +4814,15 @@ class CharsetMBCS extends CharsetICU {
                 }
             }
         } else {
+            int[] tableInts = mbcsTable.fromUnicodeTableInts;
             int stage2,stage3;
             byte[] bytes;
             int st3Multiplier;
             int value;
             boolean useFallBack;
             bytes = mbcsTable.fromUnicodeBytes;
+            char[] chars = mbcsTable.fromUnicodeChars;
+            int[] ints = mbcsTable.fromUnicodeInts;
             useFallBack = (which == ROUNDTRIP_AND_FALLBACK_SET);
             switch(mbcsTable.outputType) {
             case MBCS_OUTPUT_3:
@@ -4844,49 +4836,41 @@ class CharsetMBCS extends CharsetICU {
                 st3Multiplier =2;
                 break;
             }
-            //ByteBuffer buffer = (ByteBuffer)charTobyte(table);
-            
+
             for(st1=0;st1<maxStage1;++st1){
                 st2 = table[st1]; 
                 if(st2>(maxStage1>>1)){
                     stage2 =  st2 ;
-                    for(st2=0;st2<128;++st2){
+                    for(st2=0;st2<64;++st2){
                         /*read the stage 3 block */
-                        st3 = table[stage2*2 + st2]<<16;
-                        st3+=table[stage2*2 + ++st2];
+                        st3 = tableInts[stage2 + st2];
                         if(st3!=0){
                         //if((st3=table[stage2+st2])!=0){
                             stage3 = st3Multiplier*16*(st3&UConverterConstants.UNSIGNED_SHORT_MASK);
-                            
+
                             /* get the roundtrip flags for the stage 3 block */
-                            st3>>=16;
-                            st3 &= UConverterConstants.UNSIGNED_SHORT_MASK;
+                            st3>>>=16;
                             switch(filter) {
                             case UCNV_SET_FILTER_NONE:
                                 do {
-                                    
                                    if((st3&1)!=0){
                                         setFillIn.add(c);
-                                        stage3+=st3Multiplier;
                                    }else if (useFallBack) {
-                                        
-                                        char b =0;
+                                        int b =0;
                                         switch(st3Multiplier) {
-                                        case 4 :
-                                           
-                                            b|= ByteBuffer.wrap(bytes).getChar(stage3++);
-                                           
-                                        case 3 :
-                                            
-                                            b|= ByteBuffer.wrap(bytes).getChar(stage3++);
-                                           
-                                        case 2 :
-                                           
-                                            b|= ByteBuffer.wrap(bytes).getChar(stage3) | ByteBuffer.wrap(bytes).getChar(stage3+1);
-                                            stage3+=2;
+                                        case 4:
+                                            b = ints[stage3 / 4];
+                                            break;
+                                        case 3:
+                                            b |= bytes[stage3] | bytes[stage3 + 1] | bytes[stage3 + 2];
+                                            break;
+                                        case 2:
+                                            b = chars[stage3 / 2];
+                                            break;
                                         default:
                                             break;
                                         }
+                                        stage3+=st3Multiplier;
                                         if(b!=0) {
                                             setFillIn.add(c);
                                         }
@@ -4897,8 +4881,7 @@ class CharsetMBCS extends CharsetICU {
                             case UCNV_SET_FILTER_DBCS_ONLY:
                                 /* Ignore single-byte results (<0x100). */
                                 do {
-                                    if(((st3&1) != 0 || useFallBack) && 
-                                            (UConverterConstants.UNSIGNED_SHORT_MASK & (ByteBuffer.wrap(bytes).getChar(stage3))) >= 0x100){
+                                    if(((st3&1) != 0 || useFallBack) && chars[stage3 / 2] >= 0x100){
                                         setFillIn.add(c);
                                     }
                                     st3>>=1;
@@ -4909,7 +4892,7 @@ class CharsetMBCS extends CharsetICU {
                                 /* only add code points that map to CNS 11643 planes 1&2 for non-EXT ISO-2022-CN. */
                                 do {
                                     if(((st3&1) != 0 || useFallBack) && 
-                                            ((value= (UConverterConstants.UNSIGNED_BYTE_MASK & (ByteBuffer.wrap(bytes).get(stage3))))==0x81 || value==0x82) ){
+                                            ((value= (UConverterConstants.UNSIGNED_BYTE_MASK & bytes[stage3]))==0x81 || value==0x82) ){
                                         setFillIn.add(c);
                                     }
                                     st3>>=1;
@@ -4919,8 +4902,7 @@ class CharsetMBCS extends CharsetICU {
                             case UCNV_SET_FILTER_SJIS:
                                 /* only add code points that map to Shift-JIS codes corresponding to JIS X 0208. */
                                 do{
-                                    
-                                    if(((st3&1) != 0 || useFallBack) && (value=(UConverterConstants.UNSIGNED_SHORT_MASK & (ByteBuffer.wrap(bytes).getChar(stage3))))>=0x8140 && value<=0xeffc){
+                                    if(((st3&1) != 0 || useFallBack) && (value=chars[stage3 / 2])>=0x8140 && value<=0xeffc){
                                         setFillIn.add(c);
                                     }
                                     st3>>=1;
@@ -4931,7 +4913,7 @@ class CharsetMBCS extends CharsetICU {
                                 /* only add code points that map to ISO 2022 GR 94 DBCS codes */
                                 do {
                                     if(((st3&1) != 0 || useFallBack) && 
-                                            (UConverterConstants.UNSIGNED_SHORT_MASK & ((value=(UConverterConstants.UNSIGNED_SHORT_MASK & (ByteBuffer.wrap(bytes).getChar(stage3))))- 0xa1a1))<=(0xfefe - 0xa1a1) && 
+                                            (UConverterConstants.UNSIGNED_SHORT_MASK & ((value=chars[stage3 / 2])- 0xa1a1))<=(0xfefe - 0xa1a1) && 
                                             (UConverterConstants.UNSIGNED_BYTE_MASK & (value - 0xa1)) <= (0xfe - 0xa1)){
                                         setFillIn.add(c);
                                     }
@@ -4943,7 +4925,7 @@ class CharsetMBCS extends CharsetICU {
                                 /*Only add code points that are suitable for HZ DBCS*/
                                 do {
                                     if( ((st3&1) != 0 || useFallBack) && 
-                                            (UConverterConstants.UNSIGNED_SHORT_MASK & ((value=(UConverterConstants.UNSIGNED_SHORT_MASK & (ByteBuffer.wrap(bytes).getChar(stage3))))-0xa1a1))<=(0xfdfe - 0xa1a1) &&
+                                            (UConverterConstants.UNSIGNED_SHORT_MASK & ((value=chars[stage3 / 2])-0xa1a1))<=(0xfdfe - 0xa1a1) &&
                                             (UConverterConstants.UNSIGNED_BYTE_MASK & (value - 0xa1)) <= (0xfe - 0xa1)){
                                         setFillIn.add(c);
                                     }
@@ -5056,7 +5038,7 @@ class CharsetMBCS extends CharsetICU {
                     if(st3!= 0){
                         ps3 = st3;
                         do {
-                            value = stage3b.get(UConverterConstants.UNSIGNED_SHORT_MASK&stage3.get(ps3++));
+                            value = stage3b.get(stage3.get(ps3++));
                             if(value==0){
                                 /* no mapping do nothing */
                             }else if (FROM_U_IS_PARTIAL(value)){
@@ -5078,14 +5060,13 @@ class CharsetMBCS extends CharsetICU {
                                     }
                                     break;
                                 case UCNV_SET_FILTER_GR94DBCS:
-                                    if(!(FROM_U_GET_LENGTH(value)==2 && (UConverterConstants.UNSIGNED_SHORT_MASK & ((value=FROM_U_GET_DATA(value)) - 0xa1a1))<=(0xfefe - 0xa1a1) 
+                                    if(!(FROM_U_GET_LENGTH(value)==2 && ((value=FROM_U_GET_DATA(value)) - 0xa1a1)<=(0xfefe - 0xa1a1)
                                             && (UConverterConstants.UNSIGNED_BYTE_MASK & (value - 0xa1))<= (0xfe - 0xa1))){
-                                        
                                         continue;
                                     }
                                     break;
                                 case UCNV_SET_FILTER_HZ:
-                                    if(!(FROM_U_GET_LENGTH(value)==2 && (UConverterConstants.UNSIGNED_SHORT_MASK & ((value=FROM_U_GET_DATA(value)) - 0xa1a1))<=(0xfdfe - 0xa1a1) 
+                                    if(!(FROM_U_GET_LENGTH(value)==2 && ((value=FROM_U_GET_DATA(value)) - 0xa1a1)<=(0xfdfe - 0xa1a1)
                                             && (UConverterConstants.UNSIGNED_BYTE_MASK & (value - 0xa1))<= (0xfe - 0xa1))){
                                         continue;
                                     }
index 4bc4921cac982e67b824546c8eb71590f7c0ba0a..2d53b887e790d4950234e92cc30900859d2635f7 100644 (file)
 package com.ibm.icu.charset;
 
 import java.io.IOException;
-import java.io.InputStream;
 import java.nio.ByteBuffer;
 
 import com.ibm.icu.impl.ICUBinary;
-import com.ibm.icu.impl.ICUData;
-import com.ibm.icu.impl.ICUResourceBundle;
 
 final class UConverterAlias {
     static final int UNNORMALIZED = 0;
@@ -115,13 +112,12 @@ final class UConverterAlias {
         return (alias.length() != 0);
     }
 
-    private static final String CNVALIAS_DATA_FILE_NAME = ICUResourceBundle.ICU_BUNDLE + "/cnvalias.icu";
+    private static final String CNVALIAS_DATA_FILE_NAME = "cnvalias.icu";
 
     private static final synchronized boolean haveAliasData() 
                                                throws IOException{
         boolean needInit;
 
-        // agljport:todo umtx_lock(NULL);
         needInit = gAliasData == null;
 
         /* load converter alias data from file if necessary */
@@ -129,10 +125,8 @@ final class UConverterAlias {
             ByteBuffer data = null;
             int[] tableArray = null;
             int tableStart;
-            //byte[] reservedBytes = null;
 
-            InputStream i = ICUData.getRequiredStream(CNVALIAS_DATA_FILE_NAME);
-            ByteBuffer b = ICUBinary.getByteBufferFromInputStream(i);
+            ByteBuffer b = ICUBinary.getRequiredData(CNVALIAS_DATA_FILE_NAME);
             UConverterAliasDataReader reader = new UConverterAliasDataReader(b);
             tableArray = reader.readToc(offsetsCount);
 
@@ -160,21 +154,10 @@ final class UConverterAlias {
             if (gOptionTable[0] != STD_NORMALIZED) {
                 throw new IOException("Unsupported alias normalization");
             }
-            
-            // agljport:todo umtx_lock(NULL);
+
             if (gAliasData == null) {
                 gAliasData = data;
                 data = null;
-
-                // agljport:fix ucln_common_registerCleanup(UCLN_COMMON_IO,
-                // io_cleanup);
-            }
-            // agljport:todo umtx_unlock(NULL);
-
-            /* if a different thread set it first, then close the extra data */
-            if (data != null) {
-                // agljport:fix udata_close(data); /* NULL if it was set
-                // correctly */
             }
         }
 
index 32d8e4e953722b81f243ec6d386e5d706b44c2fd..ded61b68fdb4bcc7f218e5bd9030b8313882a5dd 100644 (file)
@@ -9,8 +9,14 @@ package com.ibm.icu.charset;
 
 import java.io.IOException;
 import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.IntBuffer;
 
+import com.ibm.icu.charset.CharsetMBCS.MBCSHeader;
+import com.ibm.icu.charset.CharsetMBCS.MBCSToUFallback;
+import com.ibm.icu.charset.CharsetMBCS.UConverterMBCSTable;
 import com.ibm.icu.impl.ICUBinary;
+import com.ibm.icu.impl.InvalidFormatException;
 
 /**
  * ucnvmbcs.h
@@ -395,9 +401,17 @@ import com.ibm.icu.impl.ICUBinary;
  *   Indexes and lengths stored in the fromUTableValues[].
  */
 
-final class UConverterDataReader implements ICUBinary.Authenticate {
+final class UConverterDataReader {
     //private final static boolean debug = ICUDebug.enabled("UConverterDataReader");
 
+    private static final class IsAcceptable implements ICUBinary.Authenticate {
+        // @Override when we switch to Java 6
+        public boolean isDataVersionAcceptable(byte formatVersion[]) {
+            return formatVersion[0] == 6;
+        }
+    }
+    private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable();
+
     /*
      *  UConverterDataReader(UConverterDataReader r)
         {
@@ -405,10 +419,8 @@ final class UConverterDataReader implements ICUBinary.Authenticate {
             unicodeVersion = r.unicodeVersion;
         }
         */
-   /* the number bytes read from the buffer */
-   int bytesRead = 0;
-   /* the number of bytes read for static data */
-   int staticDataBytesRead = 0;
+    /** The buffer position after the static data. */
+    private int posAfterStaticData;
 
    /**
     * <p>Protected constructor.</p>
@@ -420,7 +432,7 @@ final class UConverterDataReader implements ICUBinary.Authenticate {
         //if(debug) System.out.println("Bytes in buffer " + bytes.remaining());
 
         byteBuffer = bytes;
-        /*unicodeVersion = */ICUBinary.readHeader(byteBuffer, DATA_FORMAT_ID, this);
+        /*unicodeVersion = */ICUBinary.readHeader(byteBuffer, DATA_FORMAT_ID, IS_ACCEPTABLE);
 
         //if(debug) System.out.println("Bytes left in byteBuffer " + byteBuffer.remaining());
     }
@@ -429,95 +441,137 @@ final class UConverterDataReader implements ICUBinary.Authenticate {
 
     protected void readStaticData(UConverterStaticData sd) throws IOException
     {
-        int bRead = 0;
         sd.structSize = byteBuffer.getInt();
-        bRead +=4;
         byte[] name = new byte[UConverterConstants.MAX_CONVERTER_NAME_LENGTH];
         byteBuffer.get(name);
-        bRead +=name.length;
-        sd.name = new String(name, 0, name.length);
+        sd.name = new String(name, "US-ASCII");
         sd.codepage = byteBuffer.getInt();
-        bRead +=4;
         sd.platform = byteBuffer.get();
-        bRead++;
         sd.conversionType = byteBuffer.get();
-        bRead++;
         sd.minBytesPerChar = byteBuffer.get();
-        bRead++;
         sd.maxBytesPerChar = byteBuffer.get();
-        bRead++;
         byteBuffer.get(sd.subChar);
-        bRead += sd.subChar.length;
         sd.subCharLen = byteBuffer.get();
-        bRead++;
         sd.hasToUnicodeFallback = byteBuffer.get();
-        bRead++;
         sd.hasFromUnicodeFallback = byteBuffer.get();
-        bRead++;
         sd.unicodeMask = (short)(byteBuffer.get() & 0xff);
-        bRead++;
         sd.subChar1 = byteBuffer.get();
-        bRead++;
         byteBuffer.get(sd.reserved);
-        bRead += sd.reserved.length;
-        staticDataBytesRead = bRead;
-        bytesRead += bRead;
+        posAfterStaticData = byteBuffer.position();
+    }
+
+    int bytesReadAfterStaticData() {
+        return byteBuffer.position() - posAfterStaticData;
     }
 
     protected void readMBCSHeader(CharsetMBCS.MBCSHeader h) throws IOException
     {
         byteBuffer.get(h.version);
-        bytesRead += h.version.length;
         h.countStates = byteBuffer.getInt();
-        bytesRead+=4;
         h.countToUFallbacks = byteBuffer.getInt();
-        bytesRead+=4;
         h.offsetToUCodeUnits = byteBuffer.getInt();
-        bytesRead+=4;
         h.offsetFromUTable = byteBuffer.getInt();
-        bytesRead+=4;
         h.offsetFromUBytes = byteBuffer.getInt();
-        bytesRead+=4;
         h.flags = byteBuffer.getInt();
-        bytesRead+=4;
         h.fromUBytesLength = byteBuffer.getInt();
-        bytesRead+=4;
         if (h.version[0] == 5 && h.version[1] >= 3) {
             h.options = byteBuffer.getInt();
-            bytesRead+=4;
             if ((h.options & CharsetMBCS.MBCS_OPT_NO_FROM_U) != 0) {
                 h.fullStage2Length = byteBuffer.getInt();
-                bytesRead+=4;
             }
         }
     }
-    
-    protected void readMBCSTable(int[][] stateTableArray, CharsetMBCS.MBCSToUFallback[] toUFallbacksArray, char[] unicodeCodeUnitsArray, char[] fromUnicodeTableArray, byte[] fromUnicodeBytesArray) throws IOException
+
+    protected void readMBCSTable(MBCSHeader header, UConverterMBCSTable mbcsTable) throws IOException
     {
-        int i, j;
-        for(i = 0; i < stateTableArray.length; ++i){
-            for(j = 0; j < stateTableArray[i].length; ++j){
-                stateTableArray[i][j] = byteBuffer.getInt();
-                bytesRead+=4;
-            }
+        IntBuffer intBuffer = byteBuffer.asIntBuffer();
+        mbcsTable.countStates = (byte) header.countStates;
+        mbcsTable.stateTable = new int[header.countStates][256];
+        int i;
+        for(i = 0; i < header.countStates; ++i) {
+            intBuffer.get(mbcsTable.stateTable[i]);
         }
-        for(i = 0; i < toUFallbacksArray.length; ++i) {
-            toUFallbacksArray[i].offset = byteBuffer.getInt();
-            bytesRead+=4;
-            toUFallbacksArray[i].codePoint = byteBuffer.getInt();
-            bytesRead+=4;
+
+        mbcsTable.countToUFallbacks = header.countToUFallbacks;
+        mbcsTable.toUFallbacks = new MBCSToUFallback[header.countToUFallbacks];
+        for(i = 0; i < header.countToUFallbacks; ++i) {
+            int offset = intBuffer.get();
+            int codePoint = intBuffer.get();
+            mbcsTable.toUFallbacks[i] = new MBCSToUFallback(offset, codePoint);
         }
-        for(i = 0; i < unicodeCodeUnitsArray.length; ++i){
-            unicodeCodeUnitsArray[i] = byteBuffer.getChar();
-            bytesRead+=2;
+        // Skip as many bytes as we have read from the IntBuffer.
+        int length = intBuffer.position() * 4;
+        ICUBinary.skipBytes(byteBuffer, length);
+
+        // Consider leaving some large arrays as CharBuffer/IntBuffer rather than
+        // reading them into Java arrays, to reduce initialization time and memory usage,
+        // at the cost of some performance.
+        // For example: unicodeCodeUnits, fromUnicodeTable, fromUnicodeInts.
+        // Take care not to modify the buffer contents for swaplfnl.
+        CharBuffer charBuffer = byteBuffer.asCharBuffer();
+        length = header.offsetFromUTable - header.offsetToUCodeUnits;
+        assert (length & 1) == 0;
+        mbcsTable.unicodeCodeUnits = new char[length / 2];
+        charBuffer.get(mbcsTable.unicodeCodeUnits);
+        // Skip as many bytes as we have read from the CharBuffer.
+        ICUBinary.skipBytes(byteBuffer, length);
+
+        length = header.offsetFromUBytes - header.offsetFromUTable;
+        assert (length & 1) == 0;
+        int fromUTableCharsLength;
+        if (mbcsTable.outputType == CharsetMBCS.MBCS_OUTPUT_1) {
+            // single-byte table stage1 + stage2
+            fromUTableCharsLength = length / 2;
+        } else if (mbcsTable.hasSupplementary()) {
+            // stage1 for Unicode limit 0x110000 >> 10
+            fromUTableCharsLength = 0x440;
+        } else {
+            // stage1 for BMP limit 0x10000 >> 10
+            fromUTableCharsLength = 0x40;
         }
-        for(i = 0; i < fromUnicodeTableArray.length; ++i){
-            fromUnicodeTableArray[i] = byteBuffer.getChar();
-            bytesRead+=2;
+        mbcsTable.fromUnicodeTable = new char[fromUTableCharsLength];
+        charBuffer.get(mbcsTable.fromUnicodeTable);
+        if (mbcsTable.outputType != CharsetMBCS.MBCS_OUTPUT_1) {
+            // Read both stage1 and stage2 together into an int[] array.
+            // Keeping the short stage1 in the array avoids offsetting at runtime.
+            // The stage1 part of this array will not be used.
+            assert (length & 3) == 0;
+            mbcsTable.fromUnicodeTableInts = new int[length / 4];
+            byteBuffer.asIntBuffer().get(mbcsTable.fromUnicodeTableInts);
         }
-        for(i = 0; i < fromUnicodeBytesArray.length; ++i){
-            fromUnicodeBytesArray[i] = byteBuffer.get();
-            bytesRead++;
+        // Skip as many bytes as are in stage1 + stage2.
+        ICUBinary.skipBytes(byteBuffer, length);
+
+        mbcsTable.fromUBytesLength = header.fromUBytesLength;
+        boolean noFromU = ((header.options & CharsetMBCS.MBCS_OPT_NO_FROM_U) != 0);
+        if (!noFromU) {
+            switch (mbcsTable.outputType) {
+            case CharsetMBCS.MBCS_OUTPUT_1:
+            case CharsetMBCS.MBCS_OUTPUT_2:
+            case CharsetMBCS.MBCS_OUTPUT_2_SISO:
+            case CharsetMBCS.MBCS_OUTPUT_3_EUC:
+                mbcsTable.fromUnicodeChars = new char[header.fromUBytesLength / 2];
+                byteBuffer.asCharBuffer().get(mbcsTable.fromUnicodeChars);
+                ICUBinary.skipBytes(byteBuffer, header.fromUBytesLength & ~1);
+                break;
+            case CharsetMBCS.MBCS_OUTPUT_3:
+            case CharsetMBCS.MBCS_OUTPUT_4_EUC:
+                mbcsTable.fromUnicodeBytes = new byte[header.fromUBytesLength];
+                byteBuffer.get(mbcsTable.fromUnicodeBytes);
+                break;
+            case CharsetMBCS.MBCS_OUTPUT_4:
+                mbcsTable.fromUnicodeInts = new int[header.fromUBytesLength / 4];
+                byteBuffer.asIntBuffer().get(mbcsTable.fromUnicodeInts);
+                ICUBinary.skipBytes(byteBuffer, header.fromUBytesLength & ~3);
+                break;
+            default:
+                // Cannot occur, caller checked already.
+                assert false;
+            }
+        } else {
+            // Optional utf8Friendly mbcsIndex -- _MBCSHeader.version 4.3 (ICU 3.8) and higher.
+            // Needed for reconstituting omitted data.
+            mbcsTable.mbcsIndex = byteBuffer.asCharBuffer();
         }
     }
 
@@ -527,60 +581,33 @@ final class UConverterDataReader implements ICUBinary.Authenticate {
         StringBuilder name = new StringBuilder();
         while((c = (char)byteBuffer.get()) !=  0){
             name.append(c);
-            bytesRead++;
         }
-        bytesRead++/*for null terminator*/;
         return name.toString();
     }
 
     //protected int[] readExtIndexes(int skip) throws IOException
-    protected ByteBuffer readExtIndexes(int skip) throws IOException
+    protected ByteBuffer readExtIndexes(int skip) throws IOException, InvalidFormatException
     {
         ICUBinary.skipBytes(byteBuffer, skip);
-        int n = byteBuffer.getInt();
-        bytesRead+=4;
-        int[] indexes = new int[n];
-        indexes[0] = n;
-        for(int i = 1; i < n; ++i) {
-            indexes[i] = byteBuffer.getInt();
-            bytesRead+=4;
+        ByteBuffer b = ICUBinary.sliceWithOrder(byteBuffer);
+        int lengthOfIndexes = b.getInt(0);
+        if (lengthOfIndexes < 32) {
+            throw new InvalidFormatException();
         }
-        //return indexes;
-
-        ByteBuffer b = ByteBuffer.allocate(indexes[31]);
-        for(int i = 0; i < n; ++i) {
-            b.putInt(indexes[i]);
-        }
-        int len = b.remaining();
-        byteBuffer.get(b.array(), b.position(), len);
-        bytesRead += len;
+        int numBytesExtensionStructure = b.getInt(31 * 4);
+        b.limit(numBytesExtensionStructure);
+        ICUBinary.skipBytes(byteBuffer, numBytesExtensionStructure);
         return b;
     }
 
-    /*protected byte[] readExtTables(int n) throws IOException
-    {
-        byte[] tables = new byte[n];
-        int len = byteBuffer.get(tables);
-        if(len==-1){
-            throw new IOException("Read failed");
-        }
-        bytesRead += len;
-        return tables;
-    }*/
-
-    byte[] getDataFormatVersion(){
-        return DATA_FORMAT_VERSION;
-    }
     /**
-     * Inherited method
+     * Data formatVersion 6.1 and higher has a unicodeMask.
      */
-    public boolean isDataVersionAcceptable(byte version[]){
-        return version[0] == DATA_FORMAT_VERSION[0];
+    boolean dataFormatHasUnicodeMask() {
+        int formatVersion0 = byteBuffer.get(16) & 0xff;
+        return formatVersion0 > 6 || (formatVersion0 == 6 && byteBuffer.get(17) != 0);
     }
-    
-/*    byte[] getUnicodeVersion(){
-        return unicodeVersion;    
-    }*/
+
     // private data members -------------------------------------------------
 
     /**
@@ -597,5 +624,4 @@ final class UConverterDataReader implements ICUBinary.Authenticate {
     */
     // DATA_FORMAT_ID_ values taken from icu4c isCnvAcceptable (ucnv_bld.c)
     private static final int DATA_FORMAT_ID = 0x636e7674; // dataFormat="cnvt"
-    private static final byte DATA_FORMAT_VERSION[] = {(byte)0x6};
 }
index 13c0071f736939652735846b2b9fee2d1f37b793..185fe55bc2d0e4963f5b11242a1a670deeabe899 100644 (file)
 package com.ibm.icu.impl.coll;
 
 import java.io.IOException;
-import java.io.InputStream;
+import java.nio.ByteBuffer;
 import java.util.MissingResourceException;
 
 import com.ibm.icu.impl.ICUBinary;
 import com.ibm.icu.impl.ICUData;
-import com.ibm.icu.impl.ICUResourceBundle;
 
 /**
  * Collation root provider.
@@ -42,20 +41,20 @@ public final class CollationRoot {  // purely static
     }
 
     static {  // Corresponds to C++ load() function.
-        CollationTailoring t = new CollationTailoring(null);
-        // TODO: Optionally load from a .dat file or stand-alone .icu file.
-        String path = ICUResourceBundle.ICU_BUNDLE + "/coll/ucadata.icu";
-        InputStream is = ICUData.getRequiredStream(path);
+        CollationTailoring t = null;
         RuntimeException e2 = null;
         try {
-            CollationDataReader.read(null, ICUBinary.getByteBufferFromInputStream(is), t);
+            ByteBuffer bytes = ICUBinary.getRequiredData("coll/ucadata.icu");
+            CollationTailoring t2 = new CollationTailoring(null);
+            CollationDataReader.read(null, bytes, t2);
+            // Keep t=null until after the root data has been read completely.
+            // Otherwise we would set a non-null root object if the data reader throws an exception.
+            t = t2;
         } catch(IOException e) {
-            t = null;
             e2 = new MissingResourceException(
                     "IOException while reading CLDR root data",
-                    "CollationRoot", path);
+                    "CollationRoot", ICUData.ICU_BUNDLE + "/coll/ucadata.icu");
         } catch(RuntimeException e) {
-            t = null;
             e2 = e;
         }
         rootSingleton = t;
index 171ebed73ea09355496521fb719909325eb7fda8..9c487993cb156ca28a4d8f990f7455bf28611c97 100644 (file)
@@ -3,7 +3,7 @@
 #* Copyright (C) 2008-2014, International Business Machines Corporation and    *
 #* others. All Rights Reserved.                                                *
 #*******************************************************************************
-#* This is the properties contains ICU runtime configuration 
+#* This is the properties file which contains ICU runtime configuration.
 #*
 
 #
@@ -20,6 +20,7 @@ com.ibm.icu.util.TimeZone.DefaultTimeZoneType = ICU
 com.ibm.icu.text.MessagePattern.ApostropheMode = DOUBLE_OPTIONAL
 
 #
+# [Internal Use Only]
 # By default, DecimalFormat uses some internal equivalent character
 # data in addition to ones in DecimalFormatSymbols for parsing
 # decimal/grouping separators.  When this property is true,
@@ -29,8 +30,18 @@ com.ibm.icu.text.MessagePattern.ApostropheMode = DOUBLE_OPTIONAL
 # @internal
 com.ibm.icu.text.DecimalFormat.SkipExtendedSeparatorParsing = false
 
+# File system path where ICU looks for binary data files.
+# If not empty, then ICU looks for binary data files before looking for data on the classpath.
+# This string may contain multiple paths, see File.pathSeparatorChar.
+# Spaces (U+0020) around each path are trimmed away. Empty paths are ignored.
+# There may be individual files, for example, zoneinfo64.res,
+# or ICU4C .dat package files, for example, collation.dat or icudt54l.dat.
+# Each ICU data file may contain little-endian or big-endian data.
+# Each ICU data file's charset must be ASCII. (Platform type 'l' or 'b' but not 'e'.)
+# @draft ICU 54
+com.ibm.icu.impl.ICUBinary.dataPath =
 
-# 
+#
 # [Internal Use Only]
 # Disable resource path scan for building full locale name list
 # at run time.
index b345d74f03884a52bdf822c7a319b1c2b2966f74..faec76a3d481140d9676113b2b69730835bedab3 100644 (file)
 
 package com.ibm.icu.impl;
 
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStream;
 import java.nio.ByteBuffer;
 import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.MissingResourceException;
 
+import com.ibm.icu.util.ICUUncheckedIOException;
 import com.ibm.icu.util.VersionInfo;
 
-public final class ICUBinary 
-{    
+public final class ICUBinary {
+    /**
+     * Reads the ICU .dat package file format.
+     * Most methods do not modify the ByteBuffer in any way,
+     * not even its position or other state.
+     */
+    private static final class DatPackageReader {
+        /**
+         * .dat package data format ID "CmnD".
+         */
+        private static final int DATA_FORMAT = 0x436d6e44;
+
+        private static final class IsAcceptable implements Authenticate {
+            // @Override when we switch to Java 6
+            public boolean isDataVersionAcceptable(byte version[]) {
+                return version[0] == 1;
+            }
+        }
+        private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable();
+
+        /**
+         * Checks that the ByteBuffer contains a valid, usable ICU .dat package.
+         * Moves the buffer position from 0 to after the data header.
+         *
+         * @param bytes the contents of a candidate .dat file, with position 0
+         * @return true if the data header is acceptable, the item count is plausible
+         *         for the buffer size, and the first and last item names start with
+         *         the ICU package name; false otherwise, including when the data
+         *         is truncated or its offsets point outside of the buffer
+         */
+        private static boolean validate(ByteBuffer bytes) {
+            try {
+                readHeader(bytes, DATA_FORMAT, IS_ACCEPTABLE);
+                int count = bytes.getInt(bytes.position());  // Do not move the position.
+                if (count <= 0) {
+                    return false;
+                }
+                // For each item, there is one ToC entry (8 bytes) and a name string
+                // and a data item of at least 16 bytes.
+                // (We assume no data item duplicate elimination for now.)
+                // Use long arithmetic: a corrupt, huge count must not overflow
+                // and make this size check pass by accident.
+                if (bytes.position() + 4 + count * (8L + 16) > bytes.capacity()) {
+                    return false;
+                }
+                return startsWithPackageName(bytes, getNameOffset(bytes, 0)) &&
+                        startsWithPackageName(bytes, getNameOffset(bytes, count - 1));
+            } catch (IOException ignored) {
+                // Not an ICU data file of the expected format and version.
+                return false;
+            } catch (IndexOutOfBoundsException ignored) {
+                // Truncated file or corrupt offsets: the absolute reads went
+                // outside of the buffer. Reject the file rather than fail.
+                return false;
+            }
+        }
+
+        private static boolean startsWithPackageName(ByteBuffer bytes, int start) {
+            // Compare all but the trailing 'b' or 'l' which depends on the platform.
+            int length = ICUData.PACKAGE_NAME.length() - 1;
+            for (int i = 0; i < length; ++i) {
+                if (bytes.get(start + i) != ICUData.PACKAGE_NAME.charAt(i)) {
+                    return false;
+                }
+            }
+            // Check for 'b' or 'l' followed by '/'.
+            byte c = bytes.get(start + length++);
+            if ((c != 'b' && c != 'l') || bytes.get(start + length) != '/') {
+                return false;
+            }
+            return true;
+        }
+
+        private static ByteBuffer getData(ByteBuffer bytes, CharSequence key) {
+            int base = bytes.position();
+            int count = bytes.getInt(base);
+
+            // Do a binary search for the key.
+            int start = 0;
+            int limit = count;
+            while (start < limit) {
+                int mid = (start + limit) >>> 1;
+                int nameOffset = getNameOffset(bytes, mid);
+                // Skip "icudt54b/".
+                nameOffset += ICUData.PACKAGE_NAME.length() + 1;
+                int result = compareKeys(key, bytes, nameOffset);
+                if (result < 0) {
+                    limit = mid;
+                } else if (result > 0) {
+                    start = mid + 1;
+                } else {
+                    // We found it!
+                    ByteBuffer data = bytes.duplicate();
+                    data.position(getDataOffset(bytes, mid));
+                    data.limit(getDataOffset(bytes, mid + 1));
+                    return ICUBinary.sliceWithOrder(data);
+                }
+            }
+            return null;  // Not found or table is empty.
+        }
+
+        private static int getNameOffset(ByteBuffer bytes, int index) {
+            int base = bytes.position();
+            assert 0 <= index && index < bytes.getInt(base);  // count
+            // The count integer (4 bytes)
+            // is followed by count (nameOffset, dataOffset) integer pairs (8 bytes per pair).
+            return base + bytes.getInt(base + 4 + index * 8);
+        }
+
+        private static int getDataOffset(ByteBuffer bytes, int index) {
+            int base = bytes.position();
+            int count = bytes.getInt(base);
+            if (index == count) {
+                // Return the limit of the last data item.
+                return bytes.capacity();
+            }
+            assert 0 <= index && index < count;
+            // The count integer (4 bytes)
+            // is followed by count (nameOffset, dataOffset) integer pairs (8 bytes per pair).
+            // The dataOffset follows the nameOffset (skip another 4 bytes).
+            return base + bytes.getInt(base + 4 + 4 + index * 8);
+        }
+    }
+
+    /**
+     * One entry in the list of ICU data files found on the data path:
+     * either a single data file (path != null) or a .dat package (pkgBytes != null).
+     */
+    private static final class DataFile {
+        /**
+         * The '/'-separated path of this file relative to its data path folder.
+         */
+        public final String itemPath;
+        /**
+         * null if a .dat package.
+         */
+        public final File path;
+        /**
+         * .dat package bytes, or null if not a .dat package.
+         * position() is after the header.
+         * Do not modify the position or other state, for thread safety.
+         */
+        public final ByteBuffer pkgBytes;
+
+        /**
+         * Creates an entry for a single data file.
+         */
+        public DataFile(String item, File path) {
+            itemPath = item;
+            this.path = path;
+            pkgBytes = null;
+        }
+        /**
+         * Creates an entry for a .dat package whose bytes have already been loaded.
+         */
+        public DataFile(String item, ByteBuffer bytes) {
+            itemPath = item;
+            path = null;
+            pkgBytes = bytes;
+        }
+        /**
+         * Returns the file path for a single file.
+         * For a .dat package, path is null, so this returns the item path instead.
+         */
+        public String toString() {
+            return path != null ? path.toString() : itemPath;
+        }
+    }
+    private static final List<DataFile> icuDataFiles = new ArrayList<DataFile>();
+
+    static {
+        // Normally com.ibm.icu.impl.ICUBinary.dataPath.
+        String dataPath = ICUConfig.get(ICUBinary.class.getName() + ".dataPath");
+        if (dataPath != null) {
+            addDataFilesFromPath(dataPath, icuDataFiles);
+        }
+    }
+
+    /**
+     * Splits dataPath at File.pathSeparatorChar and adds the data files
+     * found in each listed folder to the files list.
+     * Each path is trimmed; one trailing File.separator is removed;
+     * empty paths are ignored.
+     *
+     * @param dataPath One or more folder paths, separated by File.pathSeparatorChar.
+     * @param files The list to which the found data files are appended.
+     */
+    private static void addDataFilesFromPath(String dataPath, List<DataFile> files) {
+        // Split the path and find files in each location.
+        // This splitting code avoids the regex pattern compilation in String.split()
+        // and its array allocation.
+        // (There is no simple by-character split()
+        // and the StringTokenizer "is discouraged in new code".)
+        int pathStart = 0;
+        while (pathStart < dataPath.length()) {
+            int sepIndex = dataPath.indexOf(File.pathSeparatorChar, pathStart);
+            int pathLimit;
+            if (sepIndex >= 0) {
+                pathLimit = sepIndex;
+            } else {
+                pathLimit = dataPath.length();
+            }
+            String path = dataPath.substring(pathStart, pathLimit).trim();
+            if (path.endsWith(File.separator)) {
+                path = path.substring(0, path.length() - 1);
+            }
+            if (path.length() != 0) {
+                // Add to the caller's list, not directly to the static icuDataFiles.
+                addDataFilesFromFolder(new File(path), new StringBuilder(), files);
+            }
+            if (sepIndex < 0) {
+                break;
+            }
+            pathStart = sepIndex + 1;
+        }
+    }
+
+    private static void addDataFilesFromFolder(File folder, StringBuilder itemPath,
+            List<DataFile> dataFiles) {
+        File[] files = folder.listFiles();
+        if (files == null || files.length == 0) {
+            return;
+        }
+        int folderPathLength = itemPath.length();
+        if (folderPathLength > 0) {
+            // The item path must use the ICU file separator character,
+            // not the platform-dependent File.separatorChar,
+            // so that the enumerated item paths match the paths requested by ICU code.
+            itemPath.append('/');
+            ++folderPathLength;
+        }
+        for (File file : files) {
+            String fileName = file.getName();
+            if (fileName.endsWith(".txt")) {
+                continue;
+            }
+            itemPath.append(fileName);
+            if (file.isDirectory()) {
+                // TODO: Within a folder, put all single files before all .dat packages?
+                addDataFilesFromFolder(file, itemPath, dataFiles);
+            } else if (fileName.endsWith(".dat")) {
+                ByteBuffer pkgBytes = mapFile(file);
+                if (pkgBytes != null && DatPackageReader.validate(pkgBytes)) {
+                    dataFiles.add(new DataFile(itemPath.toString(), pkgBytes));
+                }
+            } else {
+                dataFiles.add(new DataFile(itemPath.toString(), file));
+            }
+            itemPath.setLength(folderPathLength);
+        }
+    }
+
+    /**
+     * Compares the length-specified input key with the
+     * NUL-terminated table key. (ASCII)
+     */
+    static int compareKeys(CharSequence key, ByteBuffer bytes, int offset) {
+        for (int i = 0;; ++i, ++offset) {
+            int c2 = bytes.get(offset);
+            if (c2 == 0) {
+                if (i == key.length()) {
+                    return 0;
+                } else {
+                    return 1;  // key > table key because key is longer.
+                }
+            } else if (i == key.length()) {
+                return -1;  // key < table key because key is shorter.
+            }
+            int diff = (int)key.charAt(i) - c2;
+            if (diff != 0) {
+                return diff;
+            }
+        }
+    }
+
     // public inner interface ------------------------------------------------
-    
+
     /**
      * Special interface for data authentication
      */
@@ -34,10 +279,131 @@ public final class ICUBinary
     
     // public methods --------------------------------------------------------
 
+    /**
+     * Loads an ICU binary data file and returns it as a ByteBuffer.
+     * The buffer contents is normally read-only, but its position etc. can be modified.
+     *
+     * @param itemPath Relative ICU data item path, for example "root.res" or "coll/ucadata.icu".
+     * @return The data as a read-only ByteBuffer,
+     *         or null if the resource could not be found.
+     */
+    public static ByteBuffer getData(String itemPath) {
+        return getData(null, null, itemPath, false);
+    }
+
+    /**
+     * Loads an ICU binary data file and returns it as a ByteBuffer.
+     * The buffer contents is normally read-only, but its position etc. can be modified.
+     *
+     * @param loader Used for loader.getResourceAsStream() unless the data is found elsewhere.
+     * @param resourceName Resource name for use with the loader.
+     * @param itemPath Relative ICU data item path, for example "root.res" or "coll/ucadata.icu".
+     * @return The data as a read-only ByteBuffer,
+     *         or null if the resource could not be found.
+     */
+    public static ByteBuffer getData(ClassLoader loader, String resourceName, String itemPath) {
+        return getData(loader, resourceName, itemPath, false);
+    }
+
+    /**
+     * Loads an ICU binary data file and returns it as a ByteBuffer.
+     * The buffer contents is normally read-only, but its position etc. can be modified.
+     *
+     * @param itemPath Relative ICU data item path, for example "root.res" or "coll/ucadata.icu".
+     * @return The data as a read-only ByteBuffer.
+     * @throws MissingResourceException if the resource could not be found
+     */
+    public static ByteBuffer getRequiredData(String itemPath) {
+        return getData(null, null, itemPath, true);
+    }
+
+    /**
+     * Loads an ICU binary data file and returns it as a ByteBuffer.
+     * The buffer contents is normally read-only, but its position etc. can be modified.
+     *
+     * @param loader Used for loader.getResourceAsStream() unless the data is found elsewhere.
+     * @param resourceName Resource name for use with the loader.
+     * @param itemPath Relative ICU data item path, for example "root.res" or "coll/ucadata.icu".
+     * @return The data as a read-only ByteBuffer.
+     * @throws MissingResourceException if required==true and the resource could not be found
+     */
+//    public static ByteBuffer getRequiredData(ClassLoader loader, String resourceName,
+//            String itemPath) {
+//        return getData(loader, resourceName, itemPath, true);
+//    }
+
+    /**
+     * Loads an ICU binary data file and returns it as a ByteBuffer.
+     * The buffer contents is normally read-only, but its position etc. can be modified.
+     *
+     * @param loader Used for loader.getResourceAsStream() unless the data is found elsewhere.
+     * @param resourceName Resource name for use with the loader.
+     * @param itemPath Relative ICU data item path, for example "root.res" or "coll/ucadata.icu".
+     * @param required If the resource cannot be found,
+     *        this method returns null (!required) or throws an exception (required).
+     * @return The data as a read-only ByteBuffer,
+     *         or null if required==false and the resource could not be found.
+     * @throws MissingResourceException if required==true and the resource could not be found
+     */
+    private static ByteBuffer getData(ClassLoader loader, String resourceName,
+            String itemPath, boolean required) {
+        ByteBuffer bytes = getDataFromFile(itemPath);
+        if (bytes != null) {
+            return bytes;
+        }
+        if (loader == null) {
+            loader = ICUData.class.getClassLoader();
+        }
+        if (resourceName == null) {
+            resourceName = ICUData.ICU_BASE_NAME + '/' + itemPath;
+        }
+        InputStream is = ICUData.getStream(loader, resourceName, required);
+        if (is == null) {
+            return null;
+        }
+        try {
+            return getByteBufferFromInputStream(is);
+        } catch (IOException e) {
+            throw new ICUUncheckedIOException(e);
+        }
+    }
+
+    private static ByteBuffer getDataFromFile(String itemPath) {
+        for (DataFile dataFile : icuDataFiles) {
+            if (dataFile.pkgBytes != null) {
+                ByteBuffer data = DatPackageReader.getData(dataFile.pkgBytes, itemPath);
+                if (data != null) {
+                    return data;
+                }
+            } else if (itemPath.equals(dataFile.itemPath)) {
+                return mapFile(dataFile.path);
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Maps the whole file into memory, read-only.
+     *
+     * @param path The file to map.
+     * @return The mapped bytes, or null if the file could not be opened or mapped
+     *         (the exception is printed to System.err).
+     */
+    private static ByteBuffer mapFile(File path) {
+        FileInputStream file = null;
+        try {
+            file = new FileInputStream(path);
+            FileChannel channel = file.getChannel();
+            ByteBuffer bytes = channel.map(FileChannel.MapMode.READ_ONLY, 0, channel.size());
+            // Close the file and its channel.
+            // Per the FileChannel.map() API docs, an established mapping does not
+            // depend on the channel that created it, so the ByteBuffer stays valid.
+            file.close();
+            return bytes;
+        } catch(FileNotFoundException ignored) {
+            System.err.println(ignored);
+        } catch (IOException ignored) {
+            System.err.println(ignored);
+        } finally {
+            // Do not leak the file handle when getChannel(), size() or map() fails.
+            // After a successful close() above, this second close() has no effect.
+            if (file != null) {
+                try {
+                    file.close();
+                } catch (IOException ignored) {
+                    // Best effort; the outcome was already decided above.
+                }
+            }
+        }
+        return null;
+    }
+
     /**
      * Same as readHeader(), but returns a VersionInfo rather than a compact int.
      */
-    public static final VersionInfo readHeaderAndDataVersion(ByteBuffer bytes,
+    public static VersionInfo readHeaderAndDataVersion(ByteBuffer bytes,
                                                              int dataFormat,
                                                              Authenticate authenticate)
                                                                 throws IOException {
@@ -56,7 +422,7 @@ public final class ICUBinary
      * @return dataVersion
      * @throws IOException if this is not a valid ICU data item of the expected dataFormat
      */
-    public static final int readHeader(ByteBuffer bytes, int dataFormat, Authenticate authenticate)
+    public static int readHeader(ByteBuffer bytes, int dataFormat, Authenticate authenticate)
             throws IOException {
         assert bytes.position() == 0;
         byte magic1 = bytes.get(2);
@@ -89,7 +455,11 @@ public final class ICUBinary
                 bytes.get(14) != (byte)(dataFormat >> 8) ||
                 bytes.get(15) != (byte)dataFormat ||
                 (authenticate != null && !authenticate.isDataVersionAcceptable(formatVersion))) {
-            throw new IOException(HEADER_AUTHENTICATION_FAILED_);
+            throw new IOException(HEADER_AUTHENTICATION_FAILED_ +
+                    String.format("; data format %02x%02x%02x%02x, format version %d.%d.%d.%d",
+                            bytes.get(12), bytes.get(13), bytes.get(14), bytes.get(15),
+                            formatVersion[0] & 0xff, formatVersion[1] & 0xff,
+                            formatVersion[2] & 0xff, formatVersion[3] & 0xff));
         }
 
         bytes.position(headerSize);
@@ -100,17 +470,54 @@ public final class ICUBinary
                 (bytes.get(23) & 0xff);
     }
 
-    public static final void skipBytes(ByteBuffer bytes, int skipLength) {
+    /**
+     * Writes an ICU data header.
+     * Does not write a copyright string.
+     *
+     * @return The length of the header (number of bytes written).
+     * @throws IOException from the DataOutputStream
+     */
+    public static int writeHeader(int dataFormat, int formatVersion, int dataVersion,
+            DataOutputStream dos) throws IOException {
+        // ucmndata.h MappedData
+        dos.writeChar(32);  // headerSize
+        dos.writeByte(MAGIC1);
+        dos.writeByte(MAGIC2);
+        // unicode/udata.h UDataInfo
+        dos.writeChar(20);  // sizeof(UDataInfo)
+        dos.writeChar(0);  // reservedWord
+        dos.writeByte(1);  // isBigEndian
+        dos.writeByte(CHAR_SET_);  // charsetFamily
+        dos.writeByte(CHAR_SIZE_);  // sizeofUChar
+        dos.writeByte(0);  // reservedByte
+        dos.writeInt(dataFormat);
+        dos.writeInt(formatVersion);
+        dos.writeInt(dataVersion);
+        // 8 bytes padding for 32 bytes headerSize (multiple of 16).
+        dos.writeLong(0);
+        assert dos.size() == 32;
+        return 32;
+    }
+
+    public static void skipBytes(ByteBuffer bytes, int skipLength) {
         if (skipLength > 0) {
             bytes.position(bytes.position() + skipLength);
         }
     }
 
+    /**
+     * Same as ByteBuffer.slice() plus preserving the byte order.
+     */
+    public static ByteBuffer sliceWithOrder(ByteBuffer bytes) {
+        ByteBuffer b = bytes.slice();
+        return b.order(bytes.order());
+    }
+
     /**
      * Reads the entire contents from the stream into a byte array
      * and wraps it into a ByteBuffer. Closes the InputStream at the end.
      */
-    public static final ByteBuffer getByteBufferFromInputStream(InputStream is) throws IOException {
+    public static ByteBuffer getByteBufferFromInputStream(InputStream is) throws IOException {
         try {
             int avail = is.available();
             byte[] bytes = new byte[avail];
@@ -128,7 +535,7 @@ public final class ICUBinary
         }
     }
 
-    private static final void readFully(InputStream is, byte[] bytes, int offset, int avail)
+    private static void readFully(InputStream is, byte[] bytes, int offset, int avail)
             throws IOException {
         while (avail > 0) {
             int numRead = is.read(bytes, offset, avail);
index b47b278db85af24fae374ac28fbf9a42287611d2..b5fa3e14acf82f60d46b12a3d81a8104a6abf60d 100644 (file)
@@ -1,7 +1,7 @@
 /*
  *******************************************************************************
- * Copyright (C) 2004-2009, International Business Machines Corporation and    *
- * others. All Rights Reserved.                                                *
+ * Copyright (C) 2004-2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
  *******************************************************************************
  *
  * Created on Feb 4, 2004
@@ -9,22 +9,83 @@
  */
 package com.ibm.icu.impl;
 
+import java.io.IOException;
 import java.io.InputStream;
 import java.net.URL;
 import java.security.AccessController;
 import java.security.PrivilegedAction;
 import java.util.MissingResourceException;
+import java.util.logging.Logger;
+
+import com.ibm.icu.util.VersionInfo;
 
 /**
  * Provides access to ICU data files as InputStreams.  Implements security checking.
  */
 public final class ICUData {
-    /*
-     * Return a URL to the ICU resource names resourceName.  The
-     * resource name should either be an absolute path, or a path relative to
-     * com.ibm.icu.impl (e.g., most likely it is 'data/foo').  If required
-     * is true, throw an MissingResourceException instead of returning a null result.
+    /**
+     * The data path to be used with getBundleInstance API
+     */
+    static final String ICU_DATA_PATH = "com/ibm/icu/impl/";
+    /**
+     * The ICU data package name.
+     * This is normally the name of the .dat package, and the prefix (plus '/')
+     * of the package entry names.
+     */
+    static final String PACKAGE_NAME = "icudt" + VersionInfo.ICU_DATA_VERSION_PATH;
+    /**
+     * The data path to be used with Class.getResourceAsStream().
+     */
+    public static final String ICU_BUNDLE = "data/" + PACKAGE_NAME;
+
+    /**
+     * The base name of ICU data to be used with ClassLoader.getResourceAsStream(),
+     * ICUResourceBundle.getBundleInstance() etc.
+     */
+    public static final String ICU_BASE_NAME = ICU_DATA_PATH + ICU_BUNDLE;
+
+    /**
+     * The base name of collation data to be used with getBundleInstance API
      */
+    public static final String ICU_COLLATION_BASE_NAME = ICU_BASE_NAME + "/coll";
+
+    /**
+     * The base name of rbbi data to be used with getData API
+     */
+    public static final String ICU_BRKITR_NAME = "brkitr";
+
+    /**
+     * The base name of rbbi data to be used with getBundleInstance API
+     */
+    public static final String ICU_BRKITR_BASE_NAME = ICU_BASE_NAME + '/' + ICU_BRKITR_NAME;
+
+    /**
+     * The base name of rbnf data to be used with getBundleInstance API
+     */
+    public static final String ICU_RBNF_BASE_NAME = ICU_BASE_NAME + "/rbnf";
+
+    /**
+     * The base name of transliterator data to be used with getBundleInstance API
+     */
+    public static final String ICU_TRANSLIT_BASE_NAME = ICU_BASE_NAME + "/translit";
+
+    public static final String ICU_LANG_BASE_NAME = ICU_BASE_NAME + "/lang";
+    public static final String ICU_CURR_BASE_NAME = ICU_BASE_NAME + "/curr";
+    public static final String ICU_REGION_BASE_NAME = ICU_BASE_NAME + "/region";
+    public static final String ICU_ZONE_BASE_NAME = ICU_BASE_NAME + "/zone";
+
+    /**
+     * For testing (otherwise false): When reading an InputStream from a Class or ClassLoader
+     * (that is, not from a file), log when the stream contains ICU binary data.
+     *
+     * This cannot be ICUConfig'ured because ICUConfig calls ICUData.getStream()
+     * to read the properties file, so we would get a circular dependency
+     * in the class initialization.
+     */
+    private static final boolean logBinaryDataFromInputStream = false;
+    private static final Logger logger = logBinaryDataFromInputStream ?
+            Logger.getLogger(ICUData.class.getName()) : null;
+
     public static boolean exists(final String resourceName) {
         URL i = null;
         if (System.getSecurityManager() != null) {
@@ -38,10 +99,9 @@ public final class ICUData {
         }
         return i != null;
     }
-        
+
     private static InputStream getStream(final Class<?> root, final String resourceName, boolean required) {
         InputStream i = null;
-        
         if (System.getSecurityManager() != null) {
             i = AccessController.doPrivileged(new PrivilegedAction<InputStream>() {
                     public InputStream run() {
@@ -55,10 +115,14 @@ public final class ICUData {
         if (i == null && required) {
             throw new MissingResourceException("could not locate data " +resourceName, root.getPackage().getName(), resourceName);
         }
+        checkStreamForBinaryData(i, resourceName);
         return i;
     }
 
-    private static InputStream getStream(final ClassLoader loader, final String resourceName, boolean required) {
+    /**
+     * Should be called only from ICUBinary.getData() or from convenience overloads here.
+     */
+    static InputStream getStream(final ClassLoader loader, final String resourceName, boolean required) {
         InputStream i = null;
         if (System.getSecurityManager() != null) {
             i = AccessController.doPrivileged(new PrivilegedAction<InputStream>() {
@@ -72,40 +136,67 @@ public final class ICUData {
         if (i == null && required) {
             throw new MissingResourceException("could not locate data", loader.toString(), resourceName);
         }
+        checkStreamForBinaryData(i, resourceName);
         return i;
     }
-    
+
+    /**
+     * For testing: if logBinaryDataFromInputStream is true and the resource name
+     * contains the ICU package name, peeks at the first 32 bytes of the stream
+     * and logs when they look like an ICU binary data header.
+     * The stream is reset afterwards; streams without mark/reset support
+     * are left untouched because their bytes could not be un-read.
+     *
+     * @param is The stream to inspect; may be null.
+     * @param resourceName The name under which the stream was loaded.
+     */
+    @SuppressWarnings("unused")  // used if logBinaryDataFromInputStream == true
+    private static void checkStreamForBinaryData(InputStream is, String resourceName) {
+        if (logBinaryDataFromInputStream && is != null && is.markSupported() &&
+                resourceName.indexOf(PACKAGE_NAME) >= 0) {
+            try {
+                is.mark(32);
+                byte[] b = new byte[32];
+                int len = is.read(b);
+                if (len == 32 && b[2] == (byte)0xda && b[3] == 0x27) {
+                    String msg = String.format(
+                            "ICU binary data file loaded from Class/ClassLoader as InputStream " +
+                            "from %s: MappedData %02x%02x%02x%02x  dataFormat %02x%02x%02x%02x",
+                            resourceName,
+                            b[0], b[1], b[2], b[3],
+                            b[12], b[13], b[14], b[15]);
+                    logger.info(msg);
+                }
+                is.reset();
+            } catch (IOException ignored) {
+            }
+        }
+    }
+
     public static InputStream getStream(ClassLoader loader, String resourceName){
-        return getStream(loader,resourceName, false);   
+        return getStream(loader,resourceName, false);
     }
 
     public static InputStream getRequiredStream(ClassLoader loader, String resourceName){
         return getStream(loader, resourceName, true);
     }
 
-    /*
+    /**
      * Convenience override that calls getStream(ICUData.class, resourceName, false);
+     * Returns null if the resource could not be found.
      */
     public static InputStream getStream(String resourceName) {
         return getStream(ICUData.class, resourceName, false);
     }
-        
-    /*
+
+    /**
      * Convenience method that calls getStream(ICUData.class, resourceName, true).
+     * @throws MissingResourceException if the resource could not be found
      */
     public static InputStream getRequiredStream(String resourceName) {
         return getStream(ICUData.class, resourceName, true);
     }
 
-    /*
+    /**
      * Convenience override that calls getStream(root, resourceName, false);
+     * Returns null if the resource could not be found.
      */
     public static InputStream getStream(Class<?> root, String resourceName) {
         return getStream(root, resourceName, false);
     }
-    
-    /*
+
+    /**
      * Convenience method that calls getStream(root, resourceName, true).
+     * @throws MissingResourceException if the resource could not be found
      */
     public static InputStream getRequiredStream(Class<?> root, String resourceName) {
         return getStream(root, resourceName, true);
index 63394d76070fa40c0332b4a47cb8850af4b845e1..3f6fb14970aaadbb1a04b9968378c15cda7134dc 100644 (file)
@@ -29,52 +29,76 @@ import com.ibm.icu.util.ULocale;
 import com.ibm.icu.util.UResourceBundle;
 import com.ibm.icu.util.UResourceBundleIterator;
 import com.ibm.icu.util.UResourceTypeMismatchException;
-import com.ibm.icu.util.VersionInfo;
 
 public  class ICUResourceBundle extends UResourceBundle {
     /**
      * The data path to be used with getBundleInstance API
+     * @deprecated because not specific to resource bundles; use the ICUData constants instead
      */
-    protected static final String ICU_DATA_PATH = "com/ibm/icu/impl/";
+    @Deprecated
+    protected static final String ICU_DATA_PATH = ICUData.ICU_DATA_PATH;
     /**
      * The data path to be used with getBundleInstance API
+     * @deprecated because not specific to resource bundles; use the ICUData constants instead
      */
-    public static final String ICU_BUNDLE = "data/icudt" + VersionInfo.ICU_DATA_VERSION_PATH;
+    @Deprecated
+    public static final String ICU_BUNDLE = ICUData.ICU_BUNDLE;
 
     /**
      * The base name of ICU data to be used with getBundleInstance API
+     * @deprecated because not specific to resource bundles; use the ICUData constants instead
      */
-    public static final String ICU_BASE_NAME = ICU_DATA_PATH + ICU_BUNDLE;
+    @Deprecated
+    public static final String ICU_BASE_NAME = ICUData.ICU_BASE_NAME;
 
     /**
      * The base name of collation data to be used with getBundleInstance API
+     * @deprecated because not specific to resource bundles; use the ICUData constants instead
      */
-    public static final String ICU_COLLATION_BASE_NAME = ICU_BASE_NAME + "/coll";
-
-    /**
-     * The base name of rbbi data to be used with getData API
-     */
-    public static final String ICU_BRKITR_NAME = "/brkitr";
+    @Deprecated
+    public static final String ICU_COLLATION_BASE_NAME = ICUData.ICU_COLLATION_BASE_NAME;
 
     /**
      * The base name of rbbi data to be used with getBundleInstance API
+     * @deprecated because not specific to resource bundles; use the ICUData constants instead
      */
-    public static final String ICU_BRKITR_BASE_NAME = ICU_BASE_NAME + ICU_BRKITR_NAME;
+    @Deprecated
+    public static final String ICU_BRKITR_BASE_NAME = ICUData.ICU_BRKITR_BASE_NAME;
 
     /**
      * The base name of rbnf data to be used with getBundleInstance API
+     * @deprecated because not specific to resource bundles; use the ICUData constants instead
      */
-    public static final String ICU_RBNF_BASE_NAME = ICU_BASE_NAME + "/rbnf";
+    @Deprecated
+    public static final String ICU_RBNF_BASE_NAME = ICUData.ICU_RBNF_BASE_NAME;
 
     /**
      * The base name of transliterator data to be used with getBundleInstance API
+     * @deprecated because not specific to resource bundles; use the ICUData constants instead
      */
-    public static final String ICU_TRANSLIT_BASE_NAME = ICU_BASE_NAME + "/translit";
+    @Deprecated
+    public static final String ICU_TRANSLIT_BASE_NAME = ICUData.ICU_TRANSLIT_BASE_NAME;
 
-    public static final String ICU_LANG_BASE_NAME = ICU_BASE_NAME + "/lang";
-    public static final String ICU_CURR_BASE_NAME = ICU_BASE_NAME + "/curr";
-    public static final String ICU_REGION_BASE_NAME = ICU_BASE_NAME + "/region";
-    public static final String ICU_ZONE_BASE_NAME = ICU_BASE_NAME + "/zone";
+    /**
+     * @deprecated because not specific to resource bundles; use the ICUData constants instead
+     */
+    @Deprecated
+    public static final String ICU_LANG_BASE_NAME = ICUData.ICU_LANG_BASE_NAME;
+    /**
+     * @deprecated because not specific to resource bundles; use the ICUData constants instead
+     */
+    @Deprecated
+    public static final String ICU_CURR_BASE_NAME = ICUData.ICU_CURR_BASE_NAME;
+    /**
+     * @deprecated because not specific to resource bundles; use the ICUData constants instead
+     */
+    @Deprecated
+    public static final String ICU_REGION_BASE_NAME = ICUData.ICU_REGION_BASE_NAME;
+    /**
+     * @deprecated because not specific to resource bundles; use the ICUData constants instead
+     */
+    @Deprecated
+    public static final String ICU_ZONE_BASE_NAME = ICUData.ICU_ZONE_BASE_NAME;
 
     private static final String NO_INHERITANCE_MARKER = "\u2205\u2205\u2205";
 
index 6c921db88629b8e74d75dcbad580be51ab1339d5..1f3a075a283345f921f12dd6d2b2a6da7a192e17 100644 (file)
@@ -328,11 +328,25 @@ public final class ICUResourceBundleReader {
         @Override
         protected ICUResourceBundleReader createInstance(ReaderInfo key, ReaderInfo data) {
             String fullName = ICUResourceBundleReader.getFullName(data.baseName, data.localeID);
-            InputStream stream = ICUData.getStream(data.loader, fullName);
-            if (stream == null) {
-                return NULL_READER;
+            try {
+                ByteBuffer inBytes;
+                if (data.baseName != null && data.baseName.startsWith(ICUData.ICU_BASE_NAME)) {  // bundle lives under ICU's own base name: load via ICUBinary.getData()
+                    String itemPath = fullName.substring(ICUData.ICU_BASE_NAME.length() + 1);  // fullName minus the ICU_BASE_NAME prefix; the +1 presumably skips the '/' separator
+                    inBytes = ICUBinary.getData(data.loader, fullName, itemPath);
+                    if (inBytes == null) {
+                        return NULL_READER;  // no data found: signal with NULL_READER
+                    }
+                } else {  // any other base name: read the class loader resource stream into a ByteBuffer
+                    InputStream stream = ICUData.getStream(data.loader, fullName);
+                    if (stream == null) {
+                        return NULL_READER;  // no such resource: signal with NULL_READER
+                    }
+                    inBytes = ICUBinary.getByteBufferFromInputStream(stream);  // NOTE(review): stream is not closed here; confirm getByteBufferFromInputStream() closes it
+                }
+                return new ICUResourceBundleReader(inBytes, data.baseName, data.localeID, data.loader);
+            } catch (IOException ex) {  // covers both the loading calls above and the reader constructor
+                throw new ICUUncheckedIOException("Data file " + fullName + " is corrupt - " + ex.getMessage(), ex);
             }
-            return new ICUResourceBundleReader(stream, data.baseName, data.localeID, data.loader);
         }
     }
 
@@ -342,14 +356,10 @@ public final class ICUResourceBundleReader {
     private ICUResourceBundleReader() {
     }
 
-    private ICUResourceBundleReader(InputStream stream, String baseName, String localeID, ClassLoader loader) {
-        try {
-            ByteBuffer inBytes = ICUBinary.getByteBufferFromInputStream(stream);
-            init(inBytes);
-        } catch (IOException ex) {
-            String fullName = ICUResourceBundleReader.getFullName(baseName, localeID);
-            throw new ICUUncheckedIOException("Data file " + fullName + " is corrupt - " + ex.getMessage(), ex);
-        }
+    private ICUResourceBundleReader(ByteBuffer inBytes,
+            String baseName, String localeID,
+            ClassLoader loader) throws IOException {
+        init(inBytes);
 
         // set pool bundle keys if necessary
         if (usesPoolBundle) {
@@ -377,7 +387,7 @@ public final class ICUResourceBundleReader {
     private void init(ByteBuffer inBytes) throws IOException {
         dataVersion = ICUBinary.readHeader(inBytes, DATA_FORMAT, IS_ACCEPTABLE);
         boolean isFormatVersion10 = inBytes.get(16) == 1 && inBytes.get(17) == 0;
-        bytes = inBytes.slice();
+        bytes = ICUBinary.sliceWithOrder(inBytes);
         int dataLength = bytes.remaining();
 
         if(DEBUG) System.out.println("The ByteBuffer is direct (memory-mapped): " + bytes.isDirect());
@@ -420,7 +430,7 @@ public final class ICUResourceBundleReader {
             if(_16BitTop > keysTop) {
                 int num16BitUnits = (_16BitTop - keysTop) * 2;
                 bytes.position(keysTop << 2);
-                b16BitUnits = bytes.slice().asCharBuffer();
+                b16BitUnits = bytes.asCharBuffer();
                 b16BitUnits.limit(num16BitUnits);
                 maxOffset |= num16BitUnits - 1;
             } else {
@@ -444,7 +454,7 @@ public final class ICUResourceBundleReader {
                 // unlike regular bundles' key strings for which indexes
                 // are based on the start of the bundle data.
                 bytes.position((1 + indexLength) << 2);
-                bytes = bytes.slice();
+                bytes = ICUBinary.sliceWithOrder(bytes);
             } else {
                 localKeyLimit = getIndexesInt(URES_INDEX_KEYS_TOP) << 2;
             }
@@ -582,38 +592,18 @@ public final class ICUResourceBundleReader {
             return makeKeyStringFromBytes(poolBundleKeys, keyOffset & 0x7fffffff);
         }
     }
-    // Compare the length-specified input key with the
-    // NUL-terminated table key.
-    private static int compareKeys(CharSequence key, ByteBuffer keyBytes, int keyOffset) {
-        for(int i = 0;; ++i, ++keyOffset) {
-            int c2 = keyBytes.get(keyOffset);
-            if(c2 == 0) {
-                if(i == key.length()) {
-                    return 0;
-                } else {
-                    return 1;  // key > table key because key is longer.
-                }
-            } else if(i == key.length()) {
-                return -1;  // key < table key because key is shorter.
-            }
-            int diff = (int)key.charAt(i) - c2;
-            if(diff != 0) {
-                return diff;
-            }
-        }
-    }
     private int compareKeys(CharSequence key, char keyOffset) {  // compares key with the table key stored at a 16-bit key offset
         if(keyOffset < localKeyLimit) {  // offsets below localKeyLimit index this bundle's own bytes
-            return compareKeys(key, bytes, keyOffset);
+            return ICUBinary.compareKeys(key, bytes, keyOffset);
         } else {  // otherwise the key is in poolBundleKeys, with the offset rebased by localKeyLimit
-            return compareKeys(key, poolBundleKeys, keyOffset - localKeyLimit);
+            return ICUBinary.compareKeys(key, poolBundleKeys, keyOffset - localKeyLimit);
         }
     }
     private int compareKeys32(CharSequence key, int keyOffset) {  // compares key with the table key stored at a 32-bit key offset
         if(keyOffset >= 0) {  // non-negative offsets index this bundle's own bytes
-            return compareKeys(key, bytes, keyOffset);
+            return ICUBinary.compareKeys(key, bytes, keyOffset);
         } else {  // sign bit set: key is in poolBundleKeys; mask the sign bit off to get the offset
-            return compareKeys(key, poolBundleKeys, keyOffset & 0x7fffffff);
+            return ICUBinary.compareKeys(key, poolBundleKeys, keyOffset & 0x7fffffff);
         }
     }
 
@@ -743,7 +733,7 @@ public final class ICUResourceBundleReader {
                 offset += 4;
                 ByteBuffer result = bytes.duplicate();
                 result.position(offset).limit(offset + length);
-                result = result.slice();
+                result = ICUBinary.sliceWithOrder(result);
                 if(!result.isReadOnly()) {
                     result = result.asReadOnlyBuffer();
                 }
index 4a69d36d25ef319cd2166ffa6bc4345b78ce0192..cc3ee5203f518dfbf45616233269b77ace0f1a55 100644 (file)
@@ -345,7 +345,7 @@ public final class Norm2AllModes {
             protected Norm2AllModes createInstance(String key, ByteBuffer bytes) {
                 Normalizer2Impl impl;
                 if(bytes==null) {
-                    impl=new Normalizer2Impl().load(ICUResourceBundle.ICU_BUNDLE+"/"+key+".nrm");
+                    impl=new Normalizer2Impl().load(key+".nrm");
                 } else {
                     impl=new Normalizer2Impl().load(bytes);
                 }
@@ -365,8 +365,7 @@ public final class Norm2AllModes {
     private static final class Norm2AllModesSingleton {
         private Norm2AllModesSingleton(String name) {
             try {
-                Normalizer2Impl impl=new Normalizer2Impl().load(
-                        ICUResourceBundle.ICU_BUNDLE+"/"+name+".nrm");
+                Normalizer2Impl impl=new Normalizer2Impl().load(name+".nrm");
                 allModes=new Norm2AllModes(impl);
             } catch(RuntimeException e) {
                 exception=e;
index 33f0700072926005cbb104d17917074772fa8c55..0397ca199f0415d9c68e98630334cef59f0981ce 100644 (file)
@@ -494,11 +494,7 @@ public final class Normalizer2Impl {
         }
     }
     public Normalizer2Impl load(String name) {
-        try {
-            return load(ICUBinary.getByteBufferFromInputStream(ICUData.getRequiredStream(name)));
-        } catch(IOException e) {
-            throw new ICUUncheckedIOException(e);
-        }
+        return load(ICUBinary.getRequiredData(name));  // name is now a bare data file name such as key+".nrm"; callers no longer prepend ICU_BUNDLE
     }
 
     private void enumLcccRange(int start, int end, int norm16, UnicodeSet set) {
index 495fe3ff581e1298b6ef239f973523b5cd18f3fb..62d6f9f9808e6cf99e70b19e81395835feaf8381 100644 (file)
@@ -98,10 +98,12 @@ public abstract class Trie2 implements Iterable<Trie2.Range> {
             header.signature = bytes.getInt();
             switch (header.signature) {
             case 0x54726932:
-                bytes.order(ByteOrder.BIG_ENDIAN);
+                // The buffer is already set to the trie data byte order.
                 break;
             case 0x32697254:
-                bytes.order(ByteOrder.LITTLE_ENDIAN);
+                // Temporarily reverse the byte order.
+                boolean isBigEndian = outerByteOrder == ByteOrder.BIG_ENDIAN;
+                bytes.order(isBigEndian ? ByteOrder.LITTLE_ENDIAN : ByteOrder.BIG_ENDIAN);
                 header.signature = 0x54726932;
                 break;
             default:
index f4bf1d7e33b5c05e31ca502f20d017458cd8be69..c382182c9c2d46728ca15eaf7481e837223a6c44 100644 (file)
@@ -20,7 +20,6 @@
 package com.ibm.icu.impl;
 
 import java.io.IOException;
-import java.io.InputStream;
 import java.nio.ByteBuffer;
 import java.util.Iterator;
 
@@ -34,8 +33,7 @@ public final class UBiDiProps {
 
     // port of ubidi_openProps()
     private UBiDiProps() throws IOException{
-        InputStream is=ICUData.getStream(ICUResourceBundle.ICU_BUNDLE+"/"+DATA_FILE_NAME);
-        ByteBuffer bytes=ICUBinary.getByteBufferFromInputStream(is);
+        ByteBuffer bytes=ICUBinary.getData(DATA_FILE_NAME);  // NOTE(review): sibling loaders use getRequiredData(); confirm a null result from getData() is acceptable to readData()
         readData(bytes);
     }
 
index c45f5ae7721ac4f1eb92074a69e790bee9d9f14f..d3920a8fecd26f28ba2f8f0d80e10cd53f1e97bf 100644 (file)
@@ -20,7 +20,6 @@
 package com.ibm.icu.impl;
 
 import java.io.IOException;
-import java.io.InputStream;
 import java.nio.ByteBuffer;
 import java.util.Iterator;
 
@@ -37,8 +36,7 @@ public final class UCaseProps {
 
     // port of ucase_openProps()
     private UCaseProps() throws IOException {
-        InputStream is=ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE+"/"+DATA_FILE_NAME);
-        ByteBuffer bytes=ICUBinary.getByteBufferFromInputStream(is);
+        ByteBuffer bytes=ICUBinary.getRequiredData(DATA_FILE_NAME);  // looked up by bare file name; the ICU_BUNDLE prefix is no longer prepended here
         readData(bytes);
     }
 
index 1d2dbfa593be73577e9c1ff67b705b9a73bf03a0..053d1b77fcc9b8ad5baa68e8aa7d8dcf9825c3ef 100644 (file)
@@ -8,7 +8,6 @@
 package com.ibm.icu.impl;
 
 import java.io.IOException;
-import java.io.InputStream;
 import java.nio.ByteBuffer;
 import java.util.Locale;
 import java.util.MissingResourceException;
@@ -1039,7 +1038,7 @@ public final class UCharacterName
     /**
     * Default name of the name datafile
     */
-    private static final String NAME_FILE_NAME_ = ICUResourceBundle.ICU_BUNDLE+"/unames.icu";
+    private static final String FILE_NAME_ = "unames.icu";
     /**
     * Shift count to retrieve group information
     */
@@ -1168,8 +1167,7 @@ public final class UCharacterName
     */
     private UCharacterName() throws IOException
     {
-        InputStream is = ICUData.getRequiredStream(NAME_FILE_NAME_);
-        ByteBuffer b = ICUBinary.getByteBufferFromInputStream(is);
+        ByteBuffer b = ICUBinary.getRequiredData(FILE_NAME_);  // FILE_NAME_ is the bare "unames.icu", without the ICU_BUNDLE prefix
         UCharacterNameReader reader = new UCharacterNameReader(b);
         reader.read(this);
     }
index 599dc38e03e0257423380faa9474ef51391d2b3d..010682564f5c0adb2c61db39d11bfd21d7130a77 100644 (file)
@@ -8,7 +8,6 @@
 package com.ibm.icu.impl;
 
 import java.io.IOException;
-import java.io.InputStream;
 import java.nio.ByteBuffer;
 import java.util.Iterator;
 import java.util.MissingResourceException;
@@ -970,7 +969,7 @@ public final class UCharacterProperty
     /**
     * Default name of the datafile
     */
-    private static final String DATA_FILE_NAME_ = ICUResourceBundle.ICU_BUNDLE+"/uprops.icu";
+    private static final String DATA_FILE_NAME_ = "uprops.icu";
 
     /**
     * Shift value for lead surrogate to form a supplementary character.
@@ -1184,8 +1183,7 @@ public final class UCharacterProperty
         }
 
         // jar access
-        InputStream is = ICUData.getRequiredStream(DATA_FILE_NAME_);
-        ByteBuffer bytes=ICUBinary.getByteBufferFromInputStream(is);
+        ByteBuffer bytes=ICUBinary.getRequiredData(DATA_FILE_NAME_);
         m_unicodeVersion_ = ICUBinary.readHeaderAndDataVersion(bytes, DATA_FORMAT, new IsAcceptable());
         // Read or skip the 16 indexes.
         int propertyOffset = bytes.getInt();
index 23eee0224647486dc7c079268ed1164082206860..600eb205d84225a7a381ddc6bc91b7b71641f7ca 100644 (file)
@@ -13,7 +13,6 @@
 package com.ibm.icu.impl;
 
 import java.io.IOException;
-import java.io.InputStream;
 import java.nio.ByteBuffer;
 import java.util.MissingResourceException;
 
@@ -116,8 +115,7 @@ public final class UPropertyAliases {
     }
 
     private UPropertyAliases() throws IOException {
-        InputStream stream = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE+"/pnames.icu");
-        ByteBuffer bytes = ICUBinary.getByteBufferFromInputStream(stream);
+        ByteBuffer bytes = ICUBinary.getRequiredData("pnames.icu");  // looked up by bare file name; the ICU_BUNDLE prefix is no longer prepended here
         load(bytes);
     }
 
index 73cb79a7eb11fa5f93e9c67e97798a55e38f8e6c..5f4ee27da96a382f66b91149c86b26ee43c63ecc 100644 (file)
@@ -1,7 +1,7 @@
 /*
  ******************************************************************************
- * Copyright (C) 2007-2011, International Business Machines Corporation and   *
- * others. All Rights Reserved.                                               *
+ * Copyright (C) 2007-2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
  ******************************************************************************
  */
 
@@ -103,26 +103,21 @@ public class ResourceBasedPeriodFormatterDataService extends
                 if (ln != null) {
                     String name = PATH + "pfd_" + ln + ".xml";
                     try {
-                        InputStream is = ICUData.getStream(getClass(), name);
-                        if (is == null) {
-                            throw new MissingResourceException(
-                                    "no resource named " + name, name, "");
-                        } else {
-                            DataRecord dr = DataRecord.read(ln,
-                                    new XMLRecordReader(new InputStreamReader(
-                                            is, "UTF-8")));
-                            if (dr != null) {
-                                // debug
-                                // if (false && ln.equals("ar_EG")) {
-                                // OutputStreamWriter osw = new
-                                // OutputStreamWriter(System.out, "UTF-8");
-                                // XMLRecordWriter xrw = new
-                                // XMLRecordWriter(osw);
-                                // dr.write(xrw);
-                                // osw.flush();
-                                // }
-                                ld = new PeriodFormatterData(localeName, dr);
-                            }
+                        InputStream is = ICUData.getRequiredStream(getClass(), name);
+                        DataRecord dr = DataRecord.read(ln,
+                                new XMLRecordReader(new InputStreamReader(
+                                        is, "UTF-8")));
+                        if (dr != null) {
+                            // debug
+                            // if (false && ln.equals("ar_EG")) {
+                            // OutputStreamWriter osw = new
+                            // OutputStreamWriter(System.out, "UTF-8");
+                            // XMLRecordWriter xrw = new
+                            // XMLRecordWriter(osw);
+                            // dr.write(xrw);
+                            // osw.flush();
+                            // }
+                            ld = new PeriodFormatterData(localeName, dr);
                         }
                     } catch (UnsupportedEncodingException e) {
                         throw new MissingResourceException(
index 13c3428b958cb69ec07e23aa38e0bcdb2b8168be..00dafd56577917c0e13d6f4e90c5635bd48272aa 100644 (file)
@@ -8,7 +8,6 @@
 package com.ibm.icu.text;
 
 import java.io.IOException;
-import java.io.InputStream;
 import java.nio.ByteBuffer;
 import java.util.Locale;
 import java.util.MissingResourceException;
@@ -111,9 +110,8 @@ final class BreakIteratorFactory extends BreakIterator.BreakIteratorServiceShim
         try {
             String         typeKey       = KIND_NAMES[kind];
             String         brkfname      = rb.getStringWithFallback("boundaries/" + typeKey);
-            String         rulesFileName = ICUResourceBundle.ICU_BUNDLE +ICUResourceBundle.ICU_BRKITR_NAME+ "/" + brkfname;
-            InputStream    ruleStream    = ICUData.getStream(rulesFileName);
-                           bytes         = ICUBinary.getByteBufferFromInputStream(ruleStream);
+            String         rulesFileName = ICUData.ICU_BRKITR_NAME+ '/' + brkfname;
+                           bytes         = ICUBinary.getData(rulesFileName);
         }
         catch (Exception e) {
             throw new MissingResourceException(e.toString(),"","");
index 5665fb16cf701d1bd8f75bf47b553d58dc1c5511..c6bfe1dd9318eb191c072acb8f59bf33b5bfeedf 100644 (file)
@@ -8,7 +8,6 @@
 package com.ibm.icu.text;
 
 import java.io.IOException;
-import java.io.InputStream;
 import java.nio.ByteBuffer;
 
 import com.ibm.icu.impl.Assert;
@@ -45,9 +44,8 @@ final class DictionaryData {
     public static DictionaryMatcher loadDictionaryFor(String dictType) throws IOException {
         ICUResourceBundle rb = (ICUResourceBundle)UResourceBundle.getBundleInstance(ICUResourceBundle.ICU_BRKITR_BASE_NAME);
         String dictFileName = rb.getStringWithFallback("dictionaries/" + dictType);
-        dictFileName = ICUResourceBundle.ICU_BUNDLE +ICUResourceBundle.ICU_BRKITR_NAME+ "/" + dictFileName;
-        InputStream is = ICUData.getStream(dictFileName);
-        ByteBuffer bytes = ICUBinary.getByteBufferFromInputStream(is);
+        dictFileName = ICUData.ICU_BRKITR_NAME + '/' + dictFileName;
+        ByteBuffer bytes = ICUBinary.getRequiredData(dictFileName);
         ICUBinary.readHeader(bytes, DATA_FORMAT_ID, null);
         int[] indexes = new int[IX_COUNT];
         // TODO: read indexes[IX_STRING_TRIE_OFFSET] first, then read a variable-length indexes[]
index e85ce8f93cd8c8d7d36dedd9c8740e56be73a953..0745238c6e8a9076e7747a2fcdb565793cd7c935 100644 (file)
@@ -9,10 +9,12 @@ package com.ibm.icu.text;
 
 import java.io.IOException;
 import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
 
 import com.ibm.icu.impl.CharTrie;
 import com.ibm.icu.impl.ICUBinary;
 import com.ibm.icu.impl.Trie;
+import com.ibm.icu.impl.ICUBinary.Authenticate;
 
 /**
 * <p>Internal class used for Rule Based Break Iterators</p>
@@ -32,7 +34,20 @@ final class RBBIDataWrapper {
     CharTrie       fTrie;
     String         fRuleSource;
     int            fStatusTable[];
-    
+
+    private boolean isBigEndian;
+
+    static final int DATA_FORMAT = 0x42726b20;  // "Brk "
+    static final int FORMAT_VERSION = 0x03010000;  // 3.1
+
+    private static final class IsAcceptable implements Authenticate {  // version check handed to ICUBinary.readHeader()
+        // @Override when we switch to Java 6
+        public boolean isDataVersionAcceptable(byte version[]) {
+            return version[0] == (FORMAT_VERSION >>> 24);  // accept when version[0] equals FORMAT_VERSION's top byte (3 for 0x03010000)
+        }
+    }
+    private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable();  // single instance reused for every readHeader() call
+
     //
     // Indexes to fields in the ICU4C style binary form of the RBBI Data Header
     //   Used by the rule compiler when flattening the data.
@@ -70,12 +85,12 @@ final class RBBIDataWrapper {
     // Index offsets to header fields of a state table
     //     struct RBBIStateTable {...   in the C version.
     //
-    final static int      NUMSTATES  = 0;
-    final static int      ROWLEN     = 2;
-    final static int      FLAGS      = 4;
-    final static int      RESERVED_2 = 6;
-    final static int      ROW_DATA   = 8;
-    
+            static final int NUMSTATES  = 0;
+            static final int ROWLEN     = 2;
+            static final int FLAGS      = 4;
+    //ivate static final int RESERVED_2 = 6;
+    private static final int ROW_DATA   = 8;
+
     //  Bit selectors for the "FLAGS" field of the state table header
     //     enum RBBIStateTableFlags in the C version.
     //
@@ -153,18 +168,20 @@ final class RBBIDataWrapper {
 
         RBBIDataWrapper This = new RBBIDataWrapper();
 
-        // Seek past the ICU data header.
-        //   TODO:  verify that the header looks good.
-        ICUBinary.skipBytes(bytes, 0x80);
+        ICUBinary.readHeader(bytes, DATA_FORMAT, IS_ACCEPTABLE);
+        This.isBigEndian = bytes.order() == ByteOrder.BIG_ENDIAN;
 
         // Read in the RBBI data header...
         This.fHeader = new  RBBIDataHeader();
         This.fHeader.fMagic          = bytes.getInt();
-        This.fHeader.fVersion        = bytes.getInt();
-        This.fHeader.fFormatVersion[0] = (byte) (This.fHeader.fVersion >> 24);
-        This.fHeader.fFormatVersion[1] = (byte) (This.fHeader.fVersion >> 16);
-        This.fHeader.fFormatVersion[2] = (byte) (This.fHeader.fVersion >> 8);
-        This.fHeader.fFormatVersion[3] = (byte) (This.fHeader.fVersion);
+        // Read the same 4 bytes as an int and as a byte array: The data format could be
+        // the old fVersion=1 (TODO: probably not with a real ICU data header?)
+        // or the new fFormatVersion=3.x.
+        This.fHeader.fVersion        = bytes.getInt(bytes.position());
+        This.fHeader.fFormatVersion[0] = bytes.get();
+        This.fHeader.fFormatVersion[1] = bytes.get();
+        This.fHeader.fFormatVersion[2] = bytes.get();
+        This.fHeader.fFormatVersion[3] = bytes.get();
         This.fHeader.fLength         = bytes.getInt();
         This.fHeader.fCatCount       = bytes.getInt();
         This.fHeader.fFTable         = bytes.getInt();
@@ -322,14 +339,20 @@ final class RBBIDataWrapper {
     ///CLOVER:OFF
     //  Getters for fields from the state table header
     //
-    final static int   getNumStates(short  table[]) {
-        int  hi = table[NUMSTATES];
-        int  lo = table[NUMSTATES+1];
-        int  val = (hi<<16) + (lo&0x0000ffff);
-        return val;
+    private int getStateTableNumStates(short table[]) {  // reassembles a 32-bit value from the two shorts at NUMSTATES and NUMSTATES+1
+        if (isBigEndian) {
+            return (table[NUMSTATES] << 16) | (table[NUMSTATES+1] & 0xffff);  // big-endian data: first short is the high half
+        } else {
+            return (table[NUMSTATES+1] << 16) | (table[NUMSTATES] & 0xffff);  // little-endian data: first short is the low half
+        }
     }
     ///CLOVER:ON
 
+    int getStateTableFlags(short table[]) {
+        // This works for up to 15 flags bits.
+        return table[isBigEndian ? FLAGS + 1 : FLAGS];  // presumably the low-order half of a two-short flags field: second short if big-endian, first if little-endian
     }
+
     ///CLOVER:OFF
     /* Debug function to display the break iterator data. */
     void dump() {
@@ -395,7 +418,7 @@ final class RBBIDataWrapper {
                 System.out.print("-");
             }
             System.out.println();
-            for (state=0; state< getNumStates(table); state++) {
+            for (state=0; state< getStateTableNumStates(table); state++) {
                 dumpRow(table, state);   
             }
             System.out.println();
index af0c131f600a1d68e302798d3062f0bccde89918..20846fcc29555469ffe92d8bc200b0847f48290d 100644 (file)
@@ -1,5 +1,5 @@
 //
-//    Copyright (C) 2002-2009, International Business Machines Corporation and others.
+//    Copyright (C) 2002-2014, International Business Machines Corporation and others.
 //    All Rights Reserved.
 //
 //
@@ -16,6 +16,7 @@ import java.util.Map;
 import java.util.Set;
 
 import com.ibm.icu.impl.Assert;
+import com.ibm.icu.impl.ICUBinary;
 import com.ibm.icu.impl.ICUDebug;
 
 class RBBIRuleBuilder {
@@ -185,12 +186,8 @@ class RBBIRuleBuilder {
 
         //
         // Write out an ICU Data Header
-        //   TODO:  actually create a real header, rather than just a placeholder.
-        //           The empty placeholder is ok for compile-and-go from within ICU4J.
-        //           Replicating the ICU4C genbrk tool for building .brk resources would need a real header.
         //
-        byte[] ICUDataHeader = new byte[0x80];
-        dos.write(ICUDataHeader);
+        ICUBinary.writeHeader(RBBIDataWrapper.DATA_FORMAT, RBBIDataWrapper.FORMAT_VERSION, 0, dos);
 
         //
         // Write out the RBBIDataHeader
index b0e08d78895422cffd849a77d5b24ebfcfa4c54d..0dd194f386dc60f0c0e3c8d518312e40161c20af 100644 (file)
@@ -1200,7 +1200,7 @@ public class RuleBasedBreakIterator extends BreakIterator {
         int state           = START_STATE;
         int row             = fRData.getRowIndex(state); 
         short category      = 3;
-        short flagsState    = stateTable[RBBIDataWrapper.FLAGS+1];
+        int flagsState      = fRData.getStateTableFlags(stateTable);
         int mode            = RBBI_RUN;
         if ((flagsState & RBBIDataWrapper.RBBI_BOF_REQUIRED) != 0) {
             category = 2;
@@ -1373,7 +1373,7 @@ public class RuleBasedBreakIterator extends BreakIterator {
         int            initialPosition    = 0;
         int            lookaheadResult    = 0;
         boolean        lookAheadHardBreak = 
-            (stateTable[RBBIDataWrapper.FLAGS+1] & RBBIDataWrapper.RBBI_LOOKAHEAD_HARD_BREAK) != 0;
+            (fRData.getStateTableFlags(stateTable) & RBBIDataWrapper.RBBI_LOOKAHEAD_HARD_BREAK) != 0;
         
         // handlePrevious() never gets the rule status.
         // Flag the status as invalid; if the user ever asks for status, we will need
@@ -1392,7 +1392,7 @@ public class RuleBasedBreakIterator extends BreakIterator {
         row = fRData.getRowIndex(state);
         category = 3;   // TODO:  obsolete?  from the old start/run mode scheme?
         mode     = RBBI_RUN;
-        if ((stateTable[RBBIDataWrapper.FLAGS+1] & RBBIDataWrapper.RBBI_BOF_REQUIRED) != 0) {
+        if ((fRData.getStateTableFlags(stateTable) & RBBIDataWrapper.RBBI_BOF_REQUIRED) != 0) {
             category = 2;
             mode     = RBBI_START;
         }
index 888377b69e616c84b736419976799bf2af72735a..1f39a709747c5a35d82a7b33bd7f404303af9c81 100644 (file)
@@ -11,7 +11,6 @@ package com.ibm.icu.text;
 
 import java.io.DataOutputStream;
 import java.io.IOException;
-import java.io.InputStream;
 import java.io.LineNumberReader;
 import java.io.Reader;
 import java.nio.ByteBuffer;
@@ -32,6 +31,7 @@ import java.util.regex.Pattern;
 import com.ibm.icu.impl.ICUBinary;
 import com.ibm.icu.impl.Trie2;
 import com.ibm.icu.impl.Trie2Writable;
+import com.ibm.icu.impl.ICUBinary.Authenticate;
 import com.ibm.icu.lang.UCharacter;
 import com.ibm.icu.lang.UCharacterCategory;
 import com.ibm.icu.lang.UProperty;
@@ -2172,24 +2172,32 @@ public class SpoofChecker {
             }
         }
 
+        private static final int DATA_FORMAT = 0x43667520;  // "Cfu "
 
+        private static final class IsAcceptable implements Authenticate {  // version check handed to ICUBinary.readHeader()
+            // @Override when we switch to Java 6
+            public boolean isDataVersionAcceptable(byte version[]) {
+                return version[0] == 1;  // only data whose first version byte is 1 is accepted
+            }
+        }
+        private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable();  // single instance reused for every readHeader() call
 
-        // getDefault() - Create a SpoofData instance that is built from
-        //                the data baked into the default ICU data.
+        private static final class DefaultData {  // holds the default SpoofData; it is loaded once, when this nested class is initialized
+            private static SpoofData INSTANCE = null;
 
-        static SpoofData getDefault() {
-            // TODO: Cache it. Lazy create, keep until cleanup.
-            SpoofData This = null;
-            try {
-                InputStream is = com.ibm.icu.impl.ICUData.getRequiredStream(com.ibm.icu.impl.ICUResourceBundle.ICU_BUNDLE
-                        + "/confusables.cfu");
-                This = new SpoofData(ICUBinary.getByteBufferFromInputStream(is));
-                is.close();
-            }
-            catch (IOException e) {
-                // Return null in this case.
+            static {
+                try {
+                    INSTANCE = new SpoofData(ICUBinary.getRequiredData("confusables.cfu"));
+                } catch (IOException ignored) {  // intentionally ignored: INSTANCE stays null, so getDefault() returns null as the old code did
+                }
             }
-            return This;
+        }
+
+        /**
+         * Returns the SpoofData built from the default "confusables.cfu" data.
+         * The data is loaded once by the DefaultData holder class, not on every call.
+         *
+         * @return instance for Unicode standard data, or null if reading that
+         *         data failed with an IOException
+         */
+        static SpoofData getDefault() {
+            return DefaultData.INSTANCE;
         }
 
         // SpoofChecker Data constructor for use from data builder.
@@ -2200,9 +2208,7 @@ public class SpoofChecker {
         // Constructor for use when creating from prebuilt default data.
         // A ByteBuffer is what the ICU internal data loading functions provide.
         SpoofData(ByteBuffer bytes) throws java.io.IOException {
-            // Seek past the ICU data header.
-            // TODO: verify that the header looks good.
-            ICUBinary.skipBytes(bytes, 0x80);
+            ICUBinary.readHeader(bytes, DATA_FORMAT, IS_ACCEPTABLE);  // presumably validates and consumes the ICU data header; replaces the fixed 0x80-byte skip
             bytes.mark();  // record the current position (just after readHeader) so a later reset() can return to it
             readData(bytes);
         }
index e2f6d3133fa0279d099793acfbd20b7bfad2c15c..d2987af4a16bb76db94eee28d8773f08c19ae95b 100644 (file)
@@ -14,8 +14,6 @@ import java.nio.ByteBuffer;
 
 import com.ibm.icu.impl.CharTrie;
 import com.ibm.icu.impl.ICUBinary;
-import com.ibm.icu.impl.ICUData;
-import com.ibm.icu.impl.ICUResourceBundle;
 import com.ibm.icu.impl.StringPrepDataReader;
 import com.ibm.icu.impl.UBiDiProps;
 import com.ibm.icu.lang.UCharacter;
@@ -272,7 +270,10 @@ public final class StringPrep {
      */
     public StringPrep(InputStream inputStream) throws IOException{
         // TODO: Add a public constructor that takes ByteBuffer directly.
-        ByteBuffer bytes = ICUBinary.getByteBufferFromInputStream(inputStream);
+        this(ICUBinary.getByteBufferFromInputStream(inputStream));
+    }
+
+    private StringPrep(ByteBuffer bytes) throws IOException {
         StringPrepDataReader reader = new StringPrepDataReader(bytes);
 
         // read the indexes
@@ -328,15 +329,10 @@ public final class StringPrep {
             }
 
             if (instance == null) {
-                InputStream stream = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE + "/"
-                        + PROFILE_NAMES[profile] + ".spp");
-                if (stream != null) {
+                ByteBuffer bytes = ICUBinary.getRequiredData(PROFILE_NAMES[profile] + ".spp");
+                if (bytes != null) {
                     try {
-                        try {
-                            instance = new StringPrep(stream);
-                        } finally {
-                            stream.close();
-                        }
+                        instance = new StringPrep(bytes);
                     } catch (IOException e) {
                         throw new ICUUncheckedIOException(e);
                     }
index 0e5f441577880cd6f6252eeab621856303a0aaf5..67443aa0991890afef6929676f701bec1182488c 100644 (file)
@@ -1,9 +1,7 @@
 /*
  *******************************************************************************
- * Copyright (C) 2002-2012, International Business Machines Corporation and    *
- * others. All Rights Reserved.                                                *
- *******************************************************************************
- *
+ * Copyright (C) 2002-2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
  *******************************************************************************
  */
 
@@ -216,7 +214,6 @@ public class TestConversion extends ModuleTest {
 
     
     private void FromUnicodeCase(ConversionCase cc) {
-
         // create charset encoder for conversion test
         CharsetProviderICU provider = new CharsetProviderICU();
         CharsetEncoder encoder = null;
@@ -227,17 +224,21 @@ public class TestConversion extends ModuleTest {
                     ? (Charset) provider.charsetForName(cc.charset.substring(1),
                         "com/ibm/icu/dev/data/testdata", this.getClass().getClassLoader())
                     : (Charset) provider.charsetForName(cc.charset);
-            encoder = (CharsetEncoder) charset.newEncoder();
-            encoder.onMalformedInput(CodingErrorAction.REPLACE);
-            encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
-            if (encoder instanceof CharsetEncoderICU) {
-                ((CharsetEncoderICU)encoder).setFallbackUsed(cc.fallbacks);
-                if (((CharsetEncoderICU)encoder).isFallbackUsed() != cc.fallbacks) {
-                    errln("Fallback could not be set for " + cc.charset);
+            if (charset != null) {
+                encoder = (CharsetEncoder) charset.newEncoder();
+                encoder.onMalformedInput(CodingErrorAction.REPLACE);
+                encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
+                if (encoder instanceof CharsetEncoderICU) {
+                    ((CharsetEncoderICU)encoder).setFallbackUsed(cc.fallbacks);
+                    if (((CharsetEncoderICU)encoder).isFallbackUsed() != cc.fallbacks) {
+                        errln("Fallback could not be set for " + cc.charset);
+                    }
                 }
             }
-            
         } catch (Exception e) {
+            encoder = null;
+        }
+        if (encoder == null) {
             if (cc.charset.charAt(0) == UNSUPPORTED_CHARSET_SYMBOL) {
                 logln("Skipping test:(" + cc.charset.substring(1) + ") due to ICU Charset not supported at this time");
             } else {
@@ -245,7 +246,7 @@ public class TestConversion extends ModuleTest {
             }
             return;
         }
-        
+
         // set the callback for the encoder 
         if (cc.cbErrorAction != null) {
             if (cc.cbEncoder != null) {
@@ -514,12 +515,16 @@ public class TestConversion extends ModuleTest {
                     ? (Charset) provider.charsetForName(cc.charset.substring(1),
                         "com/ibm/icu/dev/data/testdata", this.getClass().getClassLoader())
                     : (Charset) provider.charsetForName(cc.charset);
-            decoder = (CharsetDecoder) charset.newDecoder();
-            decoder.onMalformedInput(CodingErrorAction.REPLACE);
-            decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
-
+            if (charset != null) {
+                decoder = (CharsetDecoder) charset.newDecoder();
+                decoder.onMalformedInput(CodingErrorAction.REPLACE);
+                decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
+            }
         } catch (Exception e) {
             // TODO implement loading of test data.
+            decoder = null;
+        }
+        if (decoder == null) {
             if (cc.charset.charAt(0) == UNSUPPORTED_CHARSET_SYMBOL) {
                 logln("Skipping test:(" + cc.charset.substring(1) + ") due to ICU Charset not supported at this time");
             } else {
@@ -899,12 +904,12 @@ public class TestConversion extends ModuleTest {
            
            //checking for converter that are not supported at this point        
            try{
-               if(charset.name()=="BOCU-1" ||charset.name()== "SCSU"|| charset.name()=="lmbcs1" || charset.name()== "lmbcs2" ||
+                if(charset==null ||
+                        charset.name()=="BOCU-1" ||charset.name()== "SCSU"|| charset.name()=="lmbcs1" || charset.name()== "lmbcs2" ||
                       charset.name()== "lmbcs3" || charset.name()== "lmbcs4" || charset.name()=="lmbcs5" || charset.name()=="lmbcs6" ||
                       charset.name()== "lmbcs8" || charset.name()=="lmbcs11" || charset.name()=="lmbcs16" || charset.name()=="lmbcs17" || 
                       charset.name()=="lmbcs18"|| charset.name()=="lmbcs19"){
-                   
-                   logln("Converter not supported at this point :" +charset.displayName());
+                    logln("Converter not supported at this point :" + cc.charset);
                    return;
                }