/**
*******************************************************************************
-* Copyright (C) 2006-2011, International Business Machines Corporation and *
-* others. All Rights Reserved. *
+* Copyright (C) 2006-2014, International Business Machines Corporation and
+* others. All Rights Reserved.
*******************************************************************************
-*
-*******************************************************************************
-*/
+*/
package com.ibm.icu.charset;
while (i < length) {
valueString[valueStringLength++] = UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
valueString[valueStringLength++] = UNICODE_U_CODEPOINT; /* adding U */
- valueStringLength += itou(valueString, valueStringLength, (int)buffer[i++] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 4);
+ valueStringLength += itou(valueString, valueStringLength, buffer[i++], 16, 4);
}
} else {
if (((String)context).equals(ESCAPE_JAVA)) {
while (i < length) {
valueString[valueStringLength++] = UNICODE_RS_CODEPOINT; /* adding \ */
valueString[valueStringLength++] = UNICODE_U_LOW_CODEPOINT; /* adding u */
- valueStringLength += itou(valueString, valueStringLength, (int)buffer[i++] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 4);
+ valueStringLength += itou(valueString, valueStringLength, buffer[i++], 16, 4);
}
} else if (((String)context).equals(ESCAPE_C)) {
valueString[valueStringLength++] = UNICODE_RS_CODEPOINT; /* adding \ */
valueStringLength = itou(valueString, valueStringLength, cp, 16, 8);
} else {
valueString[valueStringLength++] = UNICODE_U_LOW_CODEPOINT; /* adding u */
- valueStringLength += itou(valueString, valueStringLength, (int)buffer[0] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 4);
+ valueStringLength += itou(valueString, valueStringLength, buffer[0], 16, 4);
}
} else if (((String)context).equals(ESCAPE_XML_DEC)) {
valueString[valueStringLength++] = UNICODE_AMP_CODEPOINT; /* adding & */
if (length == 2) {
valueStringLength += itou(valueString, valueStringLength, cp, 10, 0);
} else {
- valueStringLength += itou(valueString, valueStringLength, (int)buffer[0] & UConverterConstants.UNSIGNED_SHORT_MASK, 10, 0);
+ valueStringLength += itou(valueString, valueStringLength, buffer[0], 10, 0);
}
valueString[valueStringLength++] = UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
} else if (((String)context).equals(ESCAPE_XML_HEX)) {
if (length == 2) {
valueStringLength += itou(valueString, valueStringLength, cp, 16, 0);
} else {
- valueStringLength += itou(valueString, valueStringLength, (int)buffer[0] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 0);
+ valueStringLength += itou(valueString, valueStringLength, buffer[0], 16, 0);
}
valueString[valueStringLength++] = UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
} else if (((String)context).equals(ESCAPE_UNICODE)) {
if (length == 2) {
valueStringLength += itou(valueString, valueStringLength,cp, 16, 4);
} else {
- valueStringLength += itou(valueString, valueStringLength, (int)buffer[0] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 4);
+ valueStringLength += itou(valueString, valueStringLength, buffer[0], 16, 4);
}
valueString[valueStringLength++] = UNICODE_RIGHT_CURLY_CODEPOINT; /* adding } */
} else if (((String)context).equals(ESCAPE_CSS2)) {
while (i < length) {
valueString[valueStringLength++] = UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
valueString[valueStringLength++] = UNICODE_U_CODEPOINT; /* adding U */
- valueStringLength += itou(valueString, valueStringLength, (int)buffer[i++] & UConverterConstants.UNSIGNED_SHORT_MASK, 16, 4);
+ valueStringLength += itou(valueString, valueStringLength, buffer[i++], 16, 4);
}
}
}
/*
*******************************************************************************
- * Copyright (C) 2008-2011, International Business Machines Corporation and *
- * others. All Rights Reserved. *
+ * Copyright (C) 2008-2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
*******************************************************************************
*/
package com.ibm.icu.charset;
char[] table;
int value;
/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
- if (c >= 0x10000 && (sharedData.mbcs.unicodeMask&UConverterConstants.HAS_SUPPLEMENTARY) == 0) {
+ if (c >= 0x10000 && !sharedData.mbcs.hasSupplementary()) {
return 0;
}
/* convert the Unicode code point in c into codepage bytes */
table = sharedData.mbcs.fromUnicodeTable;
/* get the byte for the output */
- value = CharsetMBCS.MBCS_SINGLE_RESULT_FROM_U(table, sharedData.mbcs.fromUnicodeBytes, c);
+ value = CharsetMBCS.MBCS_SINGLE_RESULT_FROM_U(table, sharedData.mbcs.fromUnicodeChars, c);
/* get the byte for the output */
retval[0] = value & 0xff;
if (value >= 0xf00) {
/* Plain holder that pairs an int offset with an int code point. */
final static class MBCSToUFallback {
int offset;
int codePoint;
+
// Added by this change: both fields are supplied at construction time.
// Declaring this constructor removes the implicit no-argument one, which matches the
// removal of the loader loop that used to call "new MBCSToUFallback()".
+ MBCSToUFallback(int off, int cp) {
+ offset = off;
+ codePoint = cp;
+ }
}
/**
MBCSToUFallback toUFallbacks[/* countToUFallbacks */];
/* fromUnicode */
- char fromUnicodeTable[];
+ char fromUnicodeTable[]; // stage1, and for MBCS_OUTPUT_1 also contains stage2
+ int fromUnicodeTableInts[]; // stage1 and stage2 together as int[]
+ // Exactly one of the fromUnicode(Type) tables is not null,
+ // depending on the outputType.
byte fromUnicodeBytes[];
- byte swapLFNLFromUnicodeBytes[]; /* for swaplfnl */
+ char fromUnicodeChars[];
+ int fromUnicodeInts[];
+ char swapLFNLFromUnicodeChars[]; /* for swaplfnl */
int fromUBytesLength;
short outputType, unicodeMask;
ByteBuffer extIndexes; // create int[] view etc. as needed
CharBuffer mbcsIndex; /* for fast conversion from most of BMP to MBCS (utf8Friendly data) */
- char sbcsIndex[/* SBCS_FAST_LIMIT>>6 */]; /* for fast conversion from low BMP to SBCS (utf8Friendly data) */
+ // char sbcsIndex[/* SBCS_FAST_LIMIT>>6 */]; /* for fast conversion from low BMP to SBCS (utf8Friendly data) */
boolean utf8Friendly; /* for utf8Friendly data */
char maxFastUChar; /* for utf8Friendly data */
/* roundtrips */
- long asciiRoundtrips;
+ int asciiRoundtrips;
/*
 * Constructor: marks the table as not utf8Friendly and leaves mbcsIndex unset.
 * The sbcsIndex allocation is dropped by this change (the field itself is commented out).
 */
UConverterMBCSTable() {
utf8Friendly = false;
mbcsIndex = null;
- sbcsIndex = new char[SBCS_FAST_LIMIT>>6];
+ }
+
// Added helper: true when the HAS_SUPPLEMENTARY bit is set in unicodeMask.
// Replaces the open-coded "(unicodeMask & HAS_SUPPLEMENTARY) == 0" tests at call sites.
+ boolean hasSupplementary() {
+ return (unicodeMask & UConverterConstants.HAS_SUPPLEMENTARY) != 0;
}
/*
* stateTableOwned = t.stateTableOwned; countToUFallbacks = t.countToUFallbacks; stateTable = t.stateTable;
* swapLFNLStateTable = t.swapLFNLStateTable; unicodeCodeUnits = t.unicodeCodeUnits; toUFallbacks =
* t.toUFallbacks; fromUnicodeTable = t.fromUnicodeTable; fromUnicodeBytes = t.fromUnicodeBytes;
- * swapLFNLFromUnicodeBytes = t.swapLFNLFromUnicodeBytes; fromUBytesLength = t.fromUBytesLength; outputType =
+ * swapLFNLFromUnicodeChars = t.swapLFNLFromUnicodeChars; fromUBytesLength = t.fromUBytesLength; outputType =
* t.outputType; unicodeMask = t.unicodeMask; swapLFNLName = t.swapLFNLName; baseSharedData = t.baseSharedData;
* extIndexes = t.extIndexes; }
*/
UConverterStaticData staticData = new UConverterStaticData();
UConverterDataReader reader = null;
try {
- String resourceName = classPath + "/" + myName + "." + UConverterSharedData.DATA_TYPE;
- InputStream i;
+ String itemName = myName + '.' + UConverterSharedData.DATA_TYPE;
+ String resourceName = classPath + '/' + itemName;
+ ByteBuffer b;
if (loader != null) {
- i = ICUData.getRequiredStream(loader, resourceName);
+ InputStream i = ICUData.getRequiredStream(loader, resourceName);
+ b = ICUBinary.getByteBufferFromInputStream(i);
+ } else if (!classPath.equals(ICUData.ICU_BUNDLE)) {
+ InputStream i = ICUData.getRequiredStream(resourceName);
+ b = ICUBinary.getByteBufferFromInputStream(i);
} else {
- i = ICUData.getRequiredStream(resourceName);
+ b = ICUBinary.getRequiredData(itemName);
}
- ByteBuffer b = ICUBinary.getByteBufferFromInputStream(i);
reader = new UConverterDataReader(b);
reader.readStaticData(staticData);
} catch (IOException e) {
int offset;
// int[] extIndexesArray = null;
String baseNameString = null;
- int[][] stateTableArray = null;
- MBCSToUFallback[] toUFallbacksArray = null;
- char[] unicodeCodeUnitsArray = null;
- char[] fromUnicodeTableArray = null;
- byte[] fromUnicodeBytesArray = null;
if (header.version[0] == 5 && header.version[1] >= 3 && (header.options & MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK) == 0) {
noFromU = ((header.options & MBCS_OPT_NO_FROM_U) != 0);
if (offset != 0) {
// agljport:comment subtract 32 for sizeof(_MBCSHeader) and length of baseNameString and 1 null
// terminator byte all already read;
- mbcsTable.extIndexes = reader.readExtIndexes(offset
- - (reader.bytesRead - reader.staticDataBytesRead));
+ mbcsTable.extIndexes = reader.readExtIndexes(offset - reader.bytesReadAfterStaticData());
}
} catch (IOException e) {
throw new InvalidFormatException();
* for the extension converter separately when it is requested.
*/
mbcsTable.swapLFNLStateTable = null;
- mbcsTable.swapLFNLFromUnicodeBytes = null;
+ mbcsTable.swapLFNLFromUnicodeChars = null;
mbcsTable.swapLFNLName = null;
/*
throw new InvalidFormatException();
}
- stateTableArray = new int[header.countStates][256];
- toUFallbacksArray = new MBCSToUFallback[header.countToUFallbacks];
- for (int i = 0; i < toUFallbacksArray.length; ++i)
- toUFallbacksArray[i] = new MBCSToUFallback();
- unicodeCodeUnitsArray = new char[(header.offsetFromUTable - header.offsetToUCodeUnits) / 2];
- fromUnicodeTableArray = new char[(header.offsetFromUBytes - header.offsetFromUTable) / 2];
- fromUnicodeBytesArray = new byte[header.fromUBytesLength];
- try {
- reader.readMBCSTable(stateTableArray, toUFallbacksArray, unicodeCodeUnitsArray, fromUnicodeTableArray,
- fromUnicodeBytesArray);
- } catch (IOException e) {
- throw new InvalidFormatException();
- }
-
- mbcsTable.countStates = (byte) header.countStates;
- mbcsTable.countToUFallbacks = header.countToUFallbacks;
- mbcsTable.stateTable = stateTableArray;
- mbcsTable.toUFallbacks = toUFallbacksArray;
- mbcsTable.unicodeCodeUnits = unicodeCodeUnitsArray;
-
- mbcsTable.fromUnicodeTable = fromUnicodeTableArray;
- mbcsTable.fromUnicodeBytes = fromUnicodeBytesArray;
- mbcsTable.fromUBytesLength = header.fromUBytesLength;
-
/*
* converter versions 6.1 and up contain a unicodeMask that is used here to select the most efficient
* function implementations
*/
// agljport:fix info.size=sizeof(UDataInfo);
// agljport:fix udata_getInfo((UDataMemory *)sharedData->dataMemory, &info);
- // agljport:fix if(info.formatVersion[0]>6 || (info.formatVersion[0]==6 && info.formatVersion[1]>=1)) {
- /* mask off possible future extensions to be safe */
- mbcsTable.unicodeMask = (short) (staticData.unicodeMask & 3);
- // agljport:fix } else {
- /* for older versions, assume worst case: contains anything possible (prevent over-optimizations) */
- // agljport:fix mbcsTable->unicodeMask=UCNV_HAS_SUPPLEMENTARY|UCNV_HAS_SURROGATES;
- // agljport:fix }
+ if (reader.dataFormatHasUnicodeMask()) {
+ /* mask off possible future extensions to be safe */
+ mbcsTable.unicodeMask = (short) (staticData.unicodeMask & 3);
+ } else {
+ /* for older versions, assume worst case: contains anything possible (prevent over-optimizations) */
+ mbcsTable.unicodeMask = UConverterConstants.HAS_SUPPLEMENTARY | UConverterConstants.HAS_SURROGATES;
+ }
+ try {
+ reader.readMBCSTable(header, mbcsTable);
+ } catch (IOException e) {
+ throw new InvalidFormatException();
+ }
+
if (offset != 0) {
try {
// agljport:comment subtract 32 for sizeof(_MBCSHeader) and length of baseNameString and 1 null
// terminator byte all already read;
// int namelen = baseNameString != null? baseNameString.length() + 1: 0;
- mbcsTable.extIndexes = reader.readExtIndexes(offset
- - (reader.bytesRead - reader.staticDataBytesRead));
+ mbcsTable.extIndexes = reader.readExtIndexes(offset - reader.bytesReadAfterStaticData());
} catch (IOException e) {
throw new InvalidFormatException();
}
if (mbcsTable.countStates == 1) {
/*
* SBCS: Stage 3 is allocated in 64-entry blocks for U+0000..SBCS_FAST_MAX or higher.
- * Build a table with indexes to each block, to be used instaed of
+ * Build a table with indexes to each block, to be used instead of
* the regular stage 1/2 table.
*/
- for (int i = 0; i < (SBCS_FAST_LIMIT>>6); ++i) {
- mbcsTable.sbcsIndex[i] = mbcsTable.fromUnicodeTable[mbcsTable.fromUnicodeTable[i>>4]+((i<<2)&0x3c)];
- }
+// sbcsIndex = new char[SBCS_FAST_LIMIT>>6];
+// for (int i = 0; i < (SBCS_FAST_LIMIT>>6); ++i) {
+// mbcsTable.sbcsIndex[i] = mbcsTable.fromUnicodeTable[mbcsTable.fromUnicodeTable[i>>4]+((i<<2)&0x3c)];
+// }
/* set SBCS_FAST_MAX to reflect the reach of sbcsIndex[] even if header.version[2]>(SBCS_FAST_MAX>>8) */
mbcsTable.maxFastUChar = SBCS_FAST_MAX;
} else {
* MBCS: Stage 3 is allocated in 64-entry blocks for U+0000..MBCS_FAST_MAX or higher.
* The .cnv file is prebuilt with an additional stage table with indexes to each block.
*/
- if (noFromU) {
- mbcsTable.mbcsIndex = ByteBuffer.wrap(mbcsTable.fromUnicodeBytes).asCharBuffer();
- }
mbcsTable.maxFastUChar = (char)((header.version[2]<<8) | 0xff);
}
}
/* calculate a bit set of 4 ASCII characters per bit that round-trip to ASCII bytes */
{
- long asciiRoundtrips = 0xffffffff;
+ int asciiRoundtrips = 0xffffffff;
for (int i = 0; i < 0x80; ++i) {
if (mbcsTable.stateTable[0][i] != MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, i)) {
- asciiRoundtrips&=~((long)1<<(i>>2))&UConverterConstants.UNSIGNED_INT_MASK;
+ asciiRoundtrips &= ~(1 << (i >> 2));
}
}
- mbcsTable.asciiRoundtrips = asciiRoundtrips&UConverterConstants.UNSIGNED_INT_MASK;
+ mbcsTable.asciiRoundtrips = asciiRoundtrips;
}
-
+ // TODO: Use asciiRoundtrips to speed up conversion, like in ICU4C.
+
if (noFromU) {
int stage1Length = (mbcsTable.unicodeMask&UConverterConstants.HAS_SUPPLEMENTARY) != 0 ? 0x440 : 0x40;
int stage2Length = (header.offsetFromUBytes - header.offsetFromUTable)/4 - stage1Length/2;
mbcsTable.asciiRoundtrips = 0;
}
}
+ // TODO: Use mbcsIndex to speed up UTF-16 conversion, like in ICU4C.
+ mbcsTable.mbcsIndex = null;
return data;
}
-
+
private static boolean writeStage3Roundtrip(UConverterMBCSTable mbcsTable, long value, int codePoints[]) {
char[] table;
byte[] bytes;
long temp;
table = mbcsTable.fromUnicodeTable;
+ int[] tableInts = mbcsTable.fromUnicodeTableInts;
bytes = mbcsTable.fromUnicodeBytes;
+ char[] chars = mbcsTable.fromUnicodeChars;
+ int[] ints = mbcsTable.fromUnicodeInts;
/* for EUC outputTypes, modify the value like genmbcs.c's transformEUC() */
switch(mbcsTable.outputType) {
/* locate the stage 2 & 3 data */
stage2 = table[c>>10] + ((c>>4)&0x3f);
- st3 = table[stage2*2]<<16|table[stage2*2 + 1];
+ st3 = tableInts[stage2];
st3 = (int)(char)(st3 * 16 + (c&0xf));
/* write the codepage bytes into stage 3 */
bytes[p+2] = (byte)value;
break;
case MBCS_OUTPUT_4:
- bytes[st3*4] = (byte)(value >> 24);
- bytes[st3*4 + 1] = (byte)(value >> 16);
- bytes[st3*4 + 2] = (byte)(value >> 8);
- bytes[st3*4 + 3] = (byte)value;
+ ints[st3] = (int)value;
break;
default:
/* 2 bytes per character */
- bytes[st3*2] = (byte)(value >> 8);
- bytes[st3*2 + 1] = (byte)value;
+ chars[st3] = (char)value;
break;
}
/* set the roundtrip flag */
temp = (1L<<(16+(c&0xf)));
- table[stage2*2] |= (char)(temp>>16);
- table[stage2*2 + 1] |= (char)temp;
+ tableInts[stage2] |= temp;
}
return true;
}
-
- private static void reconstituteData(UConverterMBCSTable mbcsTable, int stage1Length, int stage2Length, int fullStage2Length) {
- int datalength = stage1Length*2+fullStage2Length*4+mbcsTable.fromUBytesLength;
- int offset = 0;
- byte[] stage = new byte[datalength];
-
- for (int i = 0; i < stage1Length; ++i) {
- stage[i*2] = (byte)(mbcsTable.fromUnicodeTable[i]>>8);
- stage[i*2+1] = (byte)(mbcsTable.fromUnicodeTable[i]);
- }
-
- offset = ((fullStage2Length - stage2Length) * 4) + (stage1Length * 2);
- for (int i = 0; i < stage2Length; ++i) {
- stage[offset + i*4] = (byte)(mbcsTable.fromUnicodeTable[stage1Length + i*2]>>8);
- stage[offset + i*4+1] = (byte)(mbcsTable.fromUnicodeTable[stage1Length + i*2]);
- stage[offset + i*4+2] = (byte)(mbcsTable.fromUnicodeTable[stage1Length + i*2+1]>>8);
- stage[offset + i*4+3] = (byte)(mbcsTable.fromUnicodeTable[stage1Length + i*2+1]);
- }
-
- /* indexes into stage 2 count from the bottom of the fromUnicodeTable */
-
- /* reconsitute the initial part of stage 2 from the mbcsIndex */
+
+ private static void reconstituteData(UConverterMBCSTable mbcsTable,
+ int stage1Length, int stage2Length, int fullStage2Length) {
+ char[] stage1 = mbcsTable.fromUnicodeTable;
+
+ // stage2 starts with unused stage1 space.
+ // Indexes into stage 2 count from the bottom of the fromUnicodeTable.
+ int numStage1Ints = stage1Length / 2; // 2 chars = 1 int
+ int[] stage2 = new int[numStage1Ints + fullStage2Length];
+ System.arraycopy(mbcsTable.fromUnicodeTableInts, numStage1Ints,
+ stage2, (fullStage2Length - stage2Length) + numStage1Ints,
+ stage2Length);
+ mbcsTable.fromUnicodeTableInts = stage2;
+
+ /* reconstitute the initial part of stage 2 from the mbcsIndex */
{
int stageUTF8Length=(mbcsTable.maxFastUChar+1)>>6;
int stageUTF8Index=0;
int st1, st2, st3, i;
-
+
for (st1 = 0; stageUTF8Index < stageUTF8Length; ++st1) {
- st2 = ((char)stage[2*st1]<<8) | (0xff & stage[2*st1+1]);
+ st2 = stage1[st1];
if (st2 != stage1Length/2) {
/* each stage 2 block has 64 entries corresponding to 16 entries in the mbcsIndex */
for (i = 0; i < 16; ++i) {
* 4 stage 2 entries point to 4 consecutive stage 3 16-blocks which are
* allocated together as a single 64-block for access from the mbcsIndex
*/
- stage[4*st2] = (byte)(st3>>24); stage[4*st2+1] = (byte)(st3>>16); stage[4*st2+2] = (byte)(st3>>8); stage[4*st2+3] = (byte)(st3); st2++; st3++;
- stage[4*st2] = (byte)(st3>>24); stage[4*st2+1] = (byte)(st3>>16); stage[4*st2+2] = (byte)(st3>>8); stage[4*st2+3] = (byte)(st3); st2++; st3++;
- stage[4*st2] = (byte)(st3>>24); stage[4*st2+1] = (byte)(st3>>16); stage[4*st2+2] = (byte)(st3>>8); stage[4*st2+3] = (byte)(st3); st2++; st3++;
- stage[4*st2] = (byte)(st3>>24); stage[4*st2+1] = (byte)(st3>>16); stage[4*st2+2] = (byte)(st3>>8); stage[4*st2+3] = (byte)(st3);
+ stage2[st2++] = st3++;
+ stage2[st2++] = st3++;
+ stage2[st2++] = st3++;
+ stage2[st2++] = st3;
} else {
/* no stage 3 block, skip */
st2+=4;
}
}
}
-
- char[] stage1 = new char[stage.length/2];
- for (int i = 0; i < stage1.length; ++i) {
- stage1[i] = (char)(((stage[i*2])<<8)|(stage[i*2+1] & UConverterConstants.UNSIGNED_BYTE_MASK));
+
+ switch (mbcsTable.outputType) {
+ case CharsetMBCS.MBCS_OUTPUT_2:
+ case CharsetMBCS.MBCS_OUTPUT_2_SISO:
+ case CharsetMBCS.MBCS_OUTPUT_3_EUC:
+ mbcsTable.fromUnicodeChars = new char[mbcsTable.fromUBytesLength / 2];
+ break;
+ case CharsetMBCS.MBCS_OUTPUT_3:
+ case CharsetMBCS.MBCS_OUTPUT_4_EUC:
+ mbcsTable.fromUnicodeBytes = new byte[mbcsTable.fromUBytesLength];
+ break;
+ case CharsetMBCS.MBCS_OUTPUT_4:
+ mbcsTable.fromUnicodeInts = new int[mbcsTable.fromUBytesLength / 4];
+ break;
+ default:
+ // Cannot occur, caller checked already.
+ assert false;
}
- byte[] stage2 = new byte[stage.length - ((stage1Length * 2) + (fullStage2Length * 4))];
- System.arraycopy(stage, ((stage1Length * 2) + (fullStage2Length * 4)), stage2, 0, stage2.length);
-
- mbcsTable.fromUnicodeTable = stage1;
- mbcsTable.fromUnicodeBytes = stage2;
-
+
/* reconstitute the from-Unicode result array (fromUnicodeBytes, fromUnicodeChars or fromUnicodeInts, depending on outputType) with roundtrips from toUnicode data */
MBCSEnumToUnicode(mbcsTable);
}
}
if (((++b)&0x1f) == 0) {
if(anyCodePoints>=0) {
- if(!writeStage3Roundtrip(mbcsTable, value|(b-0x20)&UConverterConstants.UNSIGNED_INT_MASK, codePoints)) {
+ if(!writeStage3Roundtrip(mbcsTable, value|(b-0x20), codePoints)) {
return false;
}
anyCodePoints=-1;
private boolean EBCDICSwapLFNL() throws Exception {
UConverterMBCSTable mbcsTable;
-
+
char[] table;
- byte[] results;
- byte[] bytes;
-
+
int[][] newStateTable;
- byte[] newResults;
String newName;
-
+
int stage2Entry;
-// int size;
- int sizeofFromUBytes;
-
+
mbcsTable = sharedData.mbcs;
table = mbcsTable.fromUnicodeTable;
- bytes = mbcsTable.fromUnicodeBytes;
- results = bytes;
-
+ int[] tableInts = sharedData.mbcs.fromUnicodeTableInts;
+ char[] chars = mbcsTable.fromUnicodeChars;
+ char[] results = chars;
+
/*
* Check that this is an EBCDIC table with SBCS portion -
* SBCS or EBCDIC with standard EBCDIC LF and NL mappings.
*
- * If not, ignore the option Options are always ignored if they do not apply.
+ * If not, ignore the option. Options are always ignored if they do not apply.
*/
if (!((mbcsTable.outputType == MBCS_OUTPUT_1 || mbcsTable.outputType == MBCS_OUTPUT_2_SISO) &&
mbcsTable.stateTable[0][EBCDIC_LF] == MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_LF) &&
return false;
}
} else /* MBCS_OUTPUT_2_SISO */ {
- stage2Entry = MBCS_STAGE_2_FROM_U(table, U_LF);
+ stage2Entry = MBCS_STAGE_2_FROM_U(table, tableInts, U_LF);
if (!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, U_LF) &&
- EBCDIC_LF == MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, U_LF))) {
+ EBCDIC_LF == MBCS_VALUE_2_FROM_STAGE_2(chars, stage2Entry, U_LF))) {
return false;
}
- stage2Entry = MBCS_STAGE_2_FROM_U(table, U_NL);
+ stage2Entry = MBCS_STAGE_2_FROM_U(table, tableInts, U_NL);
if (!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, U_NL) &&
- EBCDIC_NL == MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, U_NL))) {
+ EBCDIC_NL == MBCS_VALUE_2_FROM_STAGE_2(chars, stage2Entry, U_NL))) {
return false;
}
}
* We _know_ the number of bytes in the fromUnicodeBytes array
* starting with header.version 4.1.
*/
- sizeofFromUBytes = mbcsTable.fromUBytesLength;
+ // sizeofFromUBytes = mbcsTable.fromUBytesLength;
} else {
/*
* Otherwise:
newStateTable[0][EBCDIC_NL] = MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_LF);
/* copy and modify the from-Unicode result table */
- newResults = new byte[sizeofFromUBytes];
- System.arraycopy(bytes, 0, newResults, 0, sizeofFromUBytes);
+ char[] newResults = new char[chars.length];
+ System.arraycopy(chars, 0, newResults, 0, chars.length);
/* conveniently, the table access macros work on the left side of expressions */
if (mbcsTable.outputType == MBCS_OUTPUT_1) {
MBCS_SINGLE_RESULT_FROM_U_SET(table, newResults, U_LF, EBCDIC_RT_NL);
MBCS_SINGLE_RESULT_FROM_U_SET(table, newResults, U_NL, EBCDIC_RT_LF);
} else /* MBCS_OUTPUT_2_SISO */ {
- stage2Entry = MBCS_STAGE_2_FROM_U(table, U_LF);
+ stage2Entry = MBCS_STAGE_2_FROM_U(table, tableInts, U_LF);
MBCS_VALUE_2_FROM_STAGE_2_SET(newResults, stage2Entry, U_LF, EBCDIC_NL);
- stage2Entry = MBCS_STAGE_2_FROM_U(table, U_NL);
+ stage2Entry = MBCS_STAGE_2_FROM_U(table, tableInts, U_NL);
MBCS_VALUE_2_FROM_STAGE_2_SET(newResults, stage2Entry, U_NL, EBCDIC_LF);
}
/* set the canonical converter name */
newName = icuCanonicalName.concat(UConverterConstants.OPTION_SWAP_LFNL_STRING);
-
+
if (mbcsTable.swapLFNLStateTable == null) {
mbcsTable.swapLFNLStateTable = newStateTable;
- mbcsTable.swapLFNLFromUnicodeBytes = newResults;
+ mbcsTable.swapLFNLFromUnicodeChars = newResults;
mbcsTable.swapLFNLName = newName;
}
return true;
/* GB 18030 data ------------------------------------------------------------ */
/* helper macros for linear values for GB 18030 four-byte sequences */
/*
 * Combines four byte values into one linear number:
 * (((a & 0xff) * 10 + (b & 0xff)) * 126 + (c & 0xff)) * 10 + (d & 0xff).
 * Each argument is masked to 8 bits, so the largest possible result is 3537105, which fits
 * comfortably in an int; the change from long to int arithmetic therefore cannot overflow.
 */
- private static long LINEAR_18030(long a, long b, long c, long d) {
- return ((((a & 0xff) * 10 + (b & 0xff)) * 126L + (c & 0xff)) * 10L + (d & 0xff));
+ private static int LINEAR_18030(int a, int b, int c, int d) {
+ return ((((a & 0xff) * 10 + (b & 0xff)) * 126 + (c & 0xff)) * 10 + (d & 0xff));
}
- private static long LINEAR_18030_BASE = LINEAR_18030(0x81, 0x30, 0x81, 0x30);
+ private static int LINEAR_18030_BASE = LINEAR_18030(0x81, 0x30, 0x81, 0x30);
/*
 * Splits the 32-bit value x into its four bytes (most significant first) and passes them to
 * LINEAR_18030. The top byte is extracted with the unsigned shift >>>, so int arguments whose
 * high bit is set (negative as a Java int) yield the same byte as the former long version.
 */
- private static long LINEAR(long x) {
+ private static int LINEAR(int x) {
return LINEAR_18030(x >>> 24, (x >>> 16) & 0xff, (x >>> 8) & 0xff, x & 0xff);
}
*
* Note that single surrogates are not mapped by GB 18030 as of the re-released mapping tables from 2000-nov-30.
*/
// Table of 14 rows with 4 values each; the element type changes from long to int.
// NOTE(review): the columns appear to be { first code point, last code point,
// LINEAR(first four-byte sequence), LINEAR(last four-byte sequence) } - the visible
// to-Unicode code computes range[0] + (linear - range[2]); confirm against the full file.
// Hex literals such as 0x90308130 are negative as int, which LINEAR handles via >>>.
- private static final long gb18030Ranges[][] = new long[/* 14 */][/* 4 */] {
- { 0x10000L, 0x10FFFFL, LINEAR(0x90308130L), LINEAR(0xE3329A35L) },
- { 0x9FA6L, 0xD7FFL, LINEAR(0x82358F33L), LINEAR(0x8336C738L) },
- { 0x0452L, 0x1E3EL, LINEAR(0x8130D330L), LINEAR(0x8135F436L) },
- { 0x1E40L, 0x200FL, LINEAR(0x8135F438L), LINEAR(0x8136A531L) },
- { 0xE865L, 0xF92BL, LINEAR(0x8336D030L), LINEAR(0x84308534L) },
- { 0x2643L, 0x2E80L, LINEAR(0x8137A839L), LINEAR(0x8138FD38L) },
- { 0xFA2AL, 0xFE2FL, LINEAR(0x84309C38L), LINEAR(0x84318537L) },
- { 0x3CE1L, 0x4055L, LINEAR(0x8231D438L), LINEAR(0x8232AF32L) },
- { 0x361BL, 0x3917L, LINEAR(0x8230A633L), LINEAR(0x8230F237L) },
- { 0x49B8L, 0x4C76L, LINEAR(0x8234A131L), LINEAR(0x8234E733L) },
- { 0x4160L, 0x4336L, LINEAR(0x8232C937L), LINEAR(0x8232F837L) },
- { 0x478EL, 0x4946L, LINEAR(0x8233E838L), LINEAR(0x82349638L) },
- { 0x44D7L, 0x464BL, LINEAR(0x8233A339L), LINEAR(0x8233C931L) },
- { 0xFFE6L, 0xFFFFL, LINEAR(0x8431A234L), LINEAR(0x8431A439L) } };
+ private static final int gb18030Ranges[][] = new int[/* 14 */][/* 4 */] {
+ { 0x10000, 0x10FFFF, LINEAR(0x90308130), LINEAR(0xE3329A35) },
+ { 0x9FA6, 0xD7FF, LINEAR(0x82358F33), LINEAR(0x8336C738) },
+ { 0x0452, 0x1E3E, LINEAR(0x8130D330), LINEAR(0x8135F436) },
+ { 0x1E40, 0x200F, LINEAR(0x8135F438), LINEAR(0x8136A531) },
+ { 0xE865, 0xF92B, LINEAR(0x8336D030), LINEAR(0x84308534) },
+ { 0x2643, 0x2E80, LINEAR(0x8137A839), LINEAR(0x8138FD38) },
+ { 0xFA2A, 0xFE2F, LINEAR(0x84309C38), LINEAR(0x84318537) },
+ { 0x3CE1, 0x4055, LINEAR(0x8231D438), LINEAR(0x8232AF32) },
+ { 0x361B, 0x3917, LINEAR(0x8230A633), LINEAR(0x8230F237) },
+ { 0x49B8, 0x4C76, LINEAR(0x8234A131), LINEAR(0x8234E733) },
+ { 0x4160, 0x4336, LINEAR(0x8232C937), LINEAR(0x8232F837) },
+ { 0x478E, 0x4946, LINEAR(0x8233E838), LINEAR(0x82349638) },
+ { 0x44D7, 0x464B, LINEAR(0x8233A339), LINEAR(0x8233C931) },
+ { 0xFFE6, 0xFFFF, LINEAR(0x8431A234), LINEAR(0x8431A439) } };
/* bit flag for UConverter.options indicating GB 18030 special handling */
private static final int MBCS_OPTION_GB18030 = 0x8000;
* single-state codepages that only map to and from BMP code points, and it always returns fallback values.
*/
static char MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(UConverterMBCSTable mbcs, final int b) {
// Returns the 16-bit final value of the state-0 entry for byte value b.
// The lookup used to mask b with UNSIGNED_BYTE_MASK (presumably 0xff) itself; it now
// expects the caller to pass an already-unsigned value in 0..0xff and only asserts that.
// NOTE(review): asserts are disabled unless the JVM runs with -ea, so an out-of-range b
// is then caught only by the array bounds check on stateTable[0] - confirm that every
// caller masks the byte before calling.
- return MBCS_ENTRY_FINAL_VALUE_16(mbcs.stateTable[0][b & UConverterConstants.UNSIGNED_BYTE_MASK]);
+ assert 0 <= b && b <= 0xff;
+ return MBCS_ENTRY_FINAL_VALUE_16(mbcs.stateTable[0][b]);
}
/* single-byte fromUnicode: get the 16-bit result word */
// Two lookups in table followed by one in results:
//   i1 = table[c >>> 10] + bits 9..4 of c, then i = table[i1] + low 4 bits of c.
// Before this change results was a byte[] read as a big-endian 16-bit pair at 2*i;
// now it is a char[] indexed directly by i, so no byte assembly is needed.
- static char MBCS_SINGLE_RESULT_FROM_U(char[] table, byte[] results, int c) {
+ static char MBCS_SINGLE_RESULT_FROM_U(char[] table, char[] results, int c) {
int i1 = table[c >>> 10] + ((c >>> 4) & 0x3f);
- int i = 2 * (table[i1] + (c & 0xf)); // used as index into byte[] array treated as char[] array
- return (char) (((results[i] & UConverterConstants.UNSIGNED_BYTE_MASK) << 8) | (results[i + 1] & UConverterConstants.UNSIGNED_BYTE_MASK));
+ int i = table[i1] + (c & 0xf);
+ return results[i];
}
/* single-byte fromUnicode: set the 16-bit result word with newValue*/
// Setter counterpart of MBCS_SINGLE_RESULT_FROM_U: computes the same index i from table and c
// and stores newValue there. The old form split newValue into two bytes (high byte first)
// at 2*i in a byte[]; the new form stores it as a single char, keeping the low 16 bits.
- static void MBCS_SINGLE_RESULT_FROM_U_SET(char[] table, byte[] results, int c, int newValue) {
+ static void MBCS_SINGLE_RESULT_FROM_U_SET(char[] table, char[] results, int c, int newValue) {
int i1 = table[c >>> 10] + ((c >>> 4) & 0x3f);
- int i = 2 * (table[i1] + (c & 0xf)); // used as index into byte[] array treated as char[] array
- results[i] = (byte)((newValue >> 8) & UConverterConstants.UNSIGNED_BYTE_MASK);
- results[i + 1] = (byte)(newValue & UConverterConstants.UNSIGNED_BYTE_MASK);
+ int i = table[i1] + (c & 0xf);
+ results[i] = (char) newValue;
}
/* multi-byte fromUnicode: get the 32-bit stage 2 entry */
// Returns the 32-bit stage 2 entry for code point c.
// The index is table[c >>> 10] + bits 9..4 of c. Previously the entry was assembled from two
// consecutive chars of table (high half first) at twice that index; now it is read directly
// from the parallel int[] tableInts, which is why the signature gains a parameter.
- static int MBCS_STAGE_2_FROM_U(char[] table, int c) {
- int i = 2 * (table[(c) >>> 10] + ((c >>> 4) & 0x3f)); // 2x because used as index into char[] array treated as
- // int[] array
- return ((table[i] & UConverterConstants.UNSIGNED_SHORT_MASK) << 16)
- | (table[i + 1] & UConverterConstants.UNSIGNED_SHORT_MASK);
+ static int MBCS_STAGE_2_FROM_U(char[] table, int[] tableInts, int c) {
+ int i = table[(c) >>> 10] + ((c >>> 4) & 0x3f);
+ return tableInts[i];
}
// Tests bit (16 + (c & 0xf)) of stage2Entry, i.e. one of the upper 16 bits selected by the
// low 4 bits of c. This is the same bit that the "set the roundtrip flag" code ORs in.
private static boolean MBCS_FROM_U_IS_ROUNDTRIP(int stage2Entry, int c) {
return (((stage2Entry) & (1 << (16 + ((c) & 0xf)))) != 0);
}
// Reads the 16-bit result for c from the block named by stage2Entry.
// Index = 16 * (stage2Entry masked with UNSIGNED_SHORT_MASK, presumably 0xffff) + low 4 bits
// of c. The old form doubled that index and assembled two bytes (high byte first); the new
// form indexes a char[] directly.
- static char MBCS_VALUE_2_FROM_STAGE_2(byte[] bytes, int stage2Entry, int c) {
- int i = 2 * (16 * ((char) stage2Entry & UConverterConstants.UNSIGNED_SHORT_MASK) + (c & 0xf));
- return (char) (((bytes[i] & UConverterConstants.UNSIGNED_BYTE_MASK) << 8) | (bytes[i + 1] & UConverterConstants.UNSIGNED_BYTE_MASK));
+ static char MBCS_VALUE_2_FROM_STAGE_2(char[] chars, int stage2Entry, int c) {
+ int i = 16 * (stage2Entry & UConverterConstants.UNSIGNED_SHORT_MASK) + (c & 0xf);
+ return chars[i];
}
-
// Setter counterpart of MBCS_VALUE_2_FROM_STAGE_2: same index computation, then stores
// newValue. Old form: two bytes (high byte first) at twice the index in a byte[].
// New form: one char at the index, keeping the low 16 bits of newValue.
- static void MBCS_VALUE_2_FROM_STAGE_2_SET(byte[] bytes, int stage2Entry, int c, int newValue) {
- int i = 2 * (16 * ((char) stage2Entry & UConverterConstants.UNSIGNED_SHORT_MASK) + (c & 0xf));
- bytes[i] = (byte)((newValue >> 8) & UConverterConstants.UNSIGNED_BYTE_MASK);
- bytes[i + 1] = (byte)(newValue & UConverterConstants.UNSIGNED_BYTE_MASK);
+
+ static void MBCS_VALUE_2_FROM_STAGE_2_SET(char[] chars, int stage2Entry, int c, int newValue) {
+ int i = 16 * (stage2Entry & UConverterConstants.UNSIGNED_SHORT_MASK) + (c & 0xf);
+ chars[i] = (char) newValue;
}
// Reads the 32-bit result for c from the block named by stage2Entry.
// Index = 16 * (low part of stage2Entry selected by UNSIGNED_SHORT_MASK) + low 4 bits of c.
// The old form multiplied that index by 4 and assembled four bytes, most significant first;
// the new form reads one element of an int[].
- private static int MBCS_VALUE_4_FROM_STAGE_2(byte[] bytes, int stage2Entry, int c) {
- int i = 4 * (16 * ((char) stage2Entry & UConverterConstants.UNSIGNED_SHORT_MASK) + (c & 0xf));
- return ((bytes[i] & UConverterConstants.UNSIGNED_BYTE_MASK) << 24)
- | ((bytes[i + 1] & UConverterConstants.UNSIGNED_BYTE_MASK) << 16)
- | ((bytes[i + 2] & UConverterConstants.UNSIGNED_BYTE_MASK) << 8)
- | (bytes[i + 3] & UConverterConstants.UNSIGNED_BYTE_MASK);
+ private static int MBCS_VALUE_4_FROM_STAGE_2(int[] ints, int stage2Entry, int c) {
+ int i = 16 * (stage2Entry & UConverterConstants.UNSIGNED_SHORT_MASK) + (c & 0xf);
+ return ints[i];
}
// Returns a byte offset for 3-byte results: (16 * masked stage2Entry + low 4 bits of c) * 3.
// The bytes parameter is not used in the body; only the offset is computed here.
// The change merely drops a (char) cast that preceded the mask with UNSIGNED_SHORT_MASK.
static int MBCS_POINTER_3_FROM_STAGE_2(byte[] bytes, int stage2Entry, int c) {
- return ((16 * ((char) (stage2Entry) & UConverterConstants.UNSIGNED_SHORT_MASK) + ((c) & 0xf)) * 3);
+ return ((16 * (stage2Entry & UConverterConstants.UNSIGNED_SHORT_MASK) + ((c) & 0xf)) * 3);
}
// ------------UConverterExt-------------------------------------------------------
}
// True when value lies in the range [0, TO_U_MIN_CODE_POINT).
// The removed form compared (value & UNSIGNED_INT_MASK); presumably that mask is the long
// 0xffffffffL, making it an unsigned comparison, which the explicit "0 <= value" test
// reproduces for negative ints - TODO confirm the constant's type.
static boolean TO_U_IS_PARTIAL(int value) {
- return (value & UConverterConstants.UNSIGNED_INT_MASK) < TO_U_MIN_CODE_POINT;
+ return 0 <= value && value < TO_U_MIN_CODE_POINT;
}
static int TO_U_GET_PARTIAL_INDEX(int value) {
}
// Places byte b in the bits from TO_U_BYTE_SHIFT upward and ORs in value.
// The removed form masked b to 8 bits before shifting; with a shift of 24 (as the added
// comment states) the sign-extension bits of b fall off the top of the 32-bit int, so the
// result is the same without the mask.
private static int TO_U_MAKE_WORD(byte b, int value) {
- return ((b & UConverterConstants.UNSIGNED_BYTE_MASK) << TO_U_BYTE_SHIFT) | value;
+ // TO_U_BYTE_SHIFT == 24: safe to just shift the signed byte-as-int.
+ return (b << TO_U_BYTE_SHIFT) | value;
}
/* use after masking off the roundtrip flag */
// True when value does not exceed TO_U_MAX_CODE_POINT; value is expected to be non-negative.
// NOTE(review): the non-negative precondition is only asserted. With assertions disabled
// (the JVM default) a negative value returns true here, whereas the removed masked
// comparison presumably treated it as a large unsigned number and returned false.
// TO_U_IS_PARTIAL keeps an explicit "0 <= value" test instead - confirm that all callers
// mask off the roundtrip flag first.
static boolean TO_U_IS_CODE_POINT(int value) {
- return (value & UConverterConstants.UNSIGNED_INT_MASK) <= TO_U_MAX_CODE_POINT;
+ assert value >= 0;
+ return value <= TO_U_MAX_CODE_POINT;
}
// Returns value minus TO_U_MIN_CODE_POINT.
// The removed form widened value through UNSIGNED_INT_MASK and cast the difference back to
// int; for non-negative value (now asserted) plain int subtraction gives the same result.
static int TO_U_GET_CODE_POINT(int value) {
- return (int) ((value & UConverterConstants.UNSIGNED_INT_MASK) - TO_U_MIN_CODE_POINT);
+ assert value >= 0;
+ return value - TO_U_MIN_CODE_POINT;
}
private static int TO_U_GET_INDEX(int value) {
int oldpos = indexes.position();
Buffer b;
+ // TODO: It is very inefficient to create Buffer objects for each array access.
+ // We should create an inner class Extensions (or sibling class CharsetMBCSExtensions)
+ // which has buffers for the arrays, together with the code that works with them.
indexes.position(indexes.getInt(index << 2));
if (itemType == int.class)
b = indexes.asIntBuffer();
/* GB 18030 */
if (length == 4 && (options & MBCS_OPTION_GB18030) != 0) {
- long[] range;
- long linear;
+ int[] range;
+ int linear;
int i;
linear = LINEAR_18030(toUBytesArray[0], toUBytesArray[1], toUBytesArray[2], toUBytesArray[3]);
linear = range[0] + (linear - range[2]);
/* output this code point */
- cr[0] = toUWriteCodePoint((int) linear, target, offsets, sourceIndex);
+ cr[0] = toUWriteCodePoint(linear, target, offsets, sourceIndex);
return 0;
}
}
if (sharedData.mbcs.countStates == 1) {
- if ((sharedData.mbcs.unicodeMask & UConverterConstants.HAS_SUPPLEMENTARY) == 0) {
+ if (!sharedData.mbcs.hasSupplementary()) {
cr[0] = cnvMBCSSingleToBMPWithOffsets(source, target, offsets, flush);
} else {
cr[0] = cnvMBCSSingleToUnicodeWithOffsets(source, target, offsets, flush);
for (b = 0; b <= 0xff; b++) {
entry = row[b];
if (MBCS_ENTRY_IS_TRANSITION(entry) &&
- hasValidTrailBytes(stateTable, (short)(MBCS_ENTRY_TRANSITION_STATE(entry) & UConverterConstants.UNSIGNED_BYTE_MASK))) {
+ hasValidTrailBytes(stateTable, (short)MBCS_ENTRY_TRANSITION_STATE(entry))) {
return true;
}
}
int[] row = stateTable[state];
int entry = row[b];
if (MBCS_ENTRY_IS_TRANSITION(entry)) { /* lead byte */
- return hasValidTrailBytes(stateTable, (short)(MBCS_ENTRY_TRANSITION_STATE(entry) & UConverterConstants.UNSIGNED_BYTE_MASK));
+ return hasValidTrailBytes(stateTable, (short)MBCS_ENTRY_TRANSITION_STATE(entry));
} else {
- short action = (short)(MBCS_ENTRY_FINAL_ACTION(entry) & UConverterConstants.UNSIGNED_BYTE_MASK);
+ int action = MBCS_ENTRY_FINAL_ACTION(entry);
if (action == MBCS_STATE_CHANGE_ONLY && isDBCSOnly) {
return false; /* SI/SO are illegal for DBCS-only conversion */
} else {
int sourceArrayIndex;
char[] table;
byte[] pArray, bytes;
+ char[] chars;
+ int[] ints;
int pArrayIndex, outputType, c;
int prevSourceIndex, sourceIndex, nextSourceIndex;
int stage2Entry = 0, value = 0, length = 0, prevLength;
}
table = sharedData.mbcs.fromUnicodeTable;
+ int[] tableInts = sharedData.mbcs.fromUnicodeTableInts;
sourceArrayIndex = source.position();
+ bytes = sharedData.mbcs.fromUnicodeBytes;
+ ints = sharedData.mbcs.fromUnicodeInts;
if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) {
- bytes = sharedData.mbcs.swapLFNLFromUnicodeBytes;
+ chars = sharedData.mbcs.swapLFNLFromUnicodeChars;
} else {
- bytes = sharedData.mbcs.fromUnicodeBytes;
+ chars = sharedData.mbcs.fromUnicodeChars;
}
// asciiRoundtrips = sharedData.mbcs.asciiRoundtrips;
* byte may be output if the "assigned" bit in stage 2 was on. The data structure does not
* support zero byte output as a fallback, and also does not allow output of leading zeros.
*/
- stage2Entry = MBCS_STAGE_2_FROM_U(table, c);
+ stage2Entry = MBCS_STAGE_2_FROM_U(table, tableInts, c);
/* get the bytes and the length for the output */
switch (outputType) {
* callback function changed it for its output.
*/
fromUnicodeStatus = prevLength; /* save the old state */
- value = MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
- if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {
+ value = MBCS_VALUE_2_FROM_STAGE_2(chars, stage2Entry, c);
+ if (value <= 0xff) {
if (value == 0 && MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) == false) {
/* no mapping, leave value==0 */
length = 0;
break;
case MBCS_OUTPUT_DBCS_ONLY:
/* table with single-byte results, but only DBCS mappings used */
- value = MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
- if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {
+ value = MBCS_VALUE_2_FROM_STAGE_2(chars, stage2Entry, c);
+ if (value <= 0xff) {
/* no mapping or SBCS result, not taken for DBCS-only */
value = stage2Entry = 0; /* stage2Entry=0 to reset roundtrip flags */
length = 0;
value = ((pArray[pArrayIndex] & UConverterConstants.UNSIGNED_BYTE_MASK) << 16)
| ((pArray[pArrayIndex + 1] & UConverterConstants.UNSIGNED_BYTE_MASK) << 8)
| (pArray[pArrayIndex + 2] & UConverterConstants.UNSIGNED_BYTE_MASK);
- if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {
+ if (value <= 0xff) {
length = 1;
- } else if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xffff) {
+ } else if (value <= 0xffff) {
length = 2;
} else {
length = 3;
}
break;
case MBCS_OUTPUT_4:
- value = MBCS_VALUE_4_FROM_STAGE_2(bytes, stage2Entry, c);
- if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {
+ value = MBCS_VALUE_4_FROM_STAGE_2(ints, stage2Entry, c);
+ if (value < 0) {
+ // Half of the 4-byte values look negative in a signed int.
+ length = 4;
+ } else if (value <= 0xff) {
length = 1;
- } else if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xffff) {
+ } else if (value <= 0xffff) {
length = 2;
- } else if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xffffff) {
+ } else if (value <= 0xffffff) {
length = 3;
} else {
length = 4;
}
break;
case MBCS_OUTPUT_3_EUC:
- value = MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
+ value = MBCS_VALUE_2_FROM_STAGE_2(chars, stage2Entry, c);
/* EUC 16-bit fixed-length representation */
- if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {
+ if (value <= 0xff) {
length = 1;
} else if ((value & 0x8000) == 0) {
value |= 0x8e8000;
| ((pArray[pArrayIndex + 1] & UConverterConstants.UNSIGNED_BYTE_MASK) << 8)
| (pArray[pArrayIndex + 2] & UConverterConstants.UNSIGNED_BYTE_MASK);
/* EUC 16-bit fixed-length representation applied to the first two bytes */
- if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {
+ if (value <= 0xff) {
length = 1;
- } else if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xffff) {
+ } else if (value <= 0xffff) {
length = 2;
} else if ((value & 0x800000) == 0) {
value |= 0x8e800000;
int p;
/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
- if (c <= 0xffff || ((sharedData.mbcs.unicodeMask & UConverterConstants.HAS_SUPPLEMENTARY) != 0)) {
+ if (c <= 0xffff || sharedData.mbcs.hasSupplementary()) {
table = sharedData.mbcs.fromUnicodeTable;
/* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */
if (sharedData.mbcs.outputType == MBCS_OUTPUT_1) {
- value = MBCS_SINGLE_RESULT_FROM_U(table, sharedData.mbcs.fromUnicodeBytes, c);
+ value = MBCS_SINGLE_RESULT_FROM_U(table, sharedData.mbcs.fromUnicodeChars, c);
/* is this code point assigned, or do we use fallbacks? */
if (isUseFallback ? value >= 0x800 : value >= 0xc00) {
pValue[0] = value & 0xff;
return 1;
}
} else /* outputType!=MBCS_OUTPUT_1 */{
- stage2Entry = MBCS_STAGE_2_FROM_U(table, c);
+ int[] tableInts = sharedData.mbcs.fromUnicodeTableInts;
+ stage2Entry = MBCS_STAGE_2_FROM_U(table, tableInts, c);
/* get the bytes and the length for the output */
switch (sharedData.mbcs.outputType) {
case MBCS_OUTPUT_2:
- value = MBCS_VALUE_2_FROM_STAGE_2(sharedData.mbcs.fromUnicodeBytes, stage2Entry, c);
+ value = MBCS_VALUE_2_FROM_STAGE_2(sharedData.mbcs.fromUnicodeChars, stage2Entry, c);
if (value <= 0xff) {
length = 1;
} else {
* @return if(U_FAILURE) return the code point for cnv->fromUChar32 else return 0 after output has been written
* to the target
*/
- private int fromU(int cp_, CharBuffer source, ByteBuffer target, IntBuffer offsets, int sourceIndex,
+ private int fromU(int cp, CharBuffer source, ByteBuffer target, IntBuffer offsets, int sourceIndex,
int length, boolean flush, CoderResult[] cr) {
// ByteBuffer cx;
- long cp = cp_ & UConverterConstants.UNSIGNED_INT_MASK;
useSubChar1 = false;
if (sharedData.mbcs.extIndexes != null
- && initialMatchFromU((int) cp, source, target, offsets, sourceIndex, flush, cr)) {
+ && initialMatchFromU(cp, source, target, offsets, sourceIndex, flush, cr)) {
return 0; /* an extension mapping handled the input */
}
/* GB 18030 */
if ((options & MBCS_OPTION_GB18030) != 0) {
- long[] range;
+ int[] range;
int i;
for (i = 0; i < gb18030Ranges.length; ++i) {
range = gb18030Ranges[i];
if (range[0] <= cp && cp <= range[1]) {
/* found the Unicode code point, output the four-byte sequence for it */
- long linear;
+ int linear;
byte bytes[] = new byte[4];
/* get the linear value of the first GB 18030 code in this range */
int sourceArrayIndex, lastSource;
int targetCapacity, length;
char[] table;
- byte[] results;
+ char[] results;
int c, sourceIndex;
char value, minValue;
table = sharedData.mbcs.fromUnicodeTable;
if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) {
- results = sharedData.mbcs.swapLFNLFromUnicodeBytes; // agljport:comment should swapLFNLFromUnicodeBytes
- // be a ByteBuffer so results can be a 16-bit view
- // of it?
+ results = sharedData.mbcs.swapLFNLFromUnicodeChars;
} else {
- results = sharedData.mbcs.fromUnicodeBytes; // agljport:comment should swapLFNLFromUnicodeBytes be a
- // ByteBuffer so results can be a 16-bit view of it?
+ results = sharedData.mbcs.fromUnicodeChars;
}
if (useFallback) {
int sourceArrayIndex;
char[] table;
- byte[] results; // agljport:comment results is used to to get 16-bit values out of byte[] array
+ char[] results;
int c;
int sourceIndex, nextSourceIndex;
table = sharedData.mbcs.fromUnicodeTable;
if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) {
- results = sharedData.mbcs.swapLFNLFromUnicodeBytes; // agljport:comment should swapLFNLFromUnicodeBytes
- // be a ByteBuffer so results can be a 16-bit view
- // of it?
+ results = sharedData.mbcs.swapLFNLFromUnicodeChars;
} else {
- results = sharedData.mbcs.fromUnicodeBytes; // agljport:comment should swapLFNLFromUnicodeBytes be a
- // ByteBuffer so results can be a 16-bit view of it?
+ results = sharedData.mbcs.fromUnicodeChars;
}
if (useFallback) {
int sourceArrayIndex;
char[] table;
- byte[] bytes;
+ char[] chars;
int c, sourceIndex, nextSourceIndex;
sourceArrayIndex = source.position();
table = sharedData.mbcs.fromUnicodeTable;
+ int[] tableInts = sharedData.mbcs.fromUnicodeTableInts;
if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) {
- bytes = sharedData.mbcs.swapLFNLFromUnicodeBytes;
+ chars = sharedData.mbcs.swapLFNLFromUnicodeChars;
} else {
- bytes = sharedData.mbcs.fromUnicodeBytes;
+ chars = sharedData.mbcs.fromUnicodeChars;
}
/* get the converter state from UConverter */
}
/* convert the Unicode code point in c into codepage bytes */
- stage2Entry = MBCS_STAGE_2_FROM_U(table, c);
+ stage2Entry = MBCS_STAGE_2_FROM_U(table, tableInts, c);
/* get the bytes and the length for the output */
/* MBCS_OUTPUT_2 */
- value = MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c);
- if ((value & UConverterConstants.UNSIGNED_INT_MASK) <= 0xff) {
+ value = MBCS_VALUE_2_FROM_STAGE_2(chars, stage2Entry, c);
+ if (value <= 0xff) {
length = 1;
} else {
length = 2;
mbcsTable = data.mbcs;
table = mbcsTable.fromUnicodeTable;
- if((mbcsTable.unicodeMask & UConverterConstants.HAS_SUPPLEMENTARY)!=0){
+ if(mbcsTable.hasSupplementary()){
maxStage1 = 0x440;
}
else{
if(mbcsTable.outputType==MBCS_OUTPUT_1){
char stage2, stage3;
char minValue;
- CharBuffer results;
- results = ByteBuffer.wrap(mbcsTable.fromUnicodeBytes).asCharBuffer();
-
+ char[] results = mbcsTable.fromUnicodeChars;
+
if(which==ROUNDTRIP_SET) {
/* use only roundtrips */
minValue=0xf00;
/*read the stage 3 block */
stage3 = (char)st3;
do {
- if(results.get(stage3++)>=minValue){
+ if(results[stage3++]>=minValue){
setFillIn.add(c);
}
-
}while((++c&0xf) !=0);
} else {
c+= 16; /*empty stage 2 block */
}
}
} else {
+ int[] tableInts = mbcsTable.fromUnicodeTableInts;
int stage2,stage3;
byte[] bytes;
int st3Multiplier;
int value;
boolean useFallBack;
bytes = mbcsTable.fromUnicodeBytes;
+ char[] chars = mbcsTable.fromUnicodeChars;
+ int[] ints = mbcsTable.fromUnicodeInts;
useFallBack = (which == ROUNDTRIP_AND_FALLBACK_SET);
switch(mbcsTable.outputType) {
case MBCS_OUTPUT_3:
st3Multiplier =2;
break;
}
- //ByteBuffer buffer = (ByteBuffer)charTobyte(table);
-
+
for(st1=0;st1<maxStage1;++st1){
st2 = table[st1];
if(st2>(maxStage1>>1)){
stage2 = st2 ;
- for(st2=0;st2<128;++st2){
+ for(st2=0;st2<64;++st2){
/*read the stage 3 block */
- st3 = table[stage2*2 + st2]<<16;
- st3+=table[stage2*2 + ++st2];
+ st3 = tableInts[stage2 + st2];
if(st3!=0){
//if((st3=table[stage2+st2])!=0){
stage3 = st3Multiplier*16*(st3&UConverterConstants.UNSIGNED_SHORT_MASK);
-
+
/* get the roundtrip flags for the stage 3 block */
- st3>>=16;
- st3 &= UConverterConstants.UNSIGNED_SHORT_MASK;
+ st3>>>=16;
switch(filter) {
case UCNV_SET_FILTER_NONE:
do {
-
if((st3&1)!=0){
setFillIn.add(c);
+ // Every code point in the block owns one stage-3 entry, so step past it here too;
+ // otherwise a later fallback lookup in this block would read an earlier entry.
stage3+=st3Multiplier;
}else if (useFallBack) {
-
- char b =0;
+ int b =0;
switch(st3Multiplier) {
- case 4 :
-
- b|= ByteBuffer.wrap(bytes).getChar(stage3++);
-
- case 3 :
-
- b|= ByteBuffer.wrap(bytes).getChar(stage3++);
-
- case 2 :
-
- b|= ByteBuffer.wrap(bytes).getChar(stage3) | ByteBuffer.wrap(bytes).getChar(stage3+1);
- stage3+=2;
+ case 4:
+ b = ints[stage3 / 4];
+ break;
+ case 3:
+ b |= bytes[stage3] | bytes[stage3 + 1] | bytes[stage3 + 2];
+ break;
+ case 2:
+ b = chars[stage3 / 2];
+ break;
default:
break;
}
+ stage3+=st3Multiplier;
if(b!=0) {
setFillIn.add(c);
}
case UCNV_SET_FILTER_DBCS_ONLY:
/* Ignore single bytes results (<0x100). */
do {
- if(((st3&1) != 0 || useFallBack) &&
- (UConverterConstants.UNSIGNED_SHORT_MASK & (ByteBuffer.wrap(bytes).getChar(stage3))) >= 0x100){
+ if(((st3&1) != 0 || useFallBack) && chars[stage3 / 2] >= 0x100){
setFillIn.add(c);
}
st3>>=1;
/* only add code points that map to CNS 11643 planes 1&2 for non-EXT ISO-2202-CN. */
do {
if(((st3&1) != 0 || useFallBack) &&
- ((value= (UConverterConstants.UNSIGNED_BYTE_MASK & (ByteBuffer.wrap(bytes).get(stage3))))==0x81 || value==0x82) ){
+ ((value= (UConverterConstants.UNSIGNED_BYTE_MASK & bytes[stage3]))==0x81 || value==0x82) ){
setFillIn.add(c);
}
st3>>=1;
case UCNV_SET_FILTER_SJIS:
/* only add code points that map tp Shift-JIS codes corrosponding to JIS X 0280. */
do{
-
- if(((st3&1) != 0 || useFallBack) && (value=(UConverterConstants.UNSIGNED_SHORT_MASK & (ByteBuffer.wrap(bytes).getChar(stage3))))>=0x8140 && value<=0xeffc){
+ if(((st3&1) != 0 || useFallBack) && (value=chars[stage3 / 2])>=0x8140 && value<=0xeffc){
setFillIn.add(c);
}
st3>>=1;
/* only add code points that maps to ISO 2022 GR 94 DBCS codes*/
do {
if(((st3&1) != 0 || useFallBack) &&
- (UConverterConstants.UNSIGNED_SHORT_MASK & ((value=(UConverterConstants.UNSIGNED_SHORT_MASK & (ByteBuffer.wrap(bytes).getChar(stage3))))- 0xa1a1))<=(0xfefe - 0xa1a1) &&
+ (UConverterConstants.UNSIGNED_SHORT_MASK & ((value=chars[stage3 / 2])- 0xa1a1))<=(0xfefe - 0xa1a1) &&
(UConverterConstants.UNSIGNED_BYTE_MASK & (value - 0xa1)) <= (0xfe - 0xa1)){
setFillIn.add(c);
}
/*Only add code points that are suitable for HZ DBCS*/
do {
if( ((st3&1) != 0 || useFallBack) &&
- (UConverterConstants.UNSIGNED_SHORT_MASK & ((value=(UConverterConstants.UNSIGNED_SHORT_MASK & (ByteBuffer.wrap(bytes).getChar(stage3))))-0xa1a1))<=(0xfdfe - 0xa1a1) &&
+ (UConverterConstants.UNSIGNED_SHORT_MASK & ((value=chars[stage3 / 2])-0xa1a1))<=(0xfdfe - 0xa1a1) &&
(UConverterConstants.UNSIGNED_BYTE_MASK & (value - 0xa1)) <= (0xfe - 0xa1)){
setFillIn.add(c);
}
if(st3!= 0){
ps3 = st3;
do {
- value = stage3b.get(UConverterConstants.UNSIGNED_SHORT_MASK&stage3.get(ps3++));
+ value = stage3b.get(stage3.get(ps3++));
if(value==0){
/* no mapping do nothing */
}else if (FROM_U_IS_PARTIAL(value)){
}
break;
case UCNV_SET_FILTER_GR94DBCS:
- if(!(FROM_U_GET_LENGTH(value)==2 && (UConverterConstants.UNSIGNED_SHORT_MASK & ((value=FROM_U_GET_DATA(value)) - 0xa1a1))<=(0xfefe - 0xa1a1)
+ // Keep the 16-bit mask: it turns the subtraction into an unsigned range check, so that
+ // values below 0xa1a1 wrap to large numbers and are rejected instead of going negative.
+ if(!(FROM_U_GET_LENGTH(value)==2 && (UConverterConstants.UNSIGNED_SHORT_MASK & ((value=FROM_U_GET_DATA(value)) - 0xa1a1))<=(0xfefe - 0xa1a1)
&& (UConverterConstants.UNSIGNED_BYTE_MASK & (value - 0xa1))<= (0xfe - 0xa1))){
-
continue;
}
break;
case UCNV_SET_FILTER_HZ:
- if(!(FROM_U_GET_LENGTH(value)==2 && (UConverterConstants.UNSIGNED_SHORT_MASK & ((value=FROM_U_GET_DATA(value)) - 0xa1a1))<=(0xfdfe - 0xa1a1)
+ // Same unsigned 16-bit range check as for GR94DBCS above, with the HZ upper bound.
+ if(!(FROM_U_GET_LENGTH(value)==2 && (UConverterConstants.UNSIGNED_SHORT_MASK & ((value=FROM_U_GET_DATA(value)) - 0xa1a1))<=(0xfdfe - 0xa1a1)
&& (UConverterConstants.UNSIGNED_BYTE_MASK & (value - 0xa1))<= (0xfe - 0xa1))){
continue;
}
package com.ibm.icu.charset;
import java.io.IOException;
-import java.io.InputStream;
import java.nio.ByteBuffer;
import com.ibm.icu.impl.ICUBinary;
-import com.ibm.icu.impl.ICUData;
-import com.ibm.icu.impl.ICUResourceBundle;
final class UConverterAlias {
static final int UNNORMALIZED = 0;
return (alias.length() != 0);
}
- private static final String CNVALIAS_DATA_FILE_NAME = ICUResourceBundle.ICU_BUNDLE + "/cnvalias.icu";
+ private static final String CNVALIAS_DATA_FILE_NAME = "cnvalias.icu";
private static final synchronized boolean haveAliasData()
throws IOException{
boolean needInit;
- // agljport:todo umtx_lock(NULL);
needInit = gAliasData == null;
/* load converter alias data from file if necessary */
ByteBuffer data = null;
int[] tableArray = null;
int tableStart;
- //byte[] reservedBytes = null;
- InputStream i = ICUData.getRequiredStream(CNVALIAS_DATA_FILE_NAME);
- ByteBuffer b = ICUBinary.getByteBufferFromInputStream(i);
+ ByteBuffer b = ICUBinary.getRequiredData(CNVALIAS_DATA_FILE_NAME);
UConverterAliasDataReader reader = new UConverterAliasDataReader(b);
tableArray = reader.readToc(offsetsCount);
if (gOptionTable[0] != STD_NORMALIZED) {
throw new IOException("Unsupported alias normalization");
}
-
- // agljport:todo umtx_lock(NULL);
+
if (gAliasData == null) {
gAliasData = data;
data = null;
-
- // agljport:fix ucln_common_registerCleanup(UCLN_COMMON_IO,
- // io_cleanup);
- }
- // agljport:todo umtx_unlock(NULL);
-
- /* if a different thread set it first, then close the extra data */
- if (data != null) {
- // agljport:fix udata_close(data); /* NULL if it was set
- // correctly */
}
}
import java.io.IOException;
import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.IntBuffer;
+import com.ibm.icu.charset.CharsetMBCS.MBCSHeader;
+import com.ibm.icu.charset.CharsetMBCS.MBCSToUFallback;
+import com.ibm.icu.charset.CharsetMBCS.UConverterMBCSTable;
import com.ibm.icu.impl.ICUBinary;
+import com.ibm.icu.impl.InvalidFormatException;
/**
* ucnvmbcs.h
* Indexes and lengths stored in the fromUTableValues[].
*/
-final class UConverterDataReader implements ICUBinary.Authenticate {
+final class UConverterDataReader {
//private final static boolean debug = ICUDebug.enabled("UConverterDataReader");
+ private static final class IsAcceptable implements ICUBinary.Authenticate {
+ // @Override when we switch to Java 6
+ public boolean isDataVersionAcceptable(byte formatVersion[]) {
+ return formatVersion[0] == 6;
+ }
+ }
+ private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable();
+
/*
* UConverterDataReader(UConverterDataReader r)
{
unicodeVersion = r.unicodeVersion;
}
*/
- /* the number bytes read from the buffer */
- int bytesRead = 0;
- /* the number of bytes read for static data */
- int staticDataBytesRead = 0;
+ /** The buffer position after the static data. */
+ private int posAfterStaticData;
/**
* <p>Protected constructor.</p>
//if(debug) System.out.println("Bytes in buffer " + bytes.remaining());
byteBuffer = bytes;
- /*unicodeVersion = */ICUBinary.readHeader(byteBuffer, DATA_FORMAT_ID, this);
+ /*unicodeVersion = */ICUBinary.readHeader(byteBuffer, DATA_FORMAT_ID, IS_ACCEPTABLE);
//if(debug) System.out.println("Bytes left in byteBuffer " + byteBuffer.remaining());
}
protected void readStaticData(UConverterStaticData sd) throws IOException
{
- int bRead = 0;
sd.structSize = byteBuffer.getInt();
- bRead +=4;
byte[] name = new byte[UConverterConstants.MAX_CONVERTER_NAME_LENGTH];
byteBuffer.get(name);
- bRead +=name.length;
- sd.name = new String(name, 0, name.length);
+ sd.name = new String(name, "US-ASCII");
sd.codepage = byteBuffer.getInt();
- bRead +=4;
sd.platform = byteBuffer.get();
- bRead++;
sd.conversionType = byteBuffer.get();
- bRead++;
sd.minBytesPerChar = byteBuffer.get();
- bRead++;
sd.maxBytesPerChar = byteBuffer.get();
- bRead++;
byteBuffer.get(sd.subChar);
- bRead += sd.subChar.length;
sd.subCharLen = byteBuffer.get();
- bRead++;
sd.hasToUnicodeFallback = byteBuffer.get();
- bRead++;
sd.hasFromUnicodeFallback = byteBuffer.get();
- bRead++;
sd.unicodeMask = (short)(byteBuffer.get() & 0xff);
- bRead++;
sd.subChar1 = byteBuffer.get();
- bRead++;
byteBuffer.get(sd.reserved);
- bRead += sd.reserved.length;
- staticDataBytesRead = bRead;
- bytesRead += bRead;
+ posAfterStaticData = byteBuffer.position();
+ }
+
+ /**
+  * Returns how far the buffer position has advanced since readStaticData() recorded it,
+  * that is, the current buffer position minus posAfterStaticData.
+  */
+ int bytesReadAfterStaticData() {
+ return byteBuffer.position() - posAfterStaticData;
}
protected void readMBCSHeader(CharsetMBCS.MBCSHeader h) throws IOException
{
byteBuffer.get(h.version);
- bytesRead += h.version.length;
h.countStates = byteBuffer.getInt();
- bytesRead+=4;
h.countToUFallbacks = byteBuffer.getInt();
- bytesRead+=4;
h.offsetToUCodeUnits = byteBuffer.getInt();
- bytesRead+=4;
h.offsetFromUTable = byteBuffer.getInt();
- bytesRead+=4;
h.offsetFromUBytes = byteBuffer.getInt();
- bytesRead+=4;
h.flags = byteBuffer.getInt();
- bytesRead+=4;
h.fromUBytesLength = byteBuffer.getInt();
- bytesRead+=4;
if (h.version[0] == 5 && h.version[1] >= 3) {
h.options = byteBuffer.getInt();
- bytesRead+=4;
if ((h.options & CharsetMBCS.MBCS_OPT_NO_FROM_U) != 0) {
h.fullStage2Length = byteBuffer.getInt();
- bytesRead+=4;
}
}
}
-
- protected void readMBCSTable(int[][] stateTableArray, CharsetMBCS.MBCSToUFallback[] toUFallbacksArray, char[] unicodeCodeUnitsArray, char[] fromUnicodeTableArray, byte[] fromUnicodeBytesArray) throws IOException
+
+ protected void readMBCSTable(MBCSHeader header, UConverterMBCSTable mbcsTable) throws IOException
{
- int i, j;
- for(i = 0; i < stateTableArray.length; ++i){
- for(j = 0; j < stateTableArray[i].length; ++j){
- stateTableArray[i][j] = byteBuffer.getInt();
- bytesRead+=4;
- }
+ IntBuffer intBuffer = byteBuffer.asIntBuffer();
+ mbcsTable.countStates = (byte) header.countStates;
+ mbcsTable.stateTable = new int[header.countStates][256];
+ int i;
+ for(i = 0; i < header.countStates; ++i) {
+ intBuffer.get(mbcsTable.stateTable[i]);
}
- for(i = 0; i < toUFallbacksArray.length; ++i) {
- toUFallbacksArray[i].offset = byteBuffer.getInt();
- bytesRead+=4;
- toUFallbacksArray[i].codePoint = byteBuffer.getInt();
- bytesRead+=4;
+
+ mbcsTable.countToUFallbacks = header.countToUFallbacks;
+ mbcsTable.toUFallbacks = new MBCSToUFallback[header.countToUFallbacks];
+ for(i = 0; i < header.countToUFallbacks; ++i) {
+ int offset = intBuffer.get();
+ int codePoint = intBuffer.get();
+ mbcsTable.toUFallbacks[i] = new MBCSToUFallback(offset, codePoint);
}
- for(i = 0; i < unicodeCodeUnitsArray.length; ++i){
- unicodeCodeUnitsArray[i] = byteBuffer.getChar();
- bytesRead+=2;
+ // Skip as many bytes as we have read from the IntBuffer.
+ int length = intBuffer.position() * 4;
+ ICUBinary.skipBytes(byteBuffer, length);
+
+ // Consider leaving some large arrays as CharBuffer/IntBuffer rather than
+ // reading them into Java arrays, to reduce initialization time and memory usage,
+ // at the cost of some performance.
+ // For example: unicodeCodeUnits, fromUnicodeTable, fromUnicodeInts.
+ // Take care not to modify the buffer contents for swaplfnl.
+ CharBuffer charBuffer = byteBuffer.asCharBuffer();
+ length = header.offsetFromUTable - header.offsetToUCodeUnits;
+ assert (length & 1) == 0;
+ mbcsTable.unicodeCodeUnits = new char[length / 2];
+ charBuffer.get(mbcsTable.unicodeCodeUnits);
+ // Skip as many bytes as we have read from the CharBuffer.
+ ICUBinary.skipBytes(byteBuffer, length);
+
+ length = header.offsetFromUBytes - header.offsetFromUTable;
+ assert (length & 1) == 0;
+ int fromUTableCharsLength;
+ if (mbcsTable.outputType == CharsetMBCS.MBCS_OUTPUT_1) {
+ // single-byte table stage1 + stage2
+ fromUTableCharsLength = length / 2;
+ } else if (mbcsTable.hasSupplementary()) {
+ // stage1 for Unicode limit 0x110000 >> 10
+ fromUTableCharsLength = 0x440;
+ } else {
+ // stage1 for BMP limit 0x10000 >> 10
+ fromUTableCharsLength = 0x40;
}
- for(i = 0; i < fromUnicodeTableArray.length; ++i){
- fromUnicodeTableArray[i] = byteBuffer.getChar();
- bytesRead+=2;
+ mbcsTable.fromUnicodeTable = new char[fromUTableCharsLength];
+ charBuffer.get(mbcsTable.fromUnicodeTable);
+ if (mbcsTable.outputType != CharsetMBCS.MBCS_OUTPUT_1) {
+ // Read both stage1 and stage2 together into an int[] array.
+ // Keeping the short stage1 in the array avoids offsetting at runtime.
+ // The stage1 part of this array will not be used.
+ assert (length & 3) == 0;
+ mbcsTable.fromUnicodeTableInts = new int[length / 4];
+ byteBuffer.asIntBuffer().get(mbcsTable.fromUnicodeTableInts);
}
- for(i = 0; i < fromUnicodeBytesArray.length; ++i){
- fromUnicodeBytesArray[i] = byteBuffer.get();
- bytesRead++;
+ // Skip as many bytes as are in stage1 + stage2.
+ ICUBinary.skipBytes(byteBuffer, length);
+
+ mbcsTable.fromUBytesLength = header.fromUBytesLength;
+ boolean noFromU = ((header.options & CharsetMBCS.MBCS_OPT_NO_FROM_U) != 0);
+ if (!noFromU) {
+ switch (mbcsTable.outputType) {
+ case CharsetMBCS.MBCS_OUTPUT_1:
+ case CharsetMBCS.MBCS_OUTPUT_2:
+ case CharsetMBCS.MBCS_OUTPUT_2_SISO:
+ case CharsetMBCS.MBCS_OUTPUT_3_EUC:
+ mbcsTable.fromUnicodeChars = new char[header.fromUBytesLength / 2];
+ byteBuffer.asCharBuffer().get(mbcsTable.fromUnicodeChars);
+ ICUBinary.skipBytes(byteBuffer, header.fromUBytesLength & ~1);
+ break;
+ case CharsetMBCS.MBCS_OUTPUT_3:
+ case CharsetMBCS.MBCS_OUTPUT_4_EUC:
+ mbcsTable.fromUnicodeBytes = new byte[header.fromUBytesLength];
+ byteBuffer.get(mbcsTable.fromUnicodeBytes);
+ break;
+ case CharsetMBCS.MBCS_OUTPUT_4:
+ mbcsTable.fromUnicodeInts = new int[header.fromUBytesLength / 4];
+ byteBuffer.asIntBuffer().get(mbcsTable.fromUnicodeInts);
+ ICUBinary.skipBytes(byteBuffer, header.fromUBytesLength & ~3);
+ break;
+ default:
+ // Cannot occur, caller checked already.
+ assert false;
+ }
+ } else {
+ // Optional utf8Friendly mbcsIndex -- _MBCSHeader.version 4.3 (ICU 3.8) and higher.
+ // Needed for reconstituting omitted data.
+ mbcsTable.mbcsIndex = byteBuffer.asCharBuffer();
}
}
StringBuilder name = new StringBuilder();
while((c = (char)byteBuffer.get()) != 0){
name.append(c);
- bytesRead++;
}
- bytesRead++/*for null terminator*/;
return name.toString();
}
//protected int[] readExtIndexes(int skip) throws IOException
- protected ByteBuffer readExtIndexes(int skip) throws IOException
+ protected ByteBuffer readExtIndexes(int skip) throws IOException, InvalidFormatException
{
ICUBinary.skipBytes(byteBuffer, skip);
- int n = byteBuffer.getInt();
- bytesRead+=4;
- int[] indexes = new int[n];
- indexes[0] = n;
- for(int i = 1; i < n; ++i) {
- indexes[i] = byteBuffer.getInt();
- bytesRead+=4;
+ ByteBuffer b = ICUBinary.sliceWithOrder(byteBuffer);
+ int lengthOfIndexes = b.getInt(0);
+ if (lengthOfIndexes < 32) {
+ throw new InvalidFormatException();
}
- //return indexes;
-
- ByteBuffer b = ByteBuffer.allocate(indexes[31]);
- for(int i = 0; i < n; ++i) {
- b.putInt(indexes[i]);
- }
- int len = b.remaining();
- byteBuffer.get(b.array(), b.position(), len);
- bytesRead += len;
+ int numBytesExtensionStructure = b.getInt(31 * 4);
+ b.limit(numBytesExtensionStructure);
+ ICUBinary.skipBytes(byteBuffer, numBytesExtensionStructure);
return b;
}
- /*protected byte[] readExtTables(int n) throws IOException
- {
- byte[] tables = new byte[n];
- int len = byteBuffer.get(tables);
- if(len==-1){
- throw new IOException("Read failed");
- }
- bytesRead += len;
- return tables;
- }*/
-
- byte[] getDataFormatVersion(){
- return DATA_FORMAT_VERSION;
- }
/**
- * Inherited method
+ * Data formatVersion 6.1 and higher has a unicodeMask.
*/
- public boolean isDataVersionAcceptable(byte version[]){
- return version[0] == DATA_FORMAT_VERSION[0];
+ boolean dataFormatHasUnicodeMask() {
+ int formatVersion0 = byteBuffer.get(16) & 0xff;
+ return formatVersion0 > 6 || (formatVersion0 == 6 && byteBuffer.get(17) != 0);
}
-
-/* byte[] getUnicodeVersion(){
- return unicodeVersion;
- }*/
+
// private data members -------------------------------------------------
/**
*/
// DATA_FORMAT_ID_ values taken from icu4c isCnvAcceptable (ucnv_bld.c)
private static final int DATA_FORMAT_ID = 0x636e7674; // dataFormat="cnvt"
- private static final byte DATA_FORMAT_VERSION[] = {(byte)0x6};
}
package com.ibm.icu.impl.coll;
import java.io.IOException;
-import java.io.InputStream;
+import java.nio.ByteBuffer;
import java.util.MissingResourceException;
import com.ibm.icu.impl.ICUBinary;
import com.ibm.icu.impl.ICUData;
-import com.ibm.icu.impl.ICUResourceBundle;
/**
* Collation root provider.
}
static { // Corresponds to C++ load() function.
- CollationTailoring t = new CollationTailoring(null);
- // TODO: Optionally load from a .dat file or stand-alone .icu file.
- String path = ICUResourceBundle.ICU_BUNDLE + "/coll/ucadata.icu";
- InputStream is = ICUData.getRequiredStream(path);
+ CollationTailoring t = null;
RuntimeException e2 = null;
try {
- CollationDataReader.read(null, ICUBinary.getByteBufferFromInputStream(is), t);
+ ByteBuffer bytes = ICUBinary.getRequiredData("coll/ucadata.icu");
+ CollationTailoring t2 = new CollationTailoring(null);
+ CollationDataReader.read(null, bytes, t2);
+ // Keep t=null until after the root data has been read completely.
+ // Otherwise we would set a non-null root object if the data reader throws an exception.
+ t = t2;
} catch(IOException e) {
- t = null;
e2 = new MissingResourceException(
"IOException while reading CLDR root data",
- "CollationRoot", path);
+ "CollationRoot", ICUData.ICU_BUNDLE + "/coll/ucadata.icu");
} catch(RuntimeException e) {
- t = null;
e2 = e;
}
rootSingleton = t;
#* Copyright (C) 2008-2014, International Business Machines Corporation and *
#* others. All Rights Reserved. *
#*******************************************************************************
-#* This is the properties contains ICU runtime configuration
+#* This is the properties file which contains ICU runtime configuration.
#*
#
com.ibm.icu.text.MessagePattern.ApostropheMode = DOUBLE_OPTIONAL
#
+# [Internal Use Only]
# By default, DecimalFormat uses some internal equivalent character
# data in addition to ones in DecimalFormatSymbols for parsing
# decimal/grouping separators. When this property is true,
# @internal
com.ibm.icu.text.DecimalFormat.SkipExtendedSeparatorParsing = false
+# File system path where ICU looks for binary data files.
+# If not empty, then ICU looks for binary data files before looking for data on the classpath.
+# This string may contain multiple paths, see File.pathSeparatorChar.
+# Spaces (U+0020) around each path are trimmed away. Empty paths are ignored.
+# There may be individual files, for example, zoneinfo64.res,
+# or ICU4C .dat package files, for example, collation.dat or icudt54l.dat.
+# Each ICU data file may contain little-endian or big-endian data.
+# Each ICU data file's charset must be ASCII. (Platform type 'l' or 'b' but not 'e'.)
+# @draft ICU 54
+com.ibm.icu.impl.ICUBinary.dataPath =
-#
+#
# [Internal Use Only]
# Disable resource path scan for building full locale name list
# at run time.
package com.ibm.icu.impl;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.MissingResourceException;
+import com.ibm.icu.util.ICUUncheckedIOException;
import com.ibm.icu.util.VersionInfo;
-public final class ICUBinary
-{
+public final class ICUBinary {
+    /**
+     * Reads the ICU .dat package file format.
+     * Most methods do not modify the ByteBuffer in any way,
+     * not even its position or other state.
+     * Except for validate(), the methods expect the buffer position to be just after
+     * the data header, at the item count; all table-of-contents offsets are relative to it.
+     */
+    private static final class DatPackageReader {
+        /**
+         * .dat package data format ID "CmnD".
+         */
+        private static final int DATA_FORMAT = 0x436d6e44;
+
+        /**
+         * Minimum number of bytes per item:
+         * one ToC entry (8 bytes) plus a data item of at least 16 bytes.
+         */
+        private static final int MIN_BYTES_PER_ITEM = 8 + 16;
+
+        private static final class IsAcceptable implements Authenticate {
+            // @Override when we switch to Java 6
+            public boolean isDataVersionAcceptable(byte version[]) {
+                return version[0] == 1;
+            }
+        }
+        private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable();
+
+        /**
+         * Checks that the ByteBuffer contains a valid, usable ICU .dat package.
+         * Moves the buffer position from 0 to after the data header.
+         *
+         * @param bytes the complete package file contents, positioned at 0
+         * @return true if the header, the item count and the first and last item names look valid
+         */
+        private static boolean validate(ByteBuffer bytes) {
+            try {
+                readHeader(bytes, DATA_FORMAT, IS_ACCEPTABLE);
+            } catch (IOException ignored) {
+                return false;
+            }
+            int base = bytes.position();
+            // The item count itself needs 4 readable bytes after the header.
+            if (base > bytes.capacity() - 4) {
+                return false;
+            }
+            int count = bytes.getInt(base);  // Do not move the position.
+            if (count <= 0) {
+                return false;
+            }
+            // For each item, there is one ToC entry (8 bytes) and a name string
+            // and a data item of at least 16 bytes.
+            // (We assume no data item duplicate elimination for now.)
+            // Use long arithmetic: a corrupt count must not overflow int and slip past this check.
+            if (base + 4L + (long)count * MIN_BYTES_PER_ITEM > bytes.capacity()) {
+                return false;
+            }
+            if (!startsWithPackageName(bytes, getNameOffset(bytes, 0)) ||
+                    !startsWithPackageName(bytes, getNameOffset(bytes, count - 1))) {
+                return false;
+            }
+            return true;
+        }
+
+        /**
+         * Checks whether the bytes at the start index spell the ICU package name
+         * (with either platform letter 'b' or 'l') followed by '/'.
+         *
+         * @param bytes the package bytes
+         * @param start absolute buffer index of the item name; out-of-range values yield false
+         */
+        private static boolean startsWithPackageName(ByteBuffer bytes, int start) {
+            // Compare all but the trailing 'b' or 'l' which depends on the platform.
+            int length = ICUData.PACKAGE_NAME.length() - 1;
+            // We read length + 2 bytes: the name prefix, the platform letter, and the '/'.
+            // The name offset comes from the file, so reject it if it points outside the buffer.
+            if (start < 0 || start > bytes.capacity() - (length + 2)) {
+                return false;
+            }
+            for (int i = 0; i < length; ++i) {
+                if (bytes.get(start + i) != ICUData.PACKAGE_NAME.charAt(i)) {
+                    return false;
+                }
+            }
+            // Check for 'b' or 'l' followed by '/'.
+            byte c = bytes.get(start + length++);
+            if ((c != 'b' && c != 'l') || bytes.get(start + length) != '/') {
+                return false;
+            }
+            return true;
+        }
+
+        /**
+         * Looks up one item in the package via binary search over the table of contents.
+         *
+         * @param bytes the package bytes, positioned after the header
+         * @param key the item path without the package name prefix
+         * @return a slice of the item's data with the same byte order as the package buffer,
+         *         or null if there is no such item
+         */
+        private static ByteBuffer getData(ByteBuffer bytes, CharSequence key) {
+            int base = bytes.position();
+            int count = bytes.getInt(base);
+
+            // Do a binary search for the key.
+            int start = 0;
+            int limit = count;
+            while (start < limit) {
+                int mid = (start + limit) >>> 1;
+                int nameOffset = getNameOffset(bytes, mid);
+                // Skip "icudt54b/".
+                nameOffset += ICUData.PACKAGE_NAME.length() + 1;
+                int result = compareKeys(key, bytes, nameOffset);
+                if (result < 0) {
+                    limit = mid;
+                } else if (result > 0) {
+                    start = mid + 1;
+                } else {
+                    // We found it!
+                    // Work on a duplicate so that the shared package buffer's state is untouched.
+                    ByteBuffer data = bytes.duplicate();
+                    data.position(getDataOffset(bytes, mid));
+                    data.limit(getDataOffset(bytes, mid + 1));
+                    return ICUBinary.sliceWithOrder(data);
+                }
+            }
+            return null;  // Not found or table is empty.
+        }
+
+        /**
+         * Returns the absolute buffer index of the name string of the index-th item.
+         */
+        private static int getNameOffset(ByteBuffer bytes, int index) {
+            int base = bytes.position();
+            assert 0 <= index && index < bytes.getInt(base);  // count
+            // The count integer (4 bytes)
+            // is followed by count (nameOffset, dataOffset) integer pairs (8 bytes per pair).
+            return base + bytes.getInt(base + 4 + index * 8);
+        }
+
+        /**
+         * Returns the absolute buffer index of the data of the index-th item,
+         * or the buffer capacity if index equals the item count.
+         */
+        private static int getDataOffset(ByteBuffer bytes, int index) {
+            int base = bytes.position();
+            int count = bytes.getInt(base);
+            if (index == count) {
+                // Return the limit of the last data item.
+                return bytes.capacity();
+            }
+            assert 0 <= index && index < count;
+            // The count integer (4 bytes)
+            // is followed by count (nameOffset, dataOffset) integer pairs (8 bytes per pair).
+            // The dataOffset follows the nameOffset (skip another 4 bytes).
+            return base + bytes.getInt(base + 4 + 4 + index * 8);
+        }
+    }
+
+    /**
+     * One ICU data source found on the data path:
+     * either a single data file (path is set) or a .dat package (pkgBytes is set).
+     */
+    private static final class DataFile {
+        /** The item path handed to the constructor. */
+        public final String itemPath;
+        /**
+         * null if a .dat package.
+         */
+        public final File path;
+        /**
+         * .dat package bytes, or null if not a .dat package.
+         * position() is after the header.
+         * Do not modify the position or other state, for thread safety.
+         */
+        public final ByteBuffer pkgBytes;
+
+        /** Creates an entry for a single data file. */
+        public DataFile(String item, File path) {
+            itemPath = item;
+            this.path = path;
+            pkgBytes = null;
+        }
+        /** Creates an entry for a .dat package whose bytes are already loaded. */
+        public DataFile(String item, ByteBuffer bytes) {
+            itemPath = item;
+            path = null;
+            pkgBytes = bytes;
+        }
+        /**
+         * Returns the file path for a single file, or the item path for a .dat package.
+         */
+        @Override
+        public String toString() {
+            // path is null for a .dat package; do not dereference it.
+            return path != null ? path.toString() : itemPath;
+        }
+    }
+ private static final List<DataFile> icuDataFiles = new ArrayList<DataFile>();
+
+ static { // Runs once at class initialization to populate icuDataFiles.
+ // Normally com.ibm.icu.impl.ICUBinary.dataPath.
+ String dataPath = ICUConfig.get(ICUBinary.class.getName() + ".dataPath");
+ if (dataPath != null) { // Scan only when ICUConfig returned a value for the property.
+ addDataFilesFromPath(dataPath, icuDataFiles);
+ }
+ }
+
+    /**
+     * Splits a data path string at File.pathSeparatorChar and collects
+     * the ICU data files found in each listed location.
+     * Each path is trimmed, one trailing File.separator is removed,
+     * and paths that are empty after that are skipped.
+     *
+     * @param dataPath the path list to split
+     * @param files the list which receives the data files that are found
+     */
+    private static void addDataFilesFromPath(String dataPath, List<DataFile> files) {
+        // Split the path and find files in each location.
+        // This splitting code avoids the regex pattern compilation in String.split()
+        // and its array allocation.
+        // (There is no simple by-character split()
+        // and the StringTokenizer "is discouraged in new code".)
+        int pathStart = 0;
+        while (pathStart < dataPath.length()) {
+            int sepIndex = dataPath.indexOf(File.pathSeparatorChar, pathStart);
+            int pathLimit = sepIndex >= 0 ? sepIndex : dataPath.length();
+            String path = dataPath.substring(pathStart, pathLimit).trim();
+            if (path.endsWith(File.separator)) {
+                path = path.substring(0, path.length() - 1);
+            }
+            if (path.length() != 0) {
+                // Collect into the list passed by the caller.
+                addDataFilesFromFolder(new File(path), new StringBuilder(), files);
+            }
+            if (sepIndex < 0) {
+                break;
+            }
+            pathStart = sepIndex + 1;
+        }
+    }
+
+ private static void addDataFilesFromFolder(File folder, StringBuilder itemPath,
+ List<DataFile> dataFiles) {
+ File[] files = folder.listFiles();
+ if (files == null || files.length == 0) {
+ return;
+ }
+ int folderPathLength = itemPath.length();
+ if (folderPathLength > 0) {
+ // The item path must use the ICU file separator character,
+ // not the platform-dependent File.separatorChar,
+ // so that the enumerated item paths match the paths requested by ICU code.
+ itemPath.append('/');
+ ++folderPathLength;
+ }
+ for (File file : files) {
+ String fileName = file.getName();
+ if (fileName.endsWith(".txt")) {
+ continue;
+ }
+ itemPath.append(fileName);
+ if (file.isDirectory()) {
+ // TODO: Within a folder, put all single files before all .dat packages?
+ addDataFilesFromFolder(file, itemPath, dataFiles);
+ } else if (fileName.endsWith(".dat")) {
+ ByteBuffer pkgBytes = mapFile(file);
+ if (pkgBytes != null && DatPackageReader.validate(pkgBytes)) {
+ dataFiles.add(new DataFile(itemPath.toString(), pkgBytes));
+ }
+ } else {
+ dataFiles.add(new DataFile(itemPath.toString(), file));
+ }
+ itemPath.setLength(folderPathLength);
+ }
+ }
+
+ /**
+ * Compares the length-specified input key with the
+ * NUL-terminated table key. (ASCII)
+ */
+ static int compareKeys(CharSequence key, ByteBuffer bytes, int offset) {
+ for (int i = 0;; ++i, ++offset) {
+ int c2 = bytes.get(offset);
+ if (c2 == 0) {
+ if (i == key.length()) {
+ return 0;
+ } else {
+ return 1; // key > table key because key is longer.
+ }
+ } else if (i == key.length()) {
+ return -1; // key < table key because key is shorter.
+ }
+ int diff = (int)key.charAt(i) - c2;
+ if (diff != 0) {
+ return diff;
+ }
+ }
+ }
+
// public inner interface ------------------------------------------------
-
+
/**
* Special interface for data authentication
*/
// public methods --------------------------------------------------------
+ /**
+ * Loads an ICU binary data file and returns it as a ByteBuffer.
+ * The buffer contents are normally read-only, but its position etc. can be modified.
+ * Convenience overload: delegates to the four-argument getData()
+ * with no class loader, no resource name, and required=false.
+ *
+ * @param itemPath Relative ICU data item path, for example "root.res" or "coll/ucadata.icu".
+ * @return The data as a read-only ByteBuffer,
+ * or null if the resource could not be found.
+ */
+ public static ByteBuffer getData(String itemPath) {
+ return getData(null, null, itemPath, false);
+ }
+
+ /**
+ * Loads an ICU binary data file and returns it as a ByteBuffer.
+ * The buffer contents are normally read-only, but its position etc. can be modified.
+ * Convenience overload: delegates to the four-argument getData() with required=false.
+ *
+ * @param loader Used for loader.getResourceAsStream() unless the data is found elsewhere.
+ * @param resourceName Resource name for use with the loader.
+ * @param itemPath Relative ICU data item path, for example "root.res" or "coll/ucadata.icu".
+ * @return The data as a read-only ByteBuffer,
+ * or null if the resource could not be found.
+ */
+ public static ByteBuffer getData(ClassLoader loader, String resourceName, String itemPath) {
+ return getData(loader, resourceName, itemPath, false);
+ }
+
+ /**
+ * Loads an ICU binary data file and returns it as a ByteBuffer.
+ * The buffer contents are normally read-only, but its position etc. can be modified.
+ * Convenience overload: delegates to the four-argument getData()
+ * with no class loader, no resource name, and required=true.
+ *
+ * @param itemPath Relative ICU data item path, for example "root.res" or "coll/ucadata.icu".
+ * @return The data as a read-only ByteBuffer.
+ * @throws MissingResourceException if the resource could not be found
+ */
+ public static ByteBuffer getRequiredData(String itemPath) {
+ return getData(null, null, itemPath, true);
+ }
+
+ /**
+ * Loads an ICU binary data file and returns it as a ByteBuffer.
+ * The buffer contents is normally read-only, but its position etc. can be modified.
+ *
+ * @param loader Used for loader.getResourceAsStream() unless the data is found elsewhere.
+ * @param resourceName Resource name for use with the loader.
+ * @param itemPath Relative ICU data item path, for example "root.res" or "coll/ucadata.icu".
+ * @return The data as a read-only ByteBuffer.
+ * @throws MissingResourceException if required==true and the resource could not be found
+ */
+// public static ByteBuffer getRequiredData(ClassLoader loader, String resourceName,
+// String itemPath) {
+// return getData(loader, resourceName, itemPath, true);
+// }
+
+    /**
+     * Loads an ICU binary data file and returns it as a ByteBuffer.
+     * The buffer contents are normally read-only, but its position etc. can be modified.
+     * The data files found on the configured file system data path are searched first;
+     * the class loader is used only if the item is not found there.
+     *
+     * @param loader Used for loader.getResourceAsStream() unless the data is found elsewhere.
+     *        If null, the ICUData class loader is used.
+     * @param resourceName Resource name for use with the loader.
+     *        If null, it is built from ICUData.ICU_BASE_NAME and the item path.
+     * @param itemPath Relative ICU data item path, for example "root.res" or "coll/ucadata.icu".
+     * @param required If the resource cannot be found,
+     *        this method returns null (!required) or throws an exception (required).
+     * @return The data as a read-only ByteBuffer,
+     *         or null if required==false and the resource could not be found.
+     * @throws MissingResourceException if required==true and the resource could not be found
+     */
+    private static ByteBuffer getData(ClassLoader loader, String resourceName,
+            String itemPath, boolean required) {
+        ByteBuffer fromFile = getDataFromFile(itemPath);
+        if (fromFile != null) {
+            return fromFile;
+        }
+        ClassLoader effectiveLoader =
+                (loader != null) ? loader : ICUData.class.getClassLoader();
+        String effectiveName =
+                (resourceName != null) ? resourceName : ICUData.ICU_BASE_NAME + '/' + itemPath;
+        InputStream stream = ICUData.getStream(effectiveLoader, effectiveName, required);
+        if (stream != null) {
+            try {
+                return getByteBufferFromInputStream(stream);
+            } catch (IOException e) {
+                throw new ICUUncheckedIOException(e);
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Searches the data files found on the file system data path, in list order, for an item.
+     * A .dat package is searched for the item; a single file matches if its item path is equal.
+     *
+     * @param itemPath Relative ICU data item path
+     * @return the item data, or null if no listed file provides it
+     *         (also null if the first matching single file cannot be mapped)
+     */
+    private static ByteBuffer getDataFromFile(String itemPath) {
+        for (DataFile candidate : icuDataFiles) {
+            if (candidate.pkgBytes == null) {
+                // Single file: the first one with an equal item path decides the result.
+                if (itemPath.equals(candidate.itemPath)) {
+                    return mapFile(candidate.path);
+                }
+                continue;
+            }
+            ByteBuffer item = DatPackageReader.getData(candidate.pkgBytes, itemPath);
+            if (item != null) {
+                return item;
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Memory-maps a whole file read-only.
+     * Failures are reported on System.err, not thrown.
+     *
+     * @param path the file to map
+     * @return the mapped bytes, or null if the file could not be opened or mapped
+     */
+    private static ByteBuffer mapFile(File path) {
+        FileInputStream file = null;
+        try {
+            file = new FileInputStream(path);
+            FileChannel channel = file.getChannel();
+            // The mapping stays valid after the file and its channel are closed below.
+            return channel.map(FileChannel.MapMode.READ_ONLY, 0, channel.size());
+        } catch (FileNotFoundException e) {
+            System.err.println(e);
+        } catch (IOException e) {
+            System.err.println(e);
+        } finally {
+            // Always release the file handle, including when mapping fails.
+            if (file != null) {
+                try {
+                    file.close();
+                } catch (IOException e) {
+                    // A failed close does not invalidate an established mapping; just report it.
+                    System.err.println(e);
+                }
+            }
+        }
+        return null;
+    }
+
/**
* Same as readHeader(), but returns a VersionInfo rather than a compact int.
*/
- public static final VersionInfo readHeaderAndDataVersion(ByteBuffer bytes,
+ public static VersionInfo readHeaderAndDataVersion(ByteBuffer bytes,
int dataFormat,
Authenticate authenticate)
throws IOException {
* @return dataVersion
* @throws IOException if this is not a valid ICU data item of the expected dataFormat
*/
- public static final int readHeader(ByteBuffer bytes, int dataFormat, Authenticate authenticate)
+ public static int readHeader(ByteBuffer bytes, int dataFormat, Authenticate authenticate)
throws IOException {
assert bytes.position() == 0;
byte magic1 = bytes.get(2);
bytes.get(14) != (byte)(dataFormat >> 8) ||
bytes.get(15) != (byte)dataFormat ||
(authenticate != null && !authenticate.isDataVersionAcceptable(formatVersion))) {
- throw new IOException(HEADER_AUTHENTICATION_FAILED_);
+ throw new IOException(HEADER_AUTHENTICATION_FAILED_ +
+ String.format("; data format %02x%02x%02x%02x, format version %d.%d.%d.%d",
+ bytes.get(12), bytes.get(13), bytes.get(14), bytes.get(15),
+ formatVersion[0] & 0xff, formatVersion[1] & 0xff,
+ formatVersion[2] & 0xff, formatVersion[3] & 0xff));
}
bytes.position(headerSize);
(bytes.get(23) & 0xff);
}
- public static final void skipBytes(ByteBuffer bytes, int skipLength) {
+    /**
+     * Writes a fixed-size, 32-byte ICU data header which declares big-endian data.
+     * Does not write a copyright string.
+     *
+     * @param dataFormat written as the 4-byte data format field
+     * @param formatVersion written as the 4-byte format version field
+     * @param dataVersion written as the 4-byte data version field
+     * @param dos the stream to write to
+     * @return The length of the header (number of bytes written).
+     * @throws IOException from the DataOutputStream
+     */
+    public static int writeHeader(int dataFormat, int formatVersion, int dataVersion,
+            DataOutputStream dos) throws IOException {
+        final int headerSize = 32;
+        final int dataInfoSize = 20;  // sizeof(UDataInfo)
+        // ucmndata.h MappedData
+        dos.writeChar(headerSize);
+        dos.writeByte(MAGIC1);
+        dos.writeByte(MAGIC2);
+        // unicode/udata.h UDataInfo
+        dos.writeChar(dataInfoSize);
+        dos.writeChar(0);  // reservedWord
+        dos.writeByte(1);  // isBigEndian
+        dos.writeByte(CHAR_SET_);  // charsetFamily
+        dos.writeByte(CHAR_SIZE_);  // sizeofUChar
+        dos.writeByte(0);  // reservedByte
+        dos.writeInt(dataFormat);
+        dos.writeInt(formatVersion);
+        dos.writeInt(dataVersion);
+        // 8 bytes padding for 32 bytes headerSize (multiple of 16).
+        dos.writeLong(0);
+        assert dos.size() == headerSize;
+        return headerSize;
+    }
+
+ public static void skipBytes(ByteBuffer bytes, int skipLength) { // Advances the buffer position by skipLength bytes.
if (skipLength > 0) { // Zero or negative lengths leave the position unchanged.
bytes.position(bytes.position() + skipLength);
}
}
+    /**
+     * Slices the buffer like ByteBuffer.slice()
+     * and sets the slice to the byte order of the source buffer.
+     *
+     * @param bytes the buffer to slice; its own state is not changed
+     * @return the slice, with the same byte order as the source buffer
+     */
+    public static ByteBuffer sliceWithOrder(ByteBuffer bytes) {
+        return bytes.slice().order(bytes.order());
+    }
+
/**
* Reads the entire contents from the stream into a byte array
* and wraps it into a ByteBuffer. Closes the InputStream at the end.
*/
- public static final ByteBuffer getByteBufferFromInputStream(InputStream is) throws IOException {
+ public static ByteBuffer getByteBufferFromInputStream(InputStream is) throws IOException {
try {
int avail = is.available();
byte[] bytes = new byte[avail];
}
}
- private static final void readFully(InputStream is, byte[] bytes, int offset, int avail)
+ private static void readFully(InputStream is, byte[] bytes, int offset, int avail)
throws IOException {
while (avail > 0) {
int numRead = is.read(bytes, offset, avail);
/*
*******************************************************************************
- * Copyright (C) 2004-2009, International Business Machines Corporation and *
- * others. All Rights Reserved. *
+ * Copyright (C) 2004-2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
*******************************************************************************
*
* Created on Feb 4, 2004
*/
package com.ibm.icu.impl;
+import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.security.AccessController;
import java.security.PrivilegedAction;
import java.util.MissingResourceException;
+import java.util.logging.Logger;
+
+import com.ibm.icu.util.VersionInfo;
/**
* Provides access to ICU data files as InputStreams. Implements security checking.
*/
public final class ICUData {
- /*
- * Return a URL to the ICU resource names resourceName. The
- * resource name should either be an absolute path, or a path relative to
- * com.ibm.icu.impl (e.g., most likely it is 'data/foo'). If required
- * is true, throw an MissingResourceException instead of returning a null result.
+ /**
+ * The data path to be used with getBundleInstance API
+ */
+ static final String ICU_DATA_PATH = "com/ibm/icu/impl/";
+ /**
+ * The ICU data package name.
+ * This is normally the name of the .dat package, and the prefix (plus '/')
+ * of the package entry names.
+ */
+ static final String PACKAGE_NAME = "icudt" + VersionInfo.ICU_DATA_VERSION_PATH;
+ /**
+ * The data path to be used with Class.getResourceAsStream().
+ */
+ public static final String ICU_BUNDLE = "data/" + PACKAGE_NAME;
+
+ /**
+ * The base name of ICU data to be used with ClassLoader.getResourceAsStream(),
+ * ICUResourceBundle.getBundleInstance() etc.
+ */
+ public static final String ICU_BASE_NAME = ICU_DATA_PATH + ICU_BUNDLE;
+
+ /**
+ * The base name of collation data to be used with getBundleInstance API
*/
+ public static final String ICU_COLLATION_BASE_NAME = ICU_BASE_NAME + "/coll";
+
+ /**
+ * The base name of rbbi data to be used with getData API
+ */
+ public static final String ICU_BRKITR_NAME = "brkitr";
+
+ /**
+ * The base name of rbbi data to be used with getBundleInstance API
+ */
+ public static final String ICU_BRKITR_BASE_NAME = ICU_BASE_NAME + '/' + ICU_BRKITR_NAME;
+
+ /**
+ * The base name of rbnf data to be used with getBundleInstance API
+ */
+ public static final String ICU_RBNF_BASE_NAME = ICU_BASE_NAME + "/rbnf";
+
+ /**
+ * The base name of transliterator data to be used with getBundleInstance API
+ */
+ public static final String ICU_TRANSLIT_BASE_NAME = ICU_BASE_NAME + "/translit";
+
+ public static final String ICU_LANG_BASE_NAME = ICU_BASE_NAME + "/lang";
+ public static final String ICU_CURR_BASE_NAME = ICU_BASE_NAME + "/curr";
+ public static final String ICU_REGION_BASE_NAME = ICU_BASE_NAME + "/region";
+ public static final String ICU_ZONE_BASE_NAME = ICU_BASE_NAME + "/zone";
+
+ /**
+ * For testing (otherwise false): When reading an InputStream from a Class or ClassLoader
+ * (that is, not from a file), log when the stream contains ICU binary data.
+ *
+ * This cannot be ICUConfig'ured because ICUConfig calls ICUData.getStream()
+ * to read the properties file, so we would get a circular dependency
+ * in the class initialization.
+ */
+ private static final boolean logBinaryDataFromInputStream = false;
+ private static final Logger logger = logBinaryDataFromInputStream ?
+ Logger.getLogger(ICUData.class.getName()) : null;
+
public static boolean exists(final String resourceName) {
URL i = null;
if (System.getSecurityManager() != null) {
}
return i != null;
}
-
+
private static InputStream getStream(final Class<?> root, final String resourceName, boolean required) {
InputStream i = null;
-
if (System.getSecurityManager() != null) {
i = AccessController.doPrivileged(new PrivilegedAction<InputStream>() {
public InputStream run() {
if (i == null && required) {
throw new MissingResourceException("could not locate data " +resourceName, root.getPackage().getName(), resourceName);
}
+ checkStreamForBinaryData(i, resourceName);
return i;
}
- private static InputStream getStream(final ClassLoader loader, final String resourceName, boolean required) {
+ /**
+ * Should be called only from ICUBinary.getData() or from convenience overloads here.
+ */
+ static InputStream getStream(final ClassLoader loader, final String resourceName, boolean required) {
InputStream i = null;
if (System.getSecurityManager() != null) {
i = AccessController.doPrivileged(new PrivilegedAction<InputStream>() {
if (i == null && required) {
throw new MissingResourceException("could not locate data", loader.toString(), resourceName);
}
+ checkStreamForBinaryData(i, resourceName);
return i;
}
-
+
+    /**
+     * For testing: logs when a stream loaded from a Class or ClassLoader starts with
+     * an ICU binary data header. Does nothing unless logBinaryDataFromInputStream is true.
+     * The stream is inspected only if it supports mark/reset,
+     * so that peeking at the header cannot consume bytes from the caller's stream.
+     *
+     * @param is the stream to inspect; may be null
+     * @param resourceName the resource name; only names containing PACKAGE_NAME are inspected
+     */
+    @SuppressWarnings("unused")  // used if logBinaryDataFromInputStream == true
+    private static void checkStreamForBinaryData(InputStream is, String resourceName) {
+        if (logBinaryDataFromInputStream && is != null && is.markSupported() &&
+                resourceName.indexOf(PACKAGE_NAME) >= 0) {
+            try {
+                is.mark(32);
+                byte[] b = new byte[32];
+                int len = is.read(b);
+                // Bytes 2 and 3 are the ICU data header magic bytes.
+                if (len == 32 && b[2] == (byte)0xda && b[3] == 0x27) {
+                    String msg = String.format(
+                            "ICU binary data file loaded from Class/ClassLoader as InputStream " +
+                            "from %s: MappedData %02x%02x%02x%02x dataFormat %02x%02x%02x%02x",
+                            resourceName,
+                            b[0], b[1], b[2], b[3],
+                            b[12], b[13], b[14], b[15]);
+                    logger.info(msg);
+                }
+                is.reset();
+            } catch (IOException ignored) {
+                // Best effort: this check is a debugging aid only.
+            }
+        }
+    }
+
public static InputStream getStream(ClassLoader loader, String resourceName){
- return getStream(loader,resourceName, false);
+ return getStream(loader,resourceName, false);
}
public static InputStream getRequiredStream(ClassLoader loader, String resourceName){
return getStream(loader, resourceName, true); // required=true: a missing resource raises MissingResourceException instead of returning null.
}
- /*
+ /**
* Convenience override that calls getStream(ICUData.class, resourceName, false);
+ * Returns null if the resource could not be found.
*/
public static InputStream getStream(String resourceName) {
return getStream(ICUData.class, resourceName, false);
}
-
- /*
+
+ /**
* Convenience method that calls getStream(ICUData.class, resourceName, true).
+ * @throws MissingResourceException if the resource could not be found
*/
public static InputStream getRequiredStream(String resourceName) {
return getStream(ICUData.class, resourceName, true);
}
- /*
+ /**
* Convenience override that calls getStream(root, resourceName, false);
+ * Returns null if the resource could not be found.
*/
public static InputStream getStream(Class<?> root, String resourceName) {
return getStream(root, resourceName, false);
}
-
- /*
+
+ /**
* Convenience method that calls getStream(root, resourceName, true).
+ * @throws MissingResourceException if the resource could not be found
*/
public static InputStream getRequiredStream(Class<?> root, String resourceName) {
return getStream(root, resourceName, true);
import com.ibm.icu.util.UResourceBundle;
import com.ibm.icu.util.UResourceBundleIterator;
import com.ibm.icu.util.UResourceTypeMismatchException;
-import com.ibm.icu.util.VersionInfo;
public class ICUResourceBundle extends UResourceBundle {
/**
* The data path to be used with getBundleInstance API
+ * @deprecated because not specific to resource bundles; use the ICUData constants instead
*/
- protected static final String ICU_DATA_PATH = "com/ibm/icu/impl/";
+ @Deprecated
+ protected static final String ICU_DATA_PATH = ICUData.ICU_DATA_PATH;
/**
* The data path to be used with getBundleInstance API
+ * @deprecated because not specific to resource bundles; use the ICUData constants instead
*/
- public static final String ICU_BUNDLE = "data/icudt" + VersionInfo.ICU_DATA_VERSION_PATH;
+ @Deprecated
+ public static final String ICU_BUNDLE = ICUData.ICU_BUNDLE;
/**
* The base name of ICU data to be used with getBundleInstance API
+ * @deprecated because not specific to resource bundles; use the ICUData constants instead
*/
- public static final String ICU_BASE_NAME = ICU_DATA_PATH + ICU_BUNDLE;
+ @Deprecated
+ public static final String ICU_BASE_NAME = ICUData.ICU_BASE_NAME;
/**
* The base name of collation data to be used with getBundleInstance API
+ * @deprecated because not specific to resource bundles; use the ICUData constants instead
*/
- public static final String ICU_COLLATION_BASE_NAME = ICU_BASE_NAME + "/coll";
-
- /**
- * The base name of rbbi data to be used with getData API
- */
- public static final String ICU_BRKITR_NAME = "/brkitr";
+ @Deprecated
+ public static final String ICU_COLLATION_BASE_NAME = ICUData.ICU_COLLATION_BASE_NAME;
/**
* The base name of rbbi data to be used with getBundleInstance API
+ * @deprecated because not specific to resource bundles; use the ICUData constants instead
*/
- public static final String ICU_BRKITR_BASE_NAME = ICU_BASE_NAME + ICU_BRKITR_NAME;
+ @Deprecated
+ public static final String ICU_BRKITR_BASE_NAME = ICUData.ICU_BRKITR_BASE_NAME;
/**
* The base name of rbnf data to be used with getBundleInstance API
+ * @deprecated because not specific to resource bundles; use the ICUData constants instead
*/
- public static final String ICU_RBNF_BASE_NAME = ICU_BASE_NAME + "/rbnf";
+ @Deprecated
+ public static final String ICU_RBNF_BASE_NAME = ICUData.ICU_RBNF_BASE_NAME;
/**
* The base name of transliterator data to be used with getBundleInstance API
+ * @deprecated because not specific to resource bundles; use the ICUData constants instead
*/
- public static final String ICU_TRANSLIT_BASE_NAME = ICU_BASE_NAME + "/translit";
+ @Deprecated
+ public static final String ICU_TRANSLIT_BASE_NAME = ICUData.ICU_TRANSLIT_BASE_NAME;
- public static final String ICU_LANG_BASE_NAME = ICU_BASE_NAME + "/lang";
- public static final String ICU_CURR_BASE_NAME = ICU_BASE_NAME + "/curr";
- public static final String ICU_REGION_BASE_NAME = ICU_BASE_NAME + "/region";
- public static final String ICU_ZONE_BASE_NAME = ICU_BASE_NAME + "/zone";
+ /**
+ * @deprecated because not specific to resource bundles; use the ICUData constants instead
+ */
+ @Deprecated
+ public static final String ICU_LANG_BASE_NAME = ICUData.ICU_LANG_BASE_NAME;
+ /**
+ * @deprecated because not specific to resource bundles; use the ICUData constants instead
+ */
+ @Deprecated
+ public static final String ICU_CURR_BASE_NAME = ICUData.ICU_CURR_BASE_NAME;
+ /**
+ * @deprecated because not specific to resource bundles; use the ICUData constants instead
+ */
+ @Deprecated
+ public static final String ICU_REGION_BASE_NAME = ICUData.ICU_REGION_BASE_NAME;
+ /**
+ * @deprecated because not specific to resource bundles; use the ICUData constants instead
+ */
+ @Deprecated
+ public static final String ICU_ZONE_BASE_NAME = ICUData.ICU_ZONE_BASE_NAME;
private static final String NO_INHERITANCE_MARKER = "\u2205\u2205\u2205";
@Override
protected ICUResourceBundleReader createInstance(ReaderInfo key, ReaderInfo data) {
String fullName = ICUResourceBundleReader.getFullName(data.baseName, data.localeID);
- InputStream stream = ICUData.getStream(data.loader, fullName);
- if (stream == null) {
- return NULL_READER;
+ try {
+ ByteBuffer inBytes;
+ if (data.baseName != null && data.baseName.startsWith(ICUData.ICU_BASE_NAME)) {
+ String itemPath = fullName.substring(ICUData.ICU_BASE_NAME.length() + 1);
+ inBytes = ICUBinary.getData(data.loader, fullName, itemPath);
+ if (inBytes == null) {
+ return NULL_READER;
+ }
+ } else {
+ InputStream stream = ICUData.getStream(data.loader, fullName);
+ if (stream == null) {
+ return NULL_READER;
+ }
+ inBytes = ICUBinary.getByteBufferFromInputStream(stream);
+ }
+ return new ICUResourceBundleReader(inBytes, data.baseName, data.localeID, data.loader);
+ } catch (IOException ex) {
+ throw new ICUUncheckedIOException("Data file " + fullName + " is corrupt - " + ex.getMessage(), ex);
}
- return new ICUResourceBundleReader(stream, data.baseName, data.localeID, data.loader);
}
}
private ICUResourceBundleReader() {
}
- private ICUResourceBundleReader(InputStream stream, String baseName, String localeID, ClassLoader loader) {
- try {
- ByteBuffer inBytes = ICUBinary.getByteBufferFromInputStream(stream);
- init(inBytes);
- } catch (IOException ex) {
- String fullName = ICUResourceBundleReader.getFullName(baseName, localeID);
- throw new ICUUncheckedIOException("Data file " + fullName + " is corrupt - " + ex.getMessage(), ex);
- }
+ private ICUResourceBundleReader(ByteBuffer inBytes,
+ String baseName, String localeID,
+ ClassLoader loader) throws IOException {
+ init(inBytes);
// set pool bundle keys if necessary
if (usesPoolBundle) {
private void init(ByteBuffer inBytes) throws IOException {
dataVersion = ICUBinary.readHeader(inBytes, DATA_FORMAT, IS_ACCEPTABLE);
boolean isFormatVersion10 = inBytes.get(16) == 1 && inBytes.get(17) == 0;
- bytes = inBytes.slice();
+ bytes = ICUBinary.sliceWithOrder(inBytes);
int dataLength = bytes.remaining();
if(DEBUG) System.out.println("The ByteBuffer is direct (memory-mapped): " + bytes.isDirect());
if(_16BitTop > keysTop) {
int num16BitUnits = (_16BitTop - keysTop) * 2;
bytes.position(keysTop << 2);
- b16BitUnits = bytes.slice().asCharBuffer();
+ b16BitUnits = bytes.asCharBuffer();
b16BitUnits.limit(num16BitUnits);
maxOffset |= num16BitUnits - 1;
} else {
// unlike regular bundles' key strings for which indexes
// are based on the start of the bundle data.
bytes.position((1 + indexLength) << 2);
- bytes = bytes.slice();
+ bytes = ICUBinary.sliceWithOrder(bytes);
} else {
localKeyLimit = getIndexesInt(URES_INDEX_KEYS_TOP) << 2;
}
return makeKeyStringFromBytes(poolBundleKeys, keyOffset & 0x7fffffff);
}
}
- // Compare the length-specified input key with the
- // NUL-terminated table key.
- private static int compareKeys(CharSequence key, ByteBuffer keyBytes, int keyOffset) {
- for(int i = 0;; ++i, ++keyOffset) {
- int c2 = keyBytes.get(keyOffset);
- if(c2 == 0) {
- if(i == key.length()) {
- return 0;
- } else {
- return 1; // key > table key because key is longer.
- }
- } else if(i == key.length()) {
- return -1; // key < table key because key is shorter.
- }
- int diff = (int)key.charAt(i) - c2;
- if(diff != 0) {
- return diff;
- }
- }
- }
private int compareKeys(CharSequence key, char keyOffset) {
if(keyOffset < localKeyLimit) {
- return compareKeys(key, bytes, keyOffset);
+ return ICUBinary.compareKeys(key, bytes, keyOffset);
} else {
- return compareKeys(key, poolBundleKeys, keyOffset - localKeyLimit);
+ return ICUBinary.compareKeys(key, poolBundleKeys, keyOffset - localKeyLimit);
}
}
private int compareKeys32(CharSequence key, int keyOffset) {
if(keyOffset >= 0) {
- return compareKeys(key, bytes, keyOffset);
+ return ICUBinary.compareKeys(key, bytes, keyOffset);
} else {
- return compareKeys(key, poolBundleKeys, keyOffset & 0x7fffffff);
+ return ICUBinary.compareKeys(key, poolBundleKeys, keyOffset & 0x7fffffff);
}
}
offset += 4;
ByteBuffer result = bytes.duplicate();
result.position(offset).limit(offset + length);
- result = result.slice();
+ result = ICUBinary.sliceWithOrder(result);
if(!result.isReadOnly()) {
result = result.asReadOnlyBuffer();
}
protected Norm2AllModes createInstance(String key, ByteBuffer bytes) {
Normalizer2Impl impl;
if(bytes==null) {
- impl=new Normalizer2Impl().load(ICUResourceBundle.ICU_BUNDLE+"/"+key+".nrm");
+ impl=new Normalizer2Impl().load(key+".nrm");
} else {
impl=new Normalizer2Impl().load(bytes);
}
private static final class Norm2AllModesSingleton {
private Norm2AllModesSingleton(String name) {
try {
- Normalizer2Impl impl=new Normalizer2Impl().load(
- ICUResourceBundle.ICU_BUNDLE+"/"+name+".nrm");
+ Normalizer2Impl impl=new Normalizer2Impl().load(name+".nrm");
allModes=new Norm2AllModes(impl);
} catch(RuntimeException e) {
exception=e;
}
}
public Normalizer2Impl load(String name) {
- try {
- return load(ICUBinary.getByteBufferFromInputStream(ICUData.getRequiredStream(name)));
- } catch(IOException e) {
- throw new ICUUncheckedIOException(e);
- }
+ return load(ICUBinary.getRequiredData(name));
}
private void enumLcccRange(int start, int end, int norm16, UnicodeSet set) {
header.signature = bytes.getInt();
switch (header.signature) {
case 0x54726932:
- bytes.order(ByteOrder.BIG_ENDIAN);
+ // The buffer is already set to the trie data byte order.
break;
case 0x32697254:
- bytes.order(ByteOrder.LITTLE_ENDIAN);
+ // Temporarily reverse the byte order.
+ boolean isBigEndian = outerByteOrder == ByteOrder.BIG_ENDIAN;
+ bytes.order(isBigEndian ? ByteOrder.LITTLE_ENDIAN : ByteOrder.BIG_ENDIAN);
header.signature = 0x54726932;
break;
default:
package com.ibm.icu.impl;
import java.io.IOException;
-import java.io.InputStream;
import java.nio.ByteBuffer;
import java.util.Iterator;
// port of ubidi_openProps()
private UBiDiProps() throws IOException{
- InputStream is=ICUData.getStream(ICUResourceBundle.ICU_BUNDLE+"/"+DATA_FILE_NAME);
- ByteBuffer bytes=ICUBinary.getByteBufferFromInputStream(is);
+ ByteBuffer bytes=ICUBinary.getData(DATA_FILE_NAME);
readData(bytes);
}
package com.ibm.icu.impl;
import java.io.IOException;
-import java.io.InputStream;
import java.nio.ByteBuffer;
import java.util.Iterator;
// port of ucase_openProps()
private UCaseProps() throws IOException {
- InputStream is=ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE+"/"+DATA_FILE_NAME);
- ByteBuffer bytes=ICUBinary.getByteBufferFromInputStream(is);
+ ByteBuffer bytes=ICUBinary.getRequiredData(DATA_FILE_NAME);
readData(bytes);
}
package com.ibm.icu.impl;
import java.io.IOException;
-import java.io.InputStream;
import java.nio.ByteBuffer;
import java.util.Locale;
import java.util.MissingResourceException;
/**
* Default name of the name datafile
*/
- private static final String NAME_FILE_NAME_ = ICUResourceBundle.ICU_BUNDLE+"/unames.icu";
+ private static final String FILE_NAME_ = "unames.icu";
/**
* Shift count to retrieve group information
*/
*/
private UCharacterName() throws IOException
{
- InputStream is = ICUData.getRequiredStream(NAME_FILE_NAME_);
- ByteBuffer b = ICUBinary.getByteBufferFromInputStream(is);
+ ByteBuffer b = ICUBinary.getRequiredData(FILE_NAME_);
UCharacterNameReader reader = new UCharacterNameReader(b);
reader.read(this);
}
package com.ibm.icu.impl;
import java.io.IOException;
-import java.io.InputStream;
import java.nio.ByteBuffer;
import java.util.Iterator;
import java.util.MissingResourceException;
/**
* Default name of the datafile
*/
- private static final String DATA_FILE_NAME_ = ICUResourceBundle.ICU_BUNDLE+"/uprops.icu";
+ private static final String DATA_FILE_NAME_ = "uprops.icu";
/**
* Shift value for lead surrogate to form a supplementary character.
}
// jar access
- InputStream is = ICUData.getRequiredStream(DATA_FILE_NAME_);
- ByteBuffer bytes=ICUBinary.getByteBufferFromInputStream(is);
+ ByteBuffer bytes=ICUBinary.getRequiredData(DATA_FILE_NAME_);
m_unicodeVersion_ = ICUBinary.readHeaderAndDataVersion(bytes, DATA_FORMAT, new IsAcceptable());
// Read or skip the 16 indexes.
int propertyOffset = bytes.getInt();
package com.ibm.icu.impl;
import java.io.IOException;
-import java.io.InputStream;
import java.nio.ByteBuffer;
import java.util.MissingResourceException;
}
private UPropertyAliases() throws IOException {
- InputStream stream = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE+"/pnames.icu");
- ByteBuffer bytes = ICUBinary.getByteBufferFromInputStream(stream);
+ ByteBuffer bytes = ICUBinary.getRequiredData("pnames.icu");
load(bytes);
}
/*
******************************************************************************
- * Copyright (C) 2007-2011, International Business Machines Corporation and *
- * others. All Rights Reserved. *
+ * Copyright (C) 2007-2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
******************************************************************************
*/
if (ln != null) {
String name = PATH + "pfd_" + ln + ".xml";
try {
- InputStream is = ICUData.getStream(getClass(), name);
- if (is == null) {
- throw new MissingResourceException(
- "no resource named " + name, name, "");
- } else {
- DataRecord dr = DataRecord.read(ln,
- new XMLRecordReader(new InputStreamReader(
- is, "UTF-8")));
- if (dr != null) {
- // debug
- // if (false && ln.equals("ar_EG")) {
- // OutputStreamWriter osw = new
- // OutputStreamWriter(System.out, "UTF-8");
- // XMLRecordWriter xrw = new
- // XMLRecordWriter(osw);
- // dr.write(xrw);
- // osw.flush();
- // }
- ld = new PeriodFormatterData(localeName, dr);
- }
+ InputStream is = ICUData.getRequiredStream(getClass(), name);
+ DataRecord dr = DataRecord.read(ln,
+ new XMLRecordReader(new InputStreamReader(
+ is, "UTF-8")));
+ if (dr != null) {
+ // debug
+ // if (false && ln.equals("ar_EG")) {
+ // OutputStreamWriter osw = new
+ // OutputStreamWriter(System.out, "UTF-8");
+ // XMLRecordWriter xrw = new
+ // XMLRecordWriter(osw);
+ // dr.write(xrw);
+ // osw.flush();
+ // }
+ ld = new PeriodFormatterData(localeName, dr);
}
} catch (UnsupportedEncodingException e) {
throw new MissingResourceException(
package com.ibm.icu.text;
import java.io.IOException;
-import java.io.InputStream;
import java.nio.ByteBuffer;
import java.util.Locale;
import java.util.MissingResourceException;
try {
String typeKey = KIND_NAMES[kind];
String brkfname = rb.getStringWithFallback("boundaries/" + typeKey);
- String rulesFileName = ICUResourceBundle.ICU_BUNDLE +ICUResourceBundle.ICU_BRKITR_NAME+ "/" + brkfname;
- InputStream ruleStream = ICUData.getStream(rulesFileName);
- bytes = ICUBinary.getByteBufferFromInputStream(ruleStream);
+ String rulesFileName = ICUData.ICU_BRKITR_NAME+ '/' + brkfname;
+ bytes = ICUBinary.getData(rulesFileName);
}
catch (Exception e) {
throw new MissingResourceException(e.toString(),"","");
package com.ibm.icu.text;
import java.io.IOException;
-import java.io.InputStream;
import java.nio.ByteBuffer;
import com.ibm.icu.impl.Assert;
public static DictionaryMatcher loadDictionaryFor(String dictType) throws IOException {
ICUResourceBundle rb = (ICUResourceBundle)UResourceBundle.getBundleInstance(ICUResourceBundle.ICU_BRKITR_BASE_NAME);
String dictFileName = rb.getStringWithFallback("dictionaries/" + dictType);
- dictFileName = ICUResourceBundle.ICU_BUNDLE +ICUResourceBundle.ICU_BRKITR_NAME+ "/" + dictFileName;
- InputStream is = ICUData.getStream(dictFileName);
- ByteBuffer bytes = ICUBinary.getByteBufferFromInputStream(is);
+ dictFileName = ICUData.ICU_BRKITR_NAME + '/' + dictFileName;
+ ByteBuffer bytes = ICUBinary.getRequiredData(dictFileName);
ICUBinary.readHeader(bytes, DATA_FORMAT_ID, null);
int[] indexes = new int[IX_COUNT];
// TODO: read indexes[IX_STRING_TRIE_OFFSET] first, then read a variable-length indexes[]
import java.io.IOException;
import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
import com.ibm.icu.impl.CharTrie;
import com.ibm.icu.impl.ICUBinary;
import com.ibm.icu.impl.Trie;
+import com.ibm.icu.impl.ICUBinary.Authenticate;
/**
* <p>Internal class used for Rule Based Break Iterators</p>
CharTrie fTrie;
String fRuleSource;
int fStatusTable[];
-
+
+ private boolean isBigEndian;
+
+ static final int DATA_FORMAT = 0x42726b20; // "Brk "
+ static final int FORMAT_VERSION = 0x03010000; // 3.1
+
+ private static final class IsAcceptable implements Authenticate {
+ // @Override when we switch to Java 6
+ public boolean isDataVersionAcceptable(byte version[]) {
+ return version[0] == (FORMAT_VERSION >>> 24);
+ }
+ }
+ private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable();
+
//
// Indexes to fields in the ICU4C style binary form of the RBBI Data Header
// Used by the rule compiler when flattening the data.
// Index offsets to header fields of a state table
// struct RBBIStateTable {... in the C version.
//
- final static int NUMSTATES = 0;
- final static int ROWLEN = 2;
- final static int FLAGS = 4;
- final static int RESERVED_2 = 6;
- final static int ROW_DATA = 8;
-
+ static final int NUMSTATES = 0;
+ static final int ROWLEN = 2;
+ static final int FLAGS = 4;
+ // private static final int RESERVED_2 = 6;
+ private static final int ROW_DATA = 8;
+
// Bit selectors for the "FLAGS" field of the state table header
// enum RBBIStateTableFlags in the C version.
//
RBBIDataWrapper This = new RBBIDataWrapper();
- // Seek past the ICU data header.
- // TODO: verify that the header looks good.
- ICUBinary.skipBytes(bytes, 0x80);
+ ICUBinary.readHeader(bytes, DATA_FORMAT, IS_ACCEPTABLE);
+ This.isBigEndian = bytes.order() == ByteOrder.BIG_ENDIAN;
// Read in the RBBI data header...
This.fHeader = new RBBIDataHeader();
This.fHeader.fMagic = bytes.getInt();
- This.fHeader.fVersion = bytes.getInt();
- This.fHeader.fFormatVersion[0] = (byte) (This.fHeader.fVersion >> 24);
- This.fHeader.fFormatVersion[1] = (byte) (This.fHeader.fVersion >> 16);
- This.fHeader.fFormatVersion[2] = (byte) (This.fHeader.fVersion >> 8);
- This.fHeader.fFormatVersion[3] = (byte) (This.fHeader.fVersion);
+ // Read the same 4 bytes as an int and as a byte array: The data format could be
+ // the old fVersion=1 (TODO: probably not with a real ICU data header?)
+ // or the new fFormatVersion=3.x.
+ This.fHeader.fVersion = bytes.getInt(bytes.position());
+ This.fHeader.fFormatVersion[0] = bytes.get();
+ This.fHeader.fFormatVersion[1] = bytes.get();
+ This.fHeader.fFormatVersion[2] = bytes.get();
+ This.fHeader.fFormatVersion[3] = bytes.get();
This.fHeader.fLength = bytes.getInt();
This.fHeader.fCatCount = bytes.getInt();
This.fHeader.fFTable = bytes.getInt();
///CLOVER:OFF
// Getters for fields from the state table header
//
- final static int getNumStates(short table[]) {
- int hi = table[NUMSTATES];
- int lo = table[NUMSTATES+1];
- int val = (hi<<16) + (lo&0x0000ffff);
- return val;
+ private int getStateTableNumStates(short table[]) {
+ if (isBigEndian) {
+ return (table[NUMSTATES] << 16) | (table[NUMSTATES+1] & 0xffff);
+ } else {
+ return (table[NUMSTATES+1] << 16) | (table[NUMSTATES] & 0xffff);
+ }
}
///CLOVER:ON
+ int getStateTableFlags(short table[]) {
+ // This works for up to 15 flag bits: only the low 16-bit half of the flags field is read.
+ return table[isBigEndian ? FLAGS + 1 : FLAGS];
+ }
+
///CLOVER:OFF
/* Debug function to display the break iterator data. */
void dump() {
System.out.print("-");
}
System.out.println();
- for (state=0; state< getNumStates(table); state++) {
+ for (state=0; state< getStateTableNumStates(table); state++) {
dumpRow(table, state);
}
System.out.println();
//
-// Copyright (C) 2002-2009, International Business Machines Corporation and others.
+// Copyright (C) 2002-2014, International Business Machines Corporation and others.
// All Rights Reserved.
//
//
import java.util.Set;
import com.ibm.icu.impl.Assert;
+import com.ibm.icu.impl.ICUBinary;
import com.ibm.icu.impl.ICUDebug;
class RBBIRuleBuilder {
//
// Write out an ICU Data Header
- // TODO: actually create a real header, rather than just a placeholder.
- // The empty placeholder is ok for compile-and-go from within ICU4J.
- // Replicating the ICU4C genbrk tool for building .brk resources would need a real header.
//
- byte[] ICUDataHeader = new byte[0x80];
- dos.write(ICUDataHeader);
+ ICUBinary.writeHeader(RBBIDataWrapper.DATA_FORMAT, RBBIDataWrapper.FORMAT_VERSION, 0, dos);
//
// Write out the RBBIDataHeader
int state = START_STATE;
int row = fRData.getRowIndex(state);
short category = 3;
- short flagsState = stateTable[RBBIDataWrapper.FLAGS+1];
+ int flagsState = fRData.getStateTableFlags(stateTable);
int mode = RBBI_RUN;
if ((flagsState & RBBIDataWrapper.RBBI_BOF_REQUIRED) != 0) {
category = 2;
int initialPosition = 0;
int lookaheadResult = 0;
boolean lookAheadHardBreak =
- (stateTable[RBBIDataWrapper.FLAGS+1] & RBBIDataWrapper.RBBI_LOOKAHEAD_HARD_BREAK) != 0;
+ (fRData.getStateTableFlags(stateTable) & RBBIDataWrapper.RBBI_LOOKAHEAD_HARD_BREAK) != 0;
// handlePrevious() never gets the rule status.
// Flag the status as invalid; if the user ever asks for status, we will need
row = fRData.getRowIndex(state);
category = 3; // TODO: obsolete? from the old start/run mode scheme?
mode = RBBI_RUN;
- if ((stateTable[RBBIDataWrapper.FLAGS+1] & RBBIDataWrapper.RBBI_BOF_REQUIRED) != 0) {
+ if ((fRData.getStateTableFlags(stateTable) & RBBIDataWrapper.RBBI_BOF_REQUIRED) != 0) {
category = 2;
mode = RBBI_START;
}
import java.io.DataOutputStream;
import java.io.IOException;
-import java.io.InputStream;
import java.io.LineNumberReader;
import java.io.Reader;
import java.nio.ByteBuffer;
import com.ibm.icu.impl.ICUBinary;
import com.ibm.icu.impl.Trie2;
import com.ibm.icu.impl.Trie2Writable;
+import com.ibm.icu.impl.ICUBinary.Authenticate;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UCharacterCategory;
import com.ibm.icu.lang.UProperty;
}
}
+ private static final int DATA_FORMAT = 0x43667520; // "Cfu "
+ private static final class IsAcceptable implements Authenticate {
+ // @Override when we switch to Java 6
+ public boolean isDataVersionAcceptable(byte version[]) {
+ return version[0] == 1;
+ }
+ }
+ private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable();
- // getDefault() - Create a SpoofData instance that is built from
- // the data baked into the default ICU data.
+ private static final class DefaultData {
+ private static SpoofData INSTANCE = null;
- static SpoofData getDefault() {
- // TODO: Cache it. Lazy create, keep until cleanup.
- SpoofData This = null;
- try {
- InputStream is = com.ibm.icu.impl.ICUData.getRequiredStream(com.ibm.icu.impl.ICUResourceBundle.ICU_BUNDLE
- + "/confusables.cfu");
- This = new SpoofData(ICUBinary.getByteBufferFromInputStream(is));
- is.close();
- }
- catch (IOException e) {
- // Return null in this case.
+ static {
+ try {
+ INSTANCE = new SpoofData(ICUBinary.getRequiredData("confusables.cfu"));
+ } catch (IOException ignored) { // deliberately swallowed: INSTANCE stays null, so getDefault() returns null
+ }
}
- return This;
+ }
+
+ /**
+ * @return instance for Unicode standard data
+ */
+ static SpoofData getDefault() {
+ return DefaultData.INSTANCE;
}
// SpoofChecker Data constructor for use from data builder.
// Constructor for use when creating from prebuilt default data.
// A ByteBuffer is what the ICU internal data loading functions provide.
SpoofData(ByteBuffer bytes) throws java.io.IOException {
- // Seek past the ICU data header.
- // TODO: verify that the header looks good.
- ICUBinary.skipBytes(bytes, 0x80);
+ ICUBinary.readHeader(bytes, DATA_FORMAT, IS_ACCEPTABLE);
bytes.mark();
readData(bytes);
}
import com.ibm.icu.impl.CharTrie;
import com.ibm.icu.impl.ICUBinary;
-import com.ibm.icu.impl.ICUData;
-import com.ibm.icu.impl.ICUResourceBundle;
import com.ibm.icu.impl.StringPrepDataReader;
import com.ibm.icu.impl.UBiDiProps;
import com.ibm.icu.lang.UCharacter;
*/
public StringPrep(InputStream inputStream) throws IOException{
// TODO: Add a public constructor that takes ByteBuffer directly.
- ByteBuffer bytes = ICUBinary.getByteBufferFromInputStream(inputStream);
+ this(ICUBinary.getByteBufferFromInputStream(inputStream));
+ }
+
+ private StringPrep(ByteBuffer bytes) throws IOException {
StringPrepDataReader reader = new StringPrepDataReader(bytes);
// read the indexes
}
if (instance == null) {
- InputStream stream = ICUData.getRequiredStream(ICUResourceBundle.ICU_BUNDLE + "/"
- + PROFILE_NAMES[profile] + ".spp");
- if (stream != null) {
+ ByteBuffer bytes = ICUBinary.getRequiredData(PROFILE_NAMES[profile] + ".spp");
+ if (bytes != null) {
try {
- try {
- instance = new StringPrep(stream);
- } finally {
- stream.close();
- }
+ instance = new StringPrep(bytes);
} catch (IOException e) {
throw new ICUUncheckedIOException(e);
}
/*
*******************************************************************************
- * Copyright (C) 2002-2012, International Business Machines Corporation and *
- * others. All Rights Reserved. *
- *******************************************************************************
- *
+ * Copyright (C) 2002-2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
*******************************************************************************
*/
private void FromUnicodeCase(ConversionCase cc) {
-
// create charset encoder for conversion test
CharsetProviderICU provider = new CharsetProviderICU();
CharsetEncoder encoder = null;
? (Charset) provider.charsetForName(cc.charset.substring(1),
"com/ibm/icu/dev/data/testdata", this.getClass().getClassLoader())
: (Charset) provider.charsetForName(cc.charset);
- encoder = (CharsetEncoder) charset.newEncoder();
- encoder.onMalformedInput(CodingErrorAction.REPLACE);
- encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
- if (encoder instanceof CharsetEncoderICU) {
- ((CharsetEncoderICU)encoder).setFallbackUsed(cc.fallbacks);
- if (((CharsetEncoderICU)encoder).isFallbackUsed() != cc.fallbacks) {
- errln("Fallback could not be set for " + cc.charset);
+ if (charset != null) {
+ encoder = (CharsetEncoder) charset.newEncoder();
+ encoder.onMalformedInput(CodingErrorAction.REPLACE);
+ encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
+ if (encoder instanceof CharsetEncoderICU) {
+ ((CharsetEncoderICU)encoder).setFallbackUsed(cc.fallbacks);
+ if (((CharsetEncoderICU)encoder).isFallbackUsed() != cc.fallbacks) {
+ errln("Fallback could not be set for " + cc.charset);
+ }
}
}
-
} catch (Exception e) {
+ encoder = null;
+ }
+ if (encoder == null) {
if (cc.charset.charAt(0) == UNSUPPORTED_CHARSET_SYMBOL) {
logln("Skipping test:(" + cc.charset.substring(1) + ") due to ICU Charset not supported at this time");
} else {
}
return;
}
-
+
// set the callback for the encoder
if (cc.cbErrorAction != null) {
if (cc.cbEncoder != null) {
? (Charset) provider.charsetForName(cc.charset.substring(1),
"com/ibm/icu/dev/data/testdata", this.getClass().getClassLoader())
: (Charset) provider.charsetForName(cc.charset);
- decoder = (CharsetDecoder) charset.newDecoder();
- decoder.onMalformedInput(CodingErrorAction.REPLACE);
- decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
-
+ if (charset != null) {
+ decoder = (CharsetDecoder) charset.newDecoder();
+ decoder.onMalformedInput(CodingErrorAction.REPLACE);
+ decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
+ }
} catch (Exception e) {
// TODO implement loading of test data.
+ decoder = null;
+ }
+ if (decoder == null) {
if (cc.charset.charAt(0) == UNSUPPORTED_CHARSET_SYMBOL) {
logln("Skipping test:(" + cc.charset.substring(1) + ") due to ICU Charset not supported at this time");
} else {
//checking for converter that are not supported at this point
try{
- if(charset.name()=="BOCU-1" ||charset.name()== "SCSU"|| charset.name()=="lmbcs1" || charset.name()== "lmbcs2" ||
+ if(charset==null ||
+ charset.name()=="BOCU-1" ||charset.name()== "SCSU"|| charset.name()=="lmbcs1" || charset.name()== "lmbcs2" ||
charset.name()== "lmbcs3" || charset.name()== "lmbcs4" || charset.name()=="lmbcs5" || charset.name()=="lmbcs6" ||
charset.name()== "lmbcs8" || charset.name()=="lmbcs11" || charset.name()=="lmbcs16" || charset.name()=="lmbcs17" ||
charset.name()=="lmbcs18"|| charset.name()=="lmbcs19"){
-
- logln("Converter not supported at this point :" +charset.displayName());
+ logln("Converter not supported at this point :" + cc.charset);
return;
}