super(cs);
}
+ @Override
protected CoderResult decodeLoopCoreOptimized(ByteBuffer source, CharBuffer target,
byte[] sourceArray, char[] targetArray, int oldSource, int offset, int limit) {
return null;
}
+ @Override
protected CoderResult decodeLoopCoreUnoptimized(ByteBuffer source, CharBuffer target) {
byte ch;
/*
return CoderResult.OVERFLOW;
}
}
-
+
return CoderResult.UNDERFLOW;
}
}
super(cs);
}
+ @Override
protected final CoderResult encodeLoopCoreOptimized(CharBuffer source, ByteBuffer target,
char[] sourceArray, byte[] targetArray, int oldSource, int offset, int limit,
boolean flush) {
* char in the source is within the correct range
*/
for (i = oldSource; i < limit; i++) {
- ch = (int) sourceArray[i];
+ ch = sourceArray[i];
if ((ch & 0xff00) == 0) {
targetArray[i + offset] = (byte) ch;
} else {
return null;
}
+ @Override
protected final CoderResult encodeLoopCoreUnoptimized(CharBuffer source, ByteBuffer target, boolean flush) {
int ch;
* perform 88591 conversion from the source buffer to the target buffer, making sure
* each char in the source is within the correct range
*/
-
+
while (source.hasRemaining()) {
- ch = (int) source.get();
+ ch = source.get();
if ((ch & 0xff00) == 0) {
if (target.hasRemaining()) {
target.put((byte) ch);
return encodeMalformedOrUnmappable(source, ch, flush);
}
}
-
+
return CoderResult.UNDERFLOW;
}
}
+ @Override
/**
 * Builds a fresh decoder bound to this charset.
 *
 * @return a newly constructed {@code CharsetDecoder88591} that was handed this charset
 */
public CharsetDecoder newDecoder() {
    final CharsetDecoder decoder = new CharsetDecoder88591(this);
    return decoder;
}
+ @Override
/**
 * Builds a fresh encoder bound to this charset.
 *
 * @return a newly constructed {@code CharsetEncoder88591} that was handed this charset
 */
public CharsetEncoder newEncoder() {
    final CharsetEncoder encoder = new CharsetEncoder88591(this);
    return encoder;
}
-
+
+ @Override
/**
 * Adds the code point range 0..0xFF to the supplied set.
 *
 * @param setFillIn set that receives the range
 * @param which not consulted by this implementation
 */
void getUnicodeSetImpl(UnicodeSet setFillIn, int which) {
    final int firstCodePoint = 0;
    final int lastCodePoint = 0xff;
    setFillIn.add(firstCodePoint, lastCodePoint);
}
super(cs);
}
+ @Override
protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets,
boolean flush) {
if (!source.hasRemaining()) {
int sourceOffset = source.arrayOffset();
int sourceIndex = oldSource + sourceOffset;
int sourceLength = source.limit() - oldSource;
-
+
char[] targetArray = target.array();
int targetOffset = target.arrayOffset();
int targetIndex = oldTarget + targetOffset;
*/
while (source.hasRemaining()) {
ch = source.get() & 0xff;
-
+
if ((ch & 0x80) == 0) {
if (target.hasRemaining()) {
target.put((char)ch);
return decodeMalformedOrUnmappable(ch);
}
}
-
+
return CoderResult.UNDERFLOW;
}
private final static int NEED_TO_WRITE_BOM = 1;
+ @Override
/**
 * Resets this object's state: runs the superclass reset first, then sets
 * {@code fromUnicodeStatus} to {@code NEED_TO_WRITE_BOM}.
 */
protected void implReset() {
super.implReset();
// Assigned after the superclass reset, so this is the value left in place when the method returns.
fromUnicodeStatus = NEED_TO_WRITE_BOM;
}
+ @Override
protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets,
boolean flush) {
if (!source.hasRemaining()) {
}
} else {
/* unoptimized loop */
-
+
cr = encodeLoopCoreUnoptimized(source, target, flush);
-
+
if (cr == CoderResult.OVERFLOW) {
source.position(source.position() - 1); /* rewind by 1 */
}
* perform ascii conversion from the source array to the target array, making sure each
* char in the source is within the correct range
*/
- for (i = oldSource; i < limit && (((ch = (int) sourceArray[i]) & 0xff80) == 0); i++)
+ for (i = oldSource; i < limit && (((ch = sourceArray[i]) & 0xff80) == 0); i++)
targetArray[i + offset] = (byte) ch;
/*
protected CoderResult encodeLoopCoreUnoptimized(CharBuffer source, ByteBuffer target, boolean flush) {
int ch;
-
+
/*
* perform ascii conversion from the source buffer to the target buffer, making sure
* each char in the source is within the correct range
*/
while (source.hasRemaining()) {
- ch = (int) source.get();
-
+ ch = source.get();
+
if ((ch & 0xff80) == 0) {
if (target.hasRemaining()) {
target.put((byte) ch);
return encodeMalformedOrUnmappable(source, ch, flush);
}
}
-
+
return CoderResult.UNDERFLOW;
}
private final CoderResult encodeTrail(CharBuffer source, char lead, boolean flush) {
/*
* ASCII doesn't support characters in the BMP, so if handleSurrogates returns null,
- * we leave fromUChar32 alone (it should store a new codepoint) and call it unmappable.
+ * we leave fromUChar32 alone (it should store a new codepoint) and call it unmappable.
*/
CoderResult cr = handleSurrogates(source, lead);
if (cr != null) {
}
+ @Override
/**
 * Builds a fresh decoder bound to this charset.
 *
 * @return a newly constructed {@code CharsetDecoderASCII} that was handed this charset
 */
public CharsetDecoder newDecoder() {
    final CharsetDecoder decoder = new CharsetDecoderASCII(this);
    return decoder;
}
+ @Override
/**
 * Builds a fresh encoder bound to this charset.
 *
 * @return a newly constructed {@code CharsetEncoderASCII} that was handed this charset
 */
public CharsetEncoder newEncoder() {
    final CharsetEncoder encoder = new CharsetEncoderASCII(this);
    return encoder;
}
-
+
+ @Override
/**
 * Adds the code point range 0..0x7F to the supplied set.
 *
 * @param setFillIn set that receives the range
 * @param which not consulted by this implementation
 */
void getUnicodeSetImpl(UnicodeSet setFillIn, int which) {
    final int firstCodePoint = 0;
    final int lastCodePoint = 0x7f;
    setFillIn.add(firstCodePoint, lastCodePoint);
}
* @author krajwade
*
*/
-class CharsetBOCU1 extends CharsetICU {
+class CharsetBOCU1 extends CharsetICU {
/* BOCU constants and macros */
-
+
/* initial value for "prev": middle of the ASCII range */
private static final byte BOCU1_ASCII_PREV = 0x40;
-
+
/* bounding byte values for differences */
private static final int BOCU1_MIN = 0x21;
private static final int BOCU1_MIDDLE = 0x90;
/* number of trail bytes */
private static final int BOCU1_TRAIL_COUNT =((BOCU1_MAX_TRAIL-BOCU1_MIN+1)+BOCU1_TRAIL_CONTROLS_COUNT);
-
+
/*
* number of positive and negative single-byte codes
* (counting 0==BOCU1_MIDDLE among the positive ones)
/* The length of a byte sequence, according to the lead byte (!=BOCU1_RESET). */
/* private static int BOCU1_LENGTH_FROM_LEAD(int lead) {
- return ((BOCU1_START_NEG_2<=(lead) && (lead)<BOCU1_START_POS_2) ? 1 :
- (BOCU1_START_NEG_3<=(lead) && (lead)<BOCU1_START_POS_3) ? 2 :
+ return ((BOCU1_START_NEG_2<=(lead) && (lead)<BOCU1_START_POS_2) ? 1 :
+ (BOCU1_START_NEG_3<=(lead) && (lead)<BOCU1_START_POS_3) ? 2 :
(BOCU1_START_NEG_4<=(lead) && (lead)<BOCU1_START_POS_4) ? 3 : 4);
}*/
/**
 * Extracts the byte-sequence length from a packed difference value.
 *
 * @param packed packed value; it is masked with {@code UConverterConstants.UNSIGNED_INT_MASK}
 *               before the threshold comparison
 * @return {@code packed >> 24} when the masked value is below 0x04000000, otherwise 4
 */
private static int BOCU1_LENGTH_FROM_PACKED(int packed) {
    final long maskedPacked = packed & UConverterConstants.UNSIGNED_INT_MASK;
    if (maskedPacked < 0x04000000) {
        return packed >> 24;
    }
    return 4;
}
-
+
/*
* Byte value map for control codes,
* from external byte values 0x00..0x20
* from trail byte values 0..19 (0..0x13) as used in the difference calculation
* to external byte values 0x00..0x20.
*/
- private static final int[]
+ private static final int[]
bocu1TrailToByte = {
/* 0 1 2 3 4 5 6 7 */
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x10, 0x11,
/* 10 11 12 13 */
0x1c, 0x1d, 0x1e, 0x1f
};
-
-
+
+
/*
* 12 commonly used C0 control codes (and space) are only used to encode
* themselves directly,
*/
private static int BOCU1_TRAIL_TO_BYTE(int trail) {
return ((trail)>=BOCU1_TRAIL_CONTROLS_COUNT ? (trail)+BOCU1_TRAIL_BYTE_OFFSET : bocu1TrailToByte[trail]);
- }
-
+ }
+
/* BOCU-1 implementation functions ------------------------------------------ */
private static int BOCU1_SIMPLE_PREV(int c){
return (((c)&~0x7f)+BOCU1_ASCII_PREV);
private static int BOCU1_PREV(int c) {
return ((c)<0x3040 || (c)>0xd7a3 ? BOCU1_SIMPLE_PREV(c) : bocu1Prev(c));
}
-
+
protected byte[] fromUSubstitution = new byte[]{(byte)0x1A};
/* Faster versions of packDiff() for single-byte-encoded diff values. */
/** Is a diff value encodable in two bytes? */
private static boolean DIFF_IS_DOUBLE(int diff){
return (BOCU1_REACH_NEG_2<=(diff) && (diff)<=BOCU1_REACH_POS_2);
- }
-
+ }
+
public CharsetBOCU1(String icuCanonicalName, String javaCanonicalName, String[] aliases){
super(icuCanonicalName, javaCanonicalName, aliases);
- maxBytesPerChar = 4;
+ maxBytesPerChar = 4;
minBytesPerChar = 1;
maxCharsPerByte = 1;
}
-
+
class CharsetEncoderBOCU extends CharsetEncoderICU {
public CharsetEncoderBOCU(CharsetICU cs) {
super(cs,fromUSubstitution);
}
-
+
int sourceIndex, nextSourceIndex;
int prev, c , diff;
boolean checkNegative;
boolean LoopAfterTrail;
int targetCapacity;
- CoderResult cr;
-
+ CoderResult cr;
+
/* label values for supporting behavior similar to goto in C */
private static final int fastSingle=0;
private static final int getTrail=1;
private static final int regularLoop=2;
-
+
private boolean LabelLoop; //used to break the while loop
private int labelType = fastSingle; //labeType is set to fastSingle to start the code from fastSingle:
-
+
/**
* Integer division and modulo with negative numerators
* yields negative modulo results and quotients that are one more than
*/
private int NEGDIVMOD(int n, int d, int m) {
diff = n;
- (m)=(diff)%(d);
- (diff)/=(d);
- if((m)<0) {
+ (m)=(diff)%(d);
+ (diff)/=(d);
+ if((m)<0) {
--(diff);
(m)+=(d);
}
return m;
}
-
+
/**
* Encode a difference -0x10ffff..0x10ffff in 1..4 bytes
* and return a packed integer with them.
}
return result;
}
-
+
+ @Override
protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush){
cr = CoderResult.UNDERFLOW;
-
+
LabelLoop = true; //used to break the while loop
checkNegative = false; // its value is set to true to get out of while loop when c = -c
LoopAfterTrail = false; // its value is set to true to ignore code before getTrail:
-
+
/*set up the local pointers*/
targetCapacity = target.limit() - target.position();
c = fromUChar32;
prev = fromUnicodeStatus;
-
+
if(prev==0){
prev = BOCU1_ASCII_PREV;
}
-
+
/*sourceIndex ==-1 if the current character began in the previous buffer*/
sourceIndex = c == 0 ? 0: -1;
nextSourceIndex = 0;
-
+
/*conversion loop*/
if(c!=0 && targetCapacity>0){
labelType = getTrail;
}
-
+
while(LabelLoop){
switch(labelType){
case fastSingle:
break;
}
}
-
+
return cr;
}
-
- private int fastSingle(CharBuffer source, ByteBuffer target, IntBuffer offsets){
-//fastSingle:
+
+ private int fastSingle(CharBuffer source, ByteBuffer target, IntBuffer offsets){
+//fastSingle:
/*fast loop for single-byte differences*/
/*use only one loop counter variable , targetCapacity, not also source*/
diff = source.limit() - source.position();
}
return regularLoop;
}
-
+
private int getTrail(CharBuffer source, ByteBuffer target, IntBuffer offsets){
if(source.hasRemaining()){
/*test the following code unit*/
/*regular loop for all classes*/
while(LoopAfterTrail || source.hasRemaining()){
if(LoopAfterTrail || targetCapacity>0){
-
+
if(!LoopAfterTrail){
c = source.get();
++nextSourceIndex;
-
+
if(c<=0x20){
/*
* ISO C0 control & space:
offsets.put(sourceIndex++);
}
--targetCapacity;
-
+
sourceIndex=nextSourceIndex;
continue;
}
-
+
if(UTF16.isLeadSurrogate((char)c)){
getTrail(source, target, offsets);
if(checkNegative){
}
}
}
-
+
if(LoopAfterTrail){
- LoopAfterTrail = false;
+ LoopAfterTrail = false;
}
-
+
/*
* all other Unicode code points c==U+0021..U+10ffff
* are encoded with the difference c-prev
int length; /*will be 2..4*/
diff = packDiff(diff);
length = BOCU1_LENGTH_FROM_PACKED(diff);
-
+
/*write the output character bytes from diff and length*/
/*from the first if in the loop we know that targetCapacity>0*/
if(length<=targetCapacity){
break;
}
errorBufferLength = length;
-
+
/* now output what fits into the regular target */
diff>>=8*length; /* length was reduced by targetCapacity */
switch(targetCapacity) {
cr = CoderResult.OVERFLOW;
break;
}
-
+
}
/*set the converter state back into UConverter*/
fromUChar32 = c<0 ? -c :0;
labelType = fastSingle;
return labelType;
}
-
+
}
-
+
static class CharsetDecoderBOCU extends CharsetDecoderICU{
public CharsetDecoderBOCU(CharsetICU cs) {
super(cs);
}
-
+
int byteIndex;
int sourceIndex, nextSourceIndex;
int prev, c , diff, count;
byte[] bytes;
CoderResult cr;
-
+
/* label values for supporting behavior similar to goto in C */
private static final int fastSingle=0;
private static final int getTrail=1;
private static final int regularLoop=2;
private static final int endLoop=3;
-
+
private boolean LabelLoop;//used to break the while loop
private boolean afterTrail; // its value is set to true to ignore code after getTrail:
private int labelType;
/* BOCU-1-from-Unicode conversion functions --------------------------------- */
-
-
+
+
/**
* Function for BOCU-1 decoder; handles multi-byte lead bytes.
*
/* return the state for decoding the trail byte(s) */
return (diffValue<<2)|countValue;
}
-
+
/**
* Function for BOCU-1 decoder; handles multi-byte trail bytes.
*
return b*(BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT);
}
}
-
+
+ @Override
protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets,
boolean flush){
cr = CoderResult.UNDERFLOW;
-
- LabelLoop = true;
- afterTrail = false;
+
+ LabelLoop = true;
+ afterTrail = false;
labelType = fastSingle; // labelType is set to fastSingle so t
-
+
/*get the converter state*/
prev = toUnicodeStatus;
-
+
if(prev==0){
prev = BOCU1_ASCII_PREV;
}
diff = mode;
count = diff&3;
diff>>=2;
-
+
byteIndex = toULength;
bytes = toUBytesArray;
-
+
/* sourceIndex=-1 if the current character began in the previous buffer */
sourceIndex=byteIndex==0 ? 0 : -1;
nextSourceIndex=0;
-
+
/* conversion "loop" similar to _SCSUToUnicodeWithOffsets() */
if(count>0 && byteIndex>0 && target.position()<target.limit()) {
labelType = getTrail;
}
-
+
while(LabelLoop){
switch(labelType){
case fastSingle:
break;
}
}
-
+
return cr;
}
-
+
private int fastSingle(ByteBuffer source, CharBuffer target, IntBuffer offsets){
labelType = regularLoop;
/* fast loop for single-byte differences */
target.put((char)c);
if(offsets!=null){
offsets.put(nextSourceIndex++);
- }
+ }
prev = BOCU1_SIMPLE_PREV(c);
} else {
break;
target.put((char)c);
if(offsets!=null){
offsets.put(nextSourceIndex++);
- }
+ }
} else {
break;
}
sourceIndex=nextSourceIndex; /* wrong if offsets==NULL but does not matter */
return labelType;
}
-
+
private int getTrail(ByteBuffer source, CharBuffer target, IntBuffer offsets){
labelType = regularLoop;
for(;;) {
}
afterTrail = true;
return labelType;
-
+
}
-
+
private int afterGetTrail(ByteBuffer source, CharBuffer target, IntBuffer offsets){
/* decode a sequence of single and lead bytes */
while(afterTrail || source.hasRemaining()) {
}
}
}
-
+
if(afterTrail){
afterTrail = false;
}
-
+
/* calculate the next prev and output c */
prev = BOCU1_PREV(c);
if(c<=0xffff) {
labelType = endLoop;
return labelType;
}
-
+
private void endLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets){
if(cr.isMalformed()) {
/* set the converter state in UConverter to deal with the next character */
toULength=byteIndex;
LabelLoop = false;
}
-
+
}
-
-
+
+
+ @Override
/**
 * Builds a fresh decoder bound to this charset.
 *
 * @return a newly constructed {@code CharsetDecoderBOCU} that was handed this charset
 */
public CharsetDecoder newDecoder() {
    final CharsetDecoder decoder = new CharsetDecoderBOCU(this);
    return decoder;
}
+ @Override
/**
 * Builds a fresh encoder bound to this charset.
 *
 * @return a newly constructed {@code CharsetEncoderBOCU} that was handed this charset
 */
public CharsetEncoder newEncoder() {
    final CharsetEncoder encoder = new CharsetEncoderBOCU(this);
    return encoder;
}
-
+
+ @Override
/**
 * Fills the supplied set by delegating to {@code CharsetICU.getCompleteUnicodeSet}.
 *
 * @param setFillIn set passed through to the delegate
 * @param which not consulted by this implementation
 */
void getUnicodeSetImpl( UnicodeSet setFillIn, int which){
CharsetICU.getCompleteUnicodeSet(setFillIn);
}
/**
 * Creates the charset by forwarding all three arguments, unchanged, to the superclass constructor.
 *
 * @param icuCanonicalName passed through as the first superclass argument
 * @param javaCanonicalName passed through as the second superclass argument
 * @param aliases passed through as the third superclass argument
 */
public CharsetCESU8(String icuCanonicalName, String javaCanonicalName, String[] aliases) {
super(icuCanonicalName, javaCanonicalName, aliases);
}
-
-
+
+
+ @Override
void getUnicodeSetImpl( UnicodeSet setFillIn, int which){
getCompleteUnicodeSet(setFillIn);
-
+
}
}
/**
* <h2> Callback API for CharsetICU API </h2>
- *
- * CharsetCallback class defines some error behaviour functions called
+ *
+ * CharsetCallback class defines some error behaviour functions called
* by CharsetDecoderICU and CharsetEncoderICU. The class also provides
* the facility by which clients can write their own callbacks.
*
* These functions, although public, should NEVER be called directly.
- * They should be used as parameters to the onUmappableCharacter() and
+ * They should be used as parameters to the onUmappableCharacter() and
* onMalformedInput() methods, to set the behaviour of a converter
* when it encounters UNMAPPED/INVALID sequences.
* Currently the only way to set callbacks is by using CodingErrorAction.
// private static final String SKIP_STOP_ON_ILLEGAL = "i";
// /*
-// * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to ICU (%UXXXX)
+// * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to ICU (%UXXXX)
// */
// private static final String ESCAPE_ICU = null;
* For a list of the default ignorable code points, use this link: http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[%3ADI%3A]&g=
*
* This list should be sync with the one in ucnv_err.c
- *
+ *
*/
private static boolean IS_DEFAULT_IGNORABLE_CODE_POINT(int c) {
- return ((c == 0x00AD) ||
- (c == 0x034F) ||
- (c == 0x061C) ||
- (c == 0x115F) ||
- (c == 0x1160) ||
- (0x17B4 <= c && c <= 0x17B5) ||
- (0x180B <= c && c <= 0x180E) ||
- (0x200B <= c && c <= 0x200F) ||
- (0x202A <= c && c <= 0x202E) ||
- (c == 0x2060) ||
- (0x2066 <= c && c <= 0x2069) ||
- (0x2061 <= c && c <= 0x2064) ||
- (0x206A <= c && c <= 0x206F) ||
- (c == 0x3164) ||
- (0x0FE00 <= c && c <= 0x0FE0F) ||
- (c == 0x0FEFF) ||
- (c == 0x0FFA0) ||
- (0x01BCA0 <= c && c <= 0x01BCA3) ||
- (0x01D173 <= c && c <= 0x01D17A) ||
- (c == 0x0E0001) ||
- (0x0E0020 <= c && c <= 0x0E007F) ||
- (0x0E0100 <= c && c <= 0x0E01EF) ||
- (c == 0x2065) ||
- (0x0FFF0 <= c && c <= 0x0FFF8) ||
- (c == 0x0E0000) ||
- (0x0E0002 <= c && c <= 0x0E001F) ||
- (0x0E0080 <= c && c <= 0x0E00FF) ||
+ return ((c == 0x00AD) ||
+ (c == 0x034F) ||
+ (c == 0x061C) ||
+ (c == 0x115F) ||
+ (c == 0x1160) ||
+ (0x17B4 <= c && c <= 0x17B5) ||
+ (0x180B <= c && c <= 0x180E) ||
+ (0x200B <= c && c <= 0x200F) ||
+ (0x202A <= c && c <= 0x202E) ||
+ (c == 0x2060) ||
+ (0x2066 <= c && c <= 0x2069) ||
+ (0x2061 <= c && c <= 0x2064) ||
+ (0x206A <= c && c <= 0x206F) ||
+ (c == 0x3164) ||
+ (0x0FE00 <= c && c <= 0x0FE0F) ||
+ (c == 0x0FEFF) ||
+ (c == 0x0FFA0) ||
+ (0x01BCA0 <= c && c <= 0x01BCA3) ||
+ (0x01D173 <= c && c <= 0x01D17A) ||
+ (c == 0x0E0001) ||
+ (0x0E0020 <= c && c <= 0x0E007F) ||
+ (0x0E0100 <= c && c <= 0x0E01EF) ||
+ (c == 0x2065) ||
+ (0x0FFF0 <= c && c <= 0x0FFF8) ||
+ (c == 0x0E0000) ||
+ (0x0E0002 <= c && c <= 0x0E001F) ||
+ (0x0E0080 <= c && c <= 0x0E00FF) ||
(0x0E01F0 <= c && c <= 0x0E0FFF)
);
}
/**
* This function is called when the bytes in the source cannot be handled,
* and this function is meant to handle or fix the error if possible.
- *
+ *
* @return Result of decoding action. This returned object is set to an error
* if this function could not handle the conversion.
* @stable ICU 3.6
*/
- public CoderResult call(CharsetDecoderICU decoder, Object context,
+ public CoderResult call(CharsetDecoderICU decoder, Object context,
ByteBuffer source, CharBuffer target, IntBuffer offsets,
char[] buffer, int length, CoderResult cr);
}
* if this function could not handle the conversion.
* @stable ICU 3.6
*/
- public CoderResult call(CharsetEncoderICU encoder, Object context,
- CharBuffer source, ByteBuffer target, IntBuffer offsets,
+ public CoderResult call(CharsetEncoderICU encoder, Object context,
+ CharBuffer source, ByteBuffer target, IntBuffer offsets,
char[] buffer, int length, int cp, CoderResult cr);
- }
+ }
/**
* Skip callback
* @stable ICU 3.6
*/
public static final Encoder FROM_U_CALLBACK_SKIP = new Encoder() {
- public CoderResult call(CharsetEncoderICU encoder, Object context,
- CharBuffer source, ByteBuffer target, IntBuffer offsets,
+ @Override
+ public CoderResult call(CharsetEncoderICU encoder, Object context,
+ CharBuffer source, ByteBuffer target, IntBuffer offsets,
char[] buffer, int length, int cp, CoderResult cr){
if(context==null){
return CoderResult.UNDERFLOW;
* @stable ICU 3.6
*/
public static final Decoder TO_U_CALLBACK_SKIP = new Decoder() {
- public CoderResult call(CharsetDecoderICU decoder, Object context,
+ @Override
+ public CoderResult call(CharsetDecoderICU decoder, Object context,
ByteBuffer source, CharBuffer target, IntBuffer offsets,
char[] buffer, int length, CoderResult cr){
if(context==null){
* Write substitute callback
* @stable ICU 3.6
*/
- public static final Encoder FROM_U_CALLBACK_SUBSTITUTE = new Encoder(){
- public CoderResult call(CharsetEncoderICU encoder, Object context,
- CharBuffer source, ByteBuffer target, IntBuffer offsets,
+ public static final Encoder FROM_U_CALLBACK_SUBSTITUTE = new Encoder(){
+ @Override
+ public CoderResult call(CharsetEncoderICU encoder, Object context,
+ CharBuffer source, ByteBuffer target, IntBuffer offsets,
char[] buffer, int length, int cp, CoderResult cr){
if (cr.isUnmappable() && IS_DEFAULT_IGNORABLE_CODE_POINT(cp)) {
return CoderResult.UNDERFLOW;
* @stable ICU 3.6
*/
public static final Decoder TO_U_CALLBACK_SUBSTITUTE = new Decoder() {
- public CoderResult call(CharsetDecoderICU decoder, Object context,
+ @Override
+ public CoderResult call(CharsetDecoderICU decoder, Object context,
ByteBuffer source, CharBuffer target, IntBuffer offsets,
char[] buffer, int length, CoderResult cr){
if (replacementChar.length == 1 && (replacementChar[0] == kSubstituteChar1[0] || replacementChar[0] == kSubstituteChar[0])) {
useReplacement = false;
}
-
+
/* could optimize this case, just one uchar */
if(decoder.invalidCharLength == 1 && cs.subChar1 != 0) {
return CharsetDecoderICU.toUWriteUChars(decoder, useReplacement ? replacementChar : kSubstituteChar1, 0, useReplacement ? replacementChar.length : 1, target, offsets, source.position());
* @stable ICU 3.6
*/
public static final Encoder FROM_U_CALLBACK_STOP = new Encoder() {
- public CoderResult call(CharsetEncoderICU encoder, Object context,
- CharBuffer source, ByteBuffer target, IntBuffer offsets,
+ @Override
+ public CoderResult call(CharsetEncoderICU encoder, Object context,
+ CharBuffer source, ByteBuffer target, IntBuffer offsets,
char[] buffer, int length, int cp, CoderResult cr){
if (cr.isUnmappable() && IS_DEFAULT_IGNORABLE_CODE_POINT(cp)) {
return CoderResult.UNDERFLOW;
* @stable ICU 3.6
*/
public static final Decoder TO_U_CALLBACK_STOP = new Decoder() {
- public CoderResult call(CharsetDecoderICU decoder, Object context,
+ @Override
+ public CoderResult call(CharsetDecoderICU decoder, Object context,
ByteBuffer source, CharBuffer target, IntBuffer offsets,
char[] buffer, int length, CoderResult cr){
return cr;
}
- };
+ };
private static final int VALUE_STRING_LENGTH = 32;
private static final char UNICODE_PERCENT_SIGN_CODEPOINT = 0x0025;
private static final char UNICODE_U_CODEPOINT = 0x0055;
* @stable ICU 4.0
*/
public static final Encoder FROM_U_CALLBACK_ESCAPE = new Encoder() {
- public CoderResult call(CharsetEncoderICU encoder, Object context,
- CharBuffer source, ByteBuffer target, IntBuffer offsets,
+ @Override
+ public CoderResult call(CharsetEncoderICU encoder, Object context,
+ CharBuffer source, ByteBuffer target, IntBuffer offsets,
char[] buffer, int length, int cp, CoderResult cr){
char[] valueString = new char[VALUE_STRING_LENGTH];
int valueStringLength = 0;
int i = 0;
-
+
if (cr.isUnmappable() && IS_DEFAULT_IGNORABLE_CODE_POINT(cp)) {
return CoderResult.UNDERFLOW;
}
-
+
if (context == null || !(context instanceof String)) {
while (i < length) {
valueString[valueStringLength++] = UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
}
} else if (((String)context).equals(ESCAPE_C)) {
valueString[valueStringLength++] = UNICODE_RS_CODEPOINT; /* adding \ */
-
+
if (length == 2) {
valueString[valueStringLength++] = UNICODE_U_CODEPOINT; /* adding U */
valueStringLength = itou(valueString, valueStringLength, cp, 16, 8);
* @stable ICU 4.0
*/
public static final Decoder TO_U_CALLBACK_ESCAPE = new Decoder() {
- public CoderResult call(CharsetDecoderICU decoder, Object context,
+ @Override
+ public CoderResult call(CharsetDecoderICU decoder, Object context,
ByteBuffer source, CharBuffer target, IntBuffer offsets,
char[] buffer, int length, CoderResult cr){
char[] uniValueString = new char[VALUE_STRING_LENGTH];
int valueStringLength = 0;
int i = 0;
-
+
if (context == null || !(context instanceof String)) {
while (i < length) {
uniValueString[valueStringLength++] = UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
}
}
}
-
+
cr = CharsetDecoderICU.toUWriteUChars(decoder, uniValueString, 0, valueStringLength, target, offsets, 0);
-
+
return cr;
}
- };
+ };
/***
* Java port of uprv_itou() in ICU4C used by TO_U_CALLBACK_ESCAPE and FROM_U_CALLBACK_ESCAPE.
* Fills in a char string with the radix-based representation of a number padded with zeroes
int digit;
int j;
char temp;
-
+
do {
digit = i % radix;
buffer[sourceIndex + length++] = (char)(digit <= 9 ? (0x0030+digit) : (0x0030+digit+7));
i = i/radix;
} while (i != 0 && (sourceIndex + length) < buffer.length);
-
+
while (length < minwidth) {
buffer[sourceIndex + length++] = (char)0x0030; /* zero padding */
}
buffer[(sourceIndex + length-1) -j] = buffer[sourceIndex + j];
buffer[sourceIndex + j] = temp;
}
-
+
return length;
}
private static final byte[] fromUSubstitution = new byte[] { (byte) 0x3F };
private CharsetMBCS myConverterArray[];
private byte state;
-
+
private final static byte INVALID = -2;
private final static byte DO_SEARCH = -1;
private final static byte COMPOUND_TEXT_SINGLE_0 = 0;
private final static byte COMPOUND_TEXT_SINGLE_1 = 1;
private final static byte COMPOUND_TEXT_SINGLE_2 = 2;
private final static byte COMPOUND_TEXT_SINGLE_3 = 3;
-
+
/*private final static byte COMPOUND_TEXT_DOUBLE_1 = 4;
private final static byte COMPOUND_TEXT_DOUBLE_2 = 5;
private final static byte COMPOUND_TEXT_DOUBLE_3 = 6;
private final static byte COMPOUND_TEXT_DOUBLE_5 = 8;
private final static byte COMPOUND_TEXT_DOUBLE_6 = 9;
private final static byte COMPOUND_TEXT_DOUBLE_7 = 10;
-
+
private final static byte COMPOUND_TEXT_TRIPLE_DOUBLE = 11;*/
-
+
private final static byte IBM_915 = 12;
private final static byte IBM_916 = 13;
private final static byte IBM_914 = 14;
private final static byte IBM_913 = 17;
private final static byte ISO_8859_14 = 18;
private final static byte IBM_923 = 19;
-
+
private final static byte NUM_OF_CONVERTERS = 20;
-
+
private final static byte SEARCH_LENGTH = 12;
-
+
private final static byte[][] escSeqCompoundText = {
/* Single */
{ 0x1B, 0x2D, 0x41 },
{ 0x1B, 0x2D, 0x4D },
{ 0x1B, 0x2D, 0x46 },
{ 0x1B, 0x2D, 0x47 },
-
+
/* Double */
{ 0x1B, 0x24, 0x29, 0x41 },
{ 0x1B, 0x24, 0x29, 0x42 },
{ 0x1B, 0x24, 0x29, 0x47 },
{ 0x1B, 0x24, 0x29, 0x48 },
{ 0x1B, 0x24, 0x29, 0x49 },
-
+
/* Triple/Double */
{ 0x1B, 0x25, 0x47 },
-
+
/*IBM-915*/
{ 0x1B, 0x2D, 0x4C },
/*IBM-916*/
/* IBM-923 */
{ 0x1B, 0x2D, 0x62 },
};
-
+
private final static byte ESC_START = 0x1B;
-
+
private static boolean isASCIIRange(int codepoint) {
if ((codepoint == 0x0000) || (codepoint == 0x0009) || (codepoint == 0x000A) ||
(codepoint >= 0x0020 && codepoint <= 0x007f) || (codepoint >= 0x00A0 && codepoint <= 0x00FF)) {
}
return false;
}
-
+
/**
 * Range test for the IBM-915 group.
 *
 * @param codepoint code point to classify
 * @return {@code true} when the value lies in U+0401..U+045F or equals U+2116
 */
private static boolean isIBM915(int codepoint) {
    final boolean inMainRange = 0x0401 <= codepoint && codepoint <= 0x045F;
    return inMainRange || codepoint == 0x2116;
}
-
+
/**
 * Range test for the IBM-916 group.
 *
 * @param codepoint code point to classify
 * @return {@code true} when the value lies in U+05D0..U+05EA or equals U+2017 or U+203E
 */
private static boolean isIBM916(int codepoint) {
    if (0x05D0 <= codepoint && codepoint <= 0x05EA) {
        return true;
    }
    return codepoint == 0x2017 || codepoint == 0x203E;
}
-
+
private static boolean isCompoundS3(int codepoint) {
if ((codepoint == 0x060C) || (codepoint == 0x061B) || (codepoint == 0x061F) || (codepoint >= 0x0621 && codepoint <= 0x063A) ||
(codepoint >= 0x0640 && codepoint <= 0x0652) || (codepoint >= 0x0660 && codepoint <= 0x066D) || (codepoint == 0x200B) ||
}
return false;
}
-
+
/**
 * Range test for the compound-text "S2" group.
 *
 * @param codepoint code point to classify
 * @return {@code true} for U+02BC, U+02BD, U+2015, or any value in U+0384..U+03CE
 */
private static boolean isCompoundS2(int codepoint) {
    switch (codepoint) {
        case 0x02BC:
        case 0x02BD:
        case 0x2015:
            return true;
        default:
            return 0x0384 <= codepoint && codepoint <= 0x03CE;
    }
}
-
+
private static boolean isIBM914(int codepoint) {
if ((codepoint == 0x0100) || (codepoint == 0x0101) || (codepoint == 0x0112) || (codepoint == 0x0113) || (codepoint == 0x0116) || (codepoint == 0x0117) ||
(codepoint == 0x0122) || (codepoint == 0x0123) || (codepoint >= 0x0128 && codepoint <= 0x012B) || (codepoint == 0x012E) || (codepoint == 0x012F) ||
}
return false;
}
-
+
/**
 * Range test for the IBM-874 group.
 *
 * @param codepoint code point to classify
 * @return {@code true} when the value lies in U+0E01..U+0E3A or in U+0E3F..U+0E5B
 */
private static boolean isIBM874(int codepoint) {
    final boolean inLowerBlock = 0x0E01 <= codepoint && codepoint <= 0x0E3A;
    final boolean inUpperBlock = 0x0E3F <= codepoint && codepoint <= 0x0E5B;
    return inLowerBlock || inUpperBlock;
}
-
+
private static boolean isIBM912(int codepoint) {
return ((codepoint >= 0x0102 && codepoint <= 0x0107) || (codepoint >= 0x010C && codepoint <= 0x0111) || (codepoint >= 0x0118 && codepoint <= 0x011B) ||
(codepoint == 0x0139) || (codepoint == 0x013A) || (codepoint == 0x013D) || (codepoint == 0x013E) || (codepoint >= 0x0141 && codepoint <= 0x0144) ||
(codepoint == 0x016E) || (codepoint == 0x016F) || (codepoint == 0x0170) || (codepoint == 0x0171) || (codepoint >= 0x0179 && codepoint <= 0x017E) ||
(codepoint == 0x02C7) || (codepoint == 0x02D8) || (codepoint == 0x02D9) || (codepoint == 0x02DB) || (codepoint == 0x02DD));
}
-
+
private static boolean isIBM913(int codepoint) {
if ((codepoint >= 0x0108 && codepoint <= 0x010B) || (codepoint == 0x011C) ||
(codepoint == 0x011D) || (codepoint == 0x0120) || (codepoint == 0x0121) ||
}
return false;
}
-
+
private static boolean isCompoundS1(int codepoint) {
if ((codepoint == 0x011E) || (codepoint == 0x011F) || (codepoint == 0x0130) ||
(codepoint == 0x0131) || (codepoint >= 0x0218 && codepoint <= 0x021B)) {
}
return false;
}
-
+
private static boolean isISO8859_14(int codepoint) {
if ((codepoint >= 0x0174 && codepoint <= 0x0177) || (codepoint == 0x1E0A) ||
(codepoint == 0x1E0B) || (codepoint == 0x1E1E) || (codepoint == 0x1E1F) ||
}
return false;
}
-
+
/**
 * Membership test for the IBM-923 group.
 *
 * @param codepoint code point to classify
 * @return {@code true} only for U+0152, U+0153, U+0178 and U+20AC
 */
private static boolean isIBM923(int codepoint) {
    switch (codepoint) {
        case 0x0152:
        case 0x0153:
        case 0x0178:
        case 0x20AC:
            return true;
        default:
            return false;
    }
}
-
+
private static int findNextEsc(ByteBuffer source) {
int sourceLimit = source.limit();
for (int i = (source.position() + 1); i < sourceLimit; i++) {
}
return sourceLimit;
}
-
+
private static byte getState(int codepoint) {
byte state = -1;
-
+
if (isASCIIRange(codepoint)) {
state = COMPOUND_TEXT_SINGLE_0;
} else if (isIBM912(codepoint)) {
} else if (isCompoundS1(codepoint)) {
state = COMPOUND_TEXT_SINGLE_1;
}
-
+
return state;
}
-
+
private static byte findStateFromEscSeq(ByteBuffer source, byte[] toUBytes, int toUBytesLength) {
byte state = INVALID;
int sourceIndex = source.position();
byte i, n;
int offset = toUBytesLength;
int sourceLimit = source.limit();
-
+
for (i = 0; i < escSeqCompoundText.length; i++) {
matchFound = true;
for (n = 0; n < escSeqCompoundText[i].length; n++) {
break;
}
}
-
+
if (matchFound) {
state = i;
source.position(sourceIndex + (escSeqCompoundText[i].length - offset));
}
-
+
return state;
}
-
+
public CharsetCompoundText(String icuCanonicalName, String javaCanonicalName, String[] aliases) {
super(icuCanonicalName, javaCanonicalName, aliases);
-
+
LoadConverters();
-
+
maxBytesPerChar = 6;
minBytesPerChar = 1;
maxCharsPerByte = 1;
}
-
+
private void LoadConverters() {
myConverterArray = new CharsetMBCS[NUM_OF_CONVERTERS];
-
+
myConverterArray[COMPOUND_TEXT_SINGLE_0] = null;
-
+
for (int i = 1; i < SEARCH_LENGTH; i++) {
String name = "icu-internal-compound-";
if (i <= 3) {
} else {
name = name + "t";
}
-
+
myConverterArray[i] = (CharsetMBCS)CharsetICU.forNameICU(name);
}
-
+
myConverterArray[IBM_915] = (CharsetMBCS)CharsetICU.forNameICU("ibm-915_P100-1995");
myConverterArray[IBM_916] = (CharsetMBCS)CharsetICU.forNameICU("ibm-916_P100-1995");
myConverterArray[IBM_914] = (CharsetMBCS)CharsetICU.forNameICU("ibm-914_P100-1995");
myConverterArray[ISO_8859_14] = (CharsetMBCS)CharsetICU.forNameICU("iso-8859_14-1998");
myConverterArray[IBM_923] = (CharsetMBCS)CharsetICU.forNameICU("ibm-923_P100-1998");
}
-
+
class CharsetEncoderCompoundText extends CharsetEncoderICU {
CharsetEncoderMBCS gbEncoder[];
-
+
public CharsetEncoderCompoundText(CharsetICU cs) {
super(cs, fromUSubstitution);
-
+
gbEncoder = new CharsetEncoderMBCS[NUM_OF_CONVERTERS];
-
+
for (int i = 0; i < NUM_OF_CONVERTERS; i++) {
if (i == 0) {
gbEncoder[i] = null;
}
}
}
-
+
+ @Override
protected void implReset() {
super.implReset();
for (int i = 0; i < NUM_OF_CONVERTERS; i++) {
}
}
}
-
+
+ @Override
protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
CoderResult err = CoderResult.UNDERFLOW;
int sourceChar;
byte tmpState = 0;
int i = 0;
boolean gotoGetTrail = false;
-
+
if (!source.hasRemaining())
return CoderResult.UNDERFLOW;
else if (!target.hasRemaining())
return CoderResult.OVERFLOW;
-
+
/* check if the last codepoint of previous buffer was a lead surrogate */
if ((sourceChar = fromUChar32) != 0 && target.hasRemaining()) {
// goto getTrail label
- gotoGetTrail = true;
+ gotoGetTrail = true;
}
-
+
while (source.hasRemaining()) {
if (target.hasRemaining()) {
if (!gotoGetTrail) {
sourceChar = source.get();
}
-
+
targetLength = 0;
tmpTargetBuffer.position(0);
tmpTargetBuffer.limit(3);
-
+
/* check if the char is a First surrogate */
if (UTF16.isSurrogate((char)sourceChar) || gotoGetTrail) {
if (UTF16.isLeadSurrogate((char)sourceChar) || gotoGetTrail) {
// getTrail label
/* reset gotoGetTrail flag*/
gotoGetTrail = false;
-
+
/* look ahead to find the trail surrogate */
if (source.hasRemaining()) {
/* test the following code unit */
break;
}
}
-
+
tmpState = getState(sourceChar);
-
+
sourceCharArray[0] = (char)sourceChar;
-
+
if (tmpState < 0) {
/* Test all available converters */
for (i = 1; i < SEARCH_LENGTH; i++) {
if (err.isError()) {
break;
}
-
+
if (currentState != tmpState) {
currentState = tmpState;
-
+
/* Write escape sequence if necessary */
for (i = 0; i < escSeqCompoundText[currentState].length; i++) {
targetBytes[i] = escSeqCompoundText[currentState][i];
}
targetLength = i;
}
-
+
for (i = 0; i < tmpTargetBuffer.limit(); i++) {
targetBytes[i+targetLength] = tmpTargetBuffer.get(i);
}
targetLength += i;
-
+
for (i = 0; i < targetLength; i++) {
if (target.hasRemaining()) {
target.put(targetBytes[i]);
break;
}
}
-
+
if (err.isOverflow()) {
int m = 0;
for (int n = i; n < targetLength; n++) {
this.errorBufferLength = m;
}
state = currentState;
-
+
return err;
}
}
-
+
class CharsetDecoderCompoundText extends CharsetDecoderICU {
CharsetDecoderMBCS gbDecoder[];
-
+
public CharsetDecoderCompoundText(CharsetICU cs) {
super(cs);
gbDecoder = new CharsetDecoderMBCS[NUM_OF_CONVERTERS];
-
+
for (int i = 0; i < NUM_OF_CONVERTERS; i++) {
if (i == 0) {
gbDecoder[i] = null;
}
}
}
-
+
+ @Override
protected void implReset() {
super.implReset();
for (int i = 0; i < NUM_OF_CONVERTERS; i++) {
}
}
}
-
+
+ @Override
protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
CoderResult err = CoderResult.UNDERFLOW;
byte[] sourceChar = { 0x00 };
byte tmpState = currentState;
CharsetDecoderMBCS decoder;
int sourceLimit = source.limit();;
-
+
if (!source.hasRemaining())
return CoderResult.UNDERFLOW;
else if (!target.hasRemaining())
return CoderResult.OVERFLOW;
-
+
while (source.hasRemaining()) {
if (target.hasRemaining()) {
if (this.toULength > 0) {
} else {
sourceChar[0] = source.get(source.position());
}
-
+
if (sourceChar[0] == ESC_START) {
tmpState = findStateFromEscSeq(source, this.toUBytesArray, this.toULength);
if (tmpState == DO_SEARCH) {
}
break;
}
-
+
this.toULength = 0;
}
-
+
if (tmpState != currentState) {
currentState = tmpState;
}
-
+
if (currentState == COMPOUND_TEXT_SINGLE_0) {
while (source.hasRemaining()) {
if (!target.hasRemaining()) {
source.limit(findNextEsc(source));
decoder = gbDecoder[currentState];
-
+
decoder.toUBytesArray = this.toUBytesArray;
decoder.toULength = this.toULength;
err = decoder.decodeLoop(source, target, offsets, true);
-
+
this.toULength = decoder.toULength;
decoder.toULength = 0;
-
+
if (err.isError()) {
if (err.isOverflow()) {
this.charErrorBufferArray = decoder.charErrorBufferArray;
this.charErrorBufferBegin = decoder.charErrorBufferBegin;
this.charErrorBufferLength = decoder.charErrorBufferLength;
-
+
decoder.charErrorBufferBegin = 0;
decoder.charErrorBufferLength = 0;
}
}
-
+
source.limit(sourceLimit);
}
-
+
if (err.isError()) {
break;
}
return err;
}
}
-
+
+ @Override
public CharsetDecoder newDecoder() {
    // Every call builds a new compound-text decoder tied to this charset.
    final CharsetDecoder decoder = new CharsetDecoderCompoundText(this);
    return decoder;
}
+ @Override
public CharsetEncoder newEncoder() {
    // Every call builds a new compound-text encoder tied to this charset.
    final CharsetEncoder encoder = new CharsetEncoderCompoundText(this);
    return encoder;
}
-
+
+ @Override
void getUnicodeSetImpl( UnicodeSet setFillIn, int which){
for (int i = 1; i < NUM_OF_CONVERTERS; i++) {
myConverterArray[i].MBCSGetFilteredUnicodeSetForUnicode(myConverterArray[i].sharedData, setFillIn, which, CharsetMBCS.UCNV_SET_FILTER_NONE);
*******************************************************************************
*
*******************************************************************************
-*/
+*/
package com.ibm.icu.charset;
/**
* An abstract class that provides framework methods of decoding operations for concrete
- * subclasses.
+ * subclasses.
* In the future this class will contain API that will implement converter semantics of ICU4C.
* @stable ICU 3.6
*/
-public abstract class CharsetDecoderICU extends CharsetDecoder{
+public abstract class CharsetDecoderICU extends CharsetDecoder{
int toUnicodeStatus;
byte[] toUBytesArray = new byte[128];
int charErrorBufferBegin;
char[] invalidCharBuffer = new char[128];
int invalidCharLength;
-
+
/**
* Maximum number of indexed bytes
* @internal
int preToULength; /* negative: replay */
int preToUFirstLength; /* length of first character */
int mode;
-
+
Object toUContext = null;
private CharsetCallback.Decoder onUnmappableCharacter = CharsetCallback.TO_U_CALLBACK_STOP;
private CharsetCallback.Decoder onMalformedInput = CharsetCallback.TO_U_CALLBACK_STOP;
CharsetCallback.Decoder toCharErrorBehaviour = new CharsetCallback.Decoder() {
+ @Override
public CoderResult call(CharsetDecoderICU decoder, Object context, ByteBuffer source,
CharBuffer target, IntBuffer offsets, char[] buffer, int length, CoderResult cr) {
if (cr.isUnmappable()) {
// return CharsetCallback.TO_U_CALLBACK_STOP.call(decoder, context, source, target, offsets, buffer, length, cr);
}
};
-
+
// exist to keep implOnMalformedInput and implOnUnmappableInput from being too recursive
private boolean malformedInputCalled = false;
private boolean unmappableCharacterCalled = false;
-
+
/*
* Constructs a CharsetDecoderICU based on the information provided from a CharsetICU object.
- *
+ *
* @param cs The CharsetICU object containing information about the charset to decode.
*/
CharsetDecoderICU(CharsetICU cs) {
final boolean isFallbackUsed() {
    /* This query has a fixed answer: fallback is always reported as in use. */
    final boolean fallbackUsed = true;
    return fallbackUsed;
}
-
+
/**
* Fallback is currently always used by icu4j decoders.
*/
static final boolean isToUUseFallback() {
    // Forward to the one-argument overload, asking for fallback.
    final boolean requestFallback = true;
    return isToUUseFallback(requestFallback);
}
-
+
/**
* Fallback is currently always used by icu4j decoders.
*/
static final boolean isToUUseFallback(boolean iUseFallback) {
    // The argument is not consulted; the result is true for either value.
    final boolean alwaysFallback = true;
    return alwaysFallback;
}
-
+
/**
* Sets the action to be taken if an illegal sequence is encountered
- *
+ *
* @param newAction action to be taken
* @exception IllegalArgumentException
* @stable ICU 3.6
*/
+ @Override
protected final void implOnMalformedInput(CodingErrorAction newAction) {
// Re-entrancy guard: while the flag is set (see the REPLACE branch below),
// a nested invocation of this method returns without doing anything.
if (malformedInputCalled)
return;
-
+
// For REPLACE, switch the java.nio superclass to IGNORE so that it does not
// perform its own replacement. The flag is raised around the super call so
// that any nested call back into this method is stopped by the guard above.
if (newAction == CodingErrorAction.REPLACE) {
malformedInputCalled = true;
super.onMalformedInput(CodingErrorAction.IGNORE);
malformedInputCalled = false;
}
-
+
// Record the ICU callback that corresponds to the requested action.
onMalformedInput = getCallback(newAction);
}
-
+
/**
* Sets the action to be taken if an illegal sequence is encountered
- *
+ *
* @param newAction action to be taken
* @exception IllegalArgumentException
* @stable ICU 3.6
*/
+ @Override
protected final void implOnUnmappableCharacter(CodingErrorAction newAction) {
// Re-entrancy guard: while the flag is set (see the REPLACE branch below),
// a nested invocation of this method returns without doing anything.
if (unmappableCharacterCalled)
return;
-
+
// For REPLACE, switch the java.nio superclass to IGNORE so that it does not
// perform its own replacement. The flag is raised around the super call so
// that any nested call back into this method is stopped by the guard above.
if (newAction == CodingErrorAction.REPLACE) {
unmappableCharacterCalled = true;
super.onUnmappableCharacter(CodingErrorAction.IGNORE);
unmappableCharacterCalled = false;
}
-
+
// Record the ICU callback that corresponds to the requested action.
onUnmappableCharacter = getCallback(newAction);
}
-
+
/**
* Sets the callback encoder method and context to be used if an illegal sequence is encountered.
* You would normally call this twice to set both the malform and unmappable error. In this case,
} else {
/* Error: Only malformed and unmappable are handled. */
}
-
+
if (toUContext == null || !toUContext.equals(newContext)) {
toUContext = newContext;
}
}
-
+
private static CharsetCallback.Decoder getCallback(CodingErrorAction action){
if(action==CodingErrorAction.REPLACE){
return CharsetCallback.TO_U_CALLBACK_SUBSTITUTE;
* Flushes any characters saved in the converter's internal buffer and
* resets the converter.
* @param out action to be taken
- * @return result of flushing action and completes the decoding all input.
+ * @return result of flushing action and completes the decoding all input.
* Returns CoderResult.UNDERFLOW if the action succeeds.
* @stable ICU 3.6
*/
+ @Override
protected final CoderResult implFlush(CharBuffer out) {
    // One final decode pass: the EMPTY source buffer, no offsets buffer,
    // and the flush flag set.
    final IntBuffer noOffsets = null;
    final boolean flush = true;
    return decode(EMPTY, out, noOffsets, flush);
}
-
+
/**
* Resets the to Unicode mode of converter
* @stable ICU 3.6
*/
+ @Override
protected void implReset() {
// Zero the to-Unicode status word and the count of bytes held in toUBytesArray.
toUnicodeStatus = 0 ;
toULength = 0;
// Discard anything left in the char overflow (error) buffer.
charErrorBufferLength = 0;
charErrorBufferBegin = 0;
-
+
/* store previous UChars/chars to continue partial matches */
// All preToU* bookkeeping goes back to zero, and so does the mode.
preToUBegin = 0;
preToULength = 0; /* negative: replay */
- preToUFirstLength = 0;
+ preToUFirstLength = 0;
mode = 0;
}
-
+
/**
* Decodes one or more bytes. The default behaviour of the converter
- * is stop and report if an error in input stream is encountered.
+ * is stop and report if an error in input stream is encountered.
* To set different behaviour use @see CharsetDecoder.onMalformedInput()
- * This method allows a buffer by buffer conversion of a data stream.
- * The state of the conversion is saved between calls to convert.
- * Among other things, this means multibyte input sequences can be
- * split between calls. If a call to convert results in an Error, the
- * conversion may be continued by calling convert again with suitably
- * modified parameters.All conversions should be finished with a call to
+ * This method allows a buffer by buffer conversion of a data stream.
+ * The state of the conversion is saved between calls to convert.
+ * Among other things, this means multibyte input sequences can be
+ * split between calls. If a call to convert results in an Error, the
+ * conversion may be continued by calling convert again with suitably
+ * modified parameters.All conversions should be finished with a call to
* the flush method.
* @param in buffer to decode
* @param out buffer to populate with decoded result
* action succeeds or more input is needed for completing the decoding action.
* @stable ICU 3.6
*/
+ @Override
protected CoderResult decodeLoop(ByteBuffer in,CharBuffer out){
if(in.remaining() < toUCountPending()){
return CoderResult.UNDERFLOW;
// toULength = 0;
// return CoderResult.UNDERFLOW;
// }
-
+
in.position(in.position() + toUCountPending());
-
+
/* do the conversion */
CoderResult ret = decode(in, out, null, false);
- // ok was there input held in the previous invocation of decodeLoop
+ // ok was there input held in the previous invocation of decodeLoop
// that resulted in output in this invocation?
in.position(in.position() - toUCountPending());
-
+
return ret;
}
* action succeeds or more input is needed for completing the decoding action.
*/
abstract CoderResult decodeLoop(ByteBuffer in, CharBuffer out, IntBuffer offsets, boolean flush);
-
+
/*
* Implements the ICU semantic for decode operation
* @param source The input byte buffer
* action succeeds or more input is needed for completing the decoding action.
*/
final CoderResult decode(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
-
+
/* check parameters */
if (target == null || source == null) {
throw new IllegalArgumentException();
}
-
+
/*
* Make sure that the buffer sizes do not exceed the number range for
* int32_t because some functions use the size (in units or bytes)
return;
}
*/
-
+
/* flush the target overflow buffer */
if (charErrorBufferLength > 0) {
int i = 0;
/* the overflow buffer is completely copied to the target */
charErrorBufferLength = 0;
}
-
+
if (!flush && !source.hasRemaining() && toULength == 0 && preToULength >= 0) {
/* the overflow buffer is emptied and there is no new input: we are done */
return CoderResult.UNDERFLOW;
}
-
+
/*
* Do not simply return with a buffer overflow error if
* !flush && t==targetLimit
* For example, the skip callback may be called;
* it does not output anything.
*/
-
+
return toUnicodeWithCallback(source, target, offsets, flush);
}
}
} */
final CoderResult toUnicodeWithCallback(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush){
-
+
int sourceIndex;
int errorInputLength;
boolean converterSawEndOfInput, calledCallback;
/* variables for m:n conversion */
ByteBuffer replayArray = ByteBuffer.allocate(EXT_MAX_BYTES);
int replayArrayIndex = 0;
-
+
ByteBuffer realSource=null;
boolean realFlush=false;
int realSourceIndex=0;
-
+
CoderResult cr = CoderResult.UNDERFLOW;
-
+
/* get the converter implementation function */
sourceIndex=0;
sourceIndex=-1;
preToULength=0;
}
-
+
/*
* loop for conversion and error handling
*
* s<sourceLimit before converterSawEndOfInput is checked
*/
converterSawEndOfInput= (cr.isUnderflow() && flush && source.remaining()==0 && toULength == 0);
-
+
/* no callback called yet for this iteration */
calledCallback=false;
-
+
/* no sourceIndex adjustment for conversion, only for callback output */
errorInputLength=0;
-
+
/*
* loop for offsets and error handling
*
int length=(target.position()-t);
if(length>0) {
updateOffsets(offsets, length, sourceIndex, errorInputLength);
-
-
+
+
/*
* if a converter handles offsets and updates the offsets
* pointer at the end, then pArgs->offset should not change
*/
//TODO: pArgs->offsets=offsets+=length;
/* }
-
+
if(sourceIndex>=0) {
sourceIndex+=(source.position()-s);
}
-
+
} */
-
+
if(preToULength<0) {
/*
* switch the source to new replay units (cannot occur while replaying)
realSource=source;
realFlush=flush;
realSourceIndex=sourceIndex;
-
+
//UConverterUtility.uprv_memcpy(replayArray, replayBegin, preToUArray, preToUBegin, -preToULength);
replayArray.put(preToUArray,0, -preToULength);
// reset position
if((sourceIndex+=preToULength)<0) {
sourceIndex=-1;
}
-
+
preToULength=0;
} else {
/* see implementation note before _fromUnicodeWithCallback() */
Assert.assrt(realSource==null);
}
}
-
+
/* update pointers */
s=source.position();
//t=target.position();
-
+
if(cr.isUnderflow()) {
if(s<source.limit())
{
* the entire input stream is consumed
* and there is a partial, truncated input sequence left
*/
-
+
/* inject an error and continue with callback handling */
cr = CoderResult.malformedForLength(toULength);
calledCallback=false; /* new error condition */
if(!converterSawEndOfInput) {
break;
}
-
+
/* reset the converter without calling the callback function */
implReset();
}
-
+
/* done successfully */
return cr;
}
}
-
+
/* U_FAILURE(*err) */
{
-
+
if( calledCallback || cr.isOverflow() ||
(cr.isMalformed() && cr.isUnmappable())
) {
return cr;
}
}
-
+
/* copy toUBytes[] to invalidCharBuffer[] */
errorInputLength=invalidCharLength=toULength;
if(errorInputLength>0) {
copy(toUBytesArray, 0, invalidCharBuffer, 0, errorInputLength);
}
-
+
/* set the converter state to deal with the next character */
toULength=0;
-
+
/* call the callback function */
cr = toCharErrorBehaviour.call(this, toUContext, source, target, offsets, invalidCharBuffer, errorInputLength, cr);
/*
/*
* Returns the number of chars held in the converter's internal state
- * because more input is needed for completing the conversion. This function is
+ * because more input is needed for completing the conversion. This function is
* useful for mapping semantics of ICU's converter interface to those of iconv,
* and this information is not needed for normal conversion.
* @return The number of chars in the state. -1 if an error is encountered.
return 0;
}
}
-
+
private void copy(byte[] src, int srcOffset, char[] dst, int dstOffset, int length) {
for(int i=srcOffset; i<length; i++){
* @return A CoderResult object that contains the error result when an error occurs.
*/
static final CoderResult toUWriteUChars( CharsetDecoderICU cnv,
- char[] ucharsArray, int ucharsBegin, int length,
+ char[] ucharsArray, int ucharsBegin, int length,
CharBuffer target, IntBuffer offsets, int sourceIndex) {
-
+
CoderResult cr = CoderResult.UNDERFLOW;
-
+
/* write UChars */
if(offsets==null) {
while(length>0 && target.hasRemaining()) {
}
}
/* write overflow */
- if(length>0) {
+ if(length>0) {
cnv.charErrorBufferLength= 0;
cr = CoderResult.OVERFLOW;
do {
/* Note: Currently, this method is not being used because the callback method calls toUWriteUChars with
* the substitution characters. Will leave in here for the time being. To be removed later. (4.0)
*/
- /*CoderResult cbToUWriteSub(CharsetDecoderICU decoder,
- ByteBuffer source, CharBuffer target,
+ /*CoderResult cbToUWriteSub(CharsetDecoderICU decoder,
+ ByteBuffer source, CharBuffer target,
IntBuffer offsets){
String sub = decoder.replacement();
CharsetICU cs = (CharsetICU) decoder.charset();
} else {
return CharsetDecoderICU.toUWriteUChars(decoder, sub.toCharArray(),
0, sub.length(), target, offsets, source.position());
-
+
}
}*/
-
+
/**
* Returns the maxBytesPerChar value for the Charset that created this decoder.
* @return maxBytesPerChar
/**
* An abstract class that provides framework methods of decoding operations for concrete
- * subclasses.
+ * subclasses.
* In the future this class will contain API that will implement converter semantics of ICU4C.
* @stable ICU 3.6
*/
private CharsetCallback.Encoder onMalformedInput = CharsetCallback.FROM_U_CALLBACK_STOP;
CharsetCallback.Encoder fromCharErrorBehaviour = new CharsetCallback.Encoder() {
+ @Override
public CoderResult call(CharsetEncoderICU encoder, Object context,
CharBuffer source, ByteBuffer target, IntBuffer offsets,
char[] buffer, int length, int cp, CoderResult cr) {
/*
* Constructs a new encoder for the given charset
- *
+ *
* @param cs
* for which the decoder is created
* @param replacement
/**
* Sets the action to be taken if an illegal sequence is encountered
- *
+ *
* @param newAction
* action to be taken
* @exception IllegalArgumentException
* @stable ICU 3.6
*/
+ @Override
protected void implOnMalformedInput(CodingErrorAction newAction) {
    // Translate the java.nio action into its ICU callback, then remember it.
    final CharsetCallback.Encoder callback = getCallback(newAction);
    onMalformedInput = callback;
}
-
+
/**
* Sets the action to be taken if an illegal sequence is encountered
- *
+ *
* @param newAction
* action to be taken
* @exception IllegalArgumentException
* @stable ICU 3.6
*/
+ @Override
protected void implOnUnmappableCharacter(CodingErrorAction newAction) {
    // Translate the java.nio action into its ICU callback, then remember it.
    final CharsetCallback.Encoder callback = getCallback(newAction);
    onUnmappableInput = callback;
}
-
+
/**
* Sets the callback encoder method and context to be used if an illegal sequence is encountered.
* You would normally call this twice to set both the malform and unmappable error. In this case,
} else {
/* Error: Only malformed and unmappable are handled. */
}
-
+
if (fromUContext == null || !fromUContext.equals(newContext)) {
setFromUContext(newContext);
}
/**
* Sets fromUContext used in callbacks.
- *
+ *
* @param newContext Object
* @exception IllegalArgumentException The object is an illegal argument for UContext.
* @stable ICU 4.0
public final void setFromUContext(Object newContext) {
    // Plain assignment; the new context is stored as given.
    this.fromUContext = newContext;
}
-
+
private static CharsetCallback.Encoder getCallback(CodingErrorAction action) {
if (action == CodingErrorAction.REPLACE) {
return CharsetCallback.FROM_U_CALLBACK_SUBSTITUTE;
* Flushes any characters saved in the converter's internal buffer and
* resets the converter.
* @param out action to be taken
- * @return result of flushing action and completes the decoding all input.
+ * @return result of flushing action and completes the decoding all input.
* Returns CoderResult.UNDERFLOW if the action succeeds.
* @stable ICU 3.6
*/
+ @Override
protected CoderResult implFlush(ByteBuffer out) {
    // One final encode pass: the EMPTY source buffer, no offsets,
    // and the flush flag set.
    final boolean flush = true;
    final CoderResult result = encode(EMPTY, out, null, flush);
    return result;
}
* Resets the from Unicode mode of converter
* @stable ICU 3.6
*/
+ @Override
protected void implReset() {
errorBufferLength = 0;
fromUnicodeStatus = 0;
* action succeeds or more input is needed for completing the decoding action.
* @stable ICU 3.6
*/
+ @Override
protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
if (!in.hasRemaining() && this.errorBufferLength == 0) { // make sure the errorBuffer is empty
// The Java framework should have already substituted what was left.
/* callback handling */
{
int codePoint;
-
+
/* get and write the code point */
codePoint = fromUChar32;
errorInputLength = UTF16.append(invalidUCharBuffer, 0,
}*/
/**
* Overrides super class method
- * @stable ICU 3.6
+ * @stable ICU 3.6
*/
+ @Override
public boolean isLegalReplacement(byte[] repl) {
    // The bytes are not inspected; every replacement is accepted.
    final boolean accepted = true;
    return accepted;
}
out.put(bytesArray[bytesBegin]);
bytesBegin++;
}
- // success
+ // success
bytesLength = 0;
} catch (BufferOverflowException ex) {
cr = CoderResult.OVERFLOW;
--obl;
}
}
- //write overflow
+ //write overflow
cnv.errorBufferLength = bytesLimit - bytesBegin;
if (cnv.errorBufferLength > 0) {
int index = 0;
/*
* Returns the number of chars held in the converter's internal state
- * because more input is needed for completing the conversion. This function is
+ * because more input is needed for completing the conversion. This function is
* useful for mapping semantics of ICU's converter interface to those of iconv,
* and this information is not needed for normal conversion.
* @return The number of chars in the state. -1 if an error is encountered.
}
/**
- *
+ *
* @param source
*/
private final void setSourcePosition(CharBuffer source) {
// Moves the source position backwards by fromUCountPending(), i.e. by the
// count that method reports as still pending in the converter.
- // ok was there input held in the previous invocation of encodeLoop
+ // ok was there input held in the previous invocation of encodeLoop
// that resulted in output in this invocation?
source.position(source.position() - fromUCountPending());
}
*/
if (cr.isOverflow()) {
/* Overflowed target. Now, we'll write into the charErrorBuffer.
- * It's a fixed size. If we overflow it...Hm
+ * It's a fixed size. If we overflow it...Hm
*/
/* start the new target at the first free slot in the error buffer */
* a lead surrogate followed by a trail surrogate. This method can change
* the source position and will modify fromUChar32.
* </p>
- *
+ *
* <p>
* If <code>null</code> is returned, then there was success in reading a
* surrogate pair, the codepoint is stored in <code>fromUChar32</code> and
* <code>fromUChar32</code> should be reset (to 0) after being read.
* </p>
- *
+ *
* @param source
* The encoding source.
* @param lead
* requirement, the calling method must also increment the index if this method returns
* <code>null</code>.
* </p>
- *
- *
+ *
+ *
* @param source
* The encoding source.
* @param lead
fromUChar32 = UCharacter.getCodePoint(lead, trail);
return null;
}
-
+
/**
* Returns the maxCharsPerByte value for the Charset that created this encoder.
* @return maxCharsPerByte
public final float maxCharsPerByte() {
    // The value is stored on the owning CharsetICU, not on this object.
    final CharsetICU owner = (CharsetICU) charset();
    return owner.maxCharsPerByte;
}
-
+
/**
* Calculates the size of a buffer for conversion from Unicode to a charset.
* The calculated size is guaranteed to be sufficient for this conversion.
maxBytesPerChar = 4;
minBytesPerChar = 1;
maxCharsPerByte = 1;
-
+
isEmptySegment = false;
}
gbDecoder = (CharsetMBCS.CharsetDecoderMBCS) gbCharset.newDecoder();
}
+ @Override
protected void implReset() {
// Reset the state kept by the superclass first.
super.implReset();
// Also reset the decoder obtained from gbCharset.
gbDecoder.implReset();
// Clear the empty-segment flag.
isEmptySegment = false;
}
+ @Override
protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
CoderResult err = CoderResult.UNDERFLOW;
byte[] tempBuf = new byte[2];
* add another bit to distinguish a 0 byte from not having seen a lead byte
*/
toUnicodeStatus = mySourceChar | 0x100;
- isEmptySegment = false; /* the segment has something, either valid or will produce a different error, so reset this */
+ isEmptySegment = false; /* the segment has something, either valid or will produce a different error, so reset this */
}
continue;
} else {
* - We include at least the first byte in the illegal sequence.
* - If any of the non-initial bytes could be the start of a character,
* we stop the illegal sequence before the first one of those
- *
+ *
* In HZ DBCS, if the second byte is in the 21..7e range,
* we report only the first byte as the illegal sequence.
* Otherwise we convert or report the pair of bytes.
gbEncoder = (CharsetMBCS.CharsetEncoderMBCS) gbCharset.newEncoder();
}
+ @Override
protected void implReset() {
// Reset the state kept by the superclass first.
super.implReset();
// Also reset the encoder obtained from gbCharset.
gbEncoder.implReset();
// Clear the isTargetUCharDBCS flag.
isTargetUCharDBCS = false;
}
+ @Override
protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
int length = 0;
int[] targetUniChar = new int[] { 0 };
}
}
+ @Override
public CharsetDecoder newDecoder() {
    // Every call builds a new HZ decoder tied to this charset.
    final CharsetDecoder decoder = new CharsetDecoderHZ(this);
    return decoder;
}
+ @Override
public CharsetEncoder newEncoder() {
    // Every call builds a new HZ encoder tied to this charset.
    final CharsetEncoder encoder = new CharsetEncoderHZ(this);
    return encoder;
}
-
+
+ @Override
void getUnicodeSetImpl( UnicodeSet setFillIn, int which){
setFillIn.add(0,0x7f);
// CharsetMBCS mbcshz = (CharsetMBCS)CharsetICU.forNameICU("icu-internal-25546");
* Copyright (C) 2006-2016, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
-*/
+*/
package com.ibm.icu.charset;
int options;
float maxCharsPerByte;
-
+
String name; /* +4: 60 internal name of the converter- invariant chars */
int codepage; /* +64: 4 codepage # (now IBM-$codepage) */
byte subChar[/*UCNV_MAX_SUBCHAR_LEN*/]; /* +72: 4 [note: 4 and 8 byte boundary] */
byte subCharLen; /* +76: 1 */
-
+
byte hasToUnicodeFallback; /* +77: 1 UBool needs to be changed to UBool to be consistent across platform */
byte hasFromUnicodeFallback; /* +78: 1 */
short unicodeMask; /* +79: 1 bit 0: has supplementary bit 1: has single surrogates */
byte subChar1; /* +80: 1 single-byte substitution character for IBM MBCS (0 if none) */
//byte reserved[/*19*/]; /* +81: 19 to round out the structure */
-
-
+
+
// typedef enum UConverterUnicodeSet {
- /**
- * Parameter that select the set of roundtrippable Unicode code points.
+ /**
+ * Parameter that select the set of roundtrippable Unicode code points.
* @stable ICU 4.0
*/
- public static final int ROUNDTRIP_SET=0;
+ public static final int ROUNDTRIP_SET=0;
/**
* Select the set of Unicode code points with roundtrip or fallback mappings.
* Not supported at this point.
public static final int ROUNDTRIP_AND_FALLBACK_SET =1;
//} UConverterUnicodeSet;
-
+
/**
- *
+ *
* @param icuCanonicalName
* @param canonicalName
* @param aliases
}
this.icuCanonicalName = icuCanonicalName;
}
-
+
/**
* Ascertains if a charset is a sub set of this charset
* Implements the abstract method of super class.
* @return true if the given charset is a subset of this charset
* @stable ICU 3.6
*/
+ @Override
public boolean contains(Charset cs){
if (null == cs) {
return false;
algorithmicCharsets.put("LMBCS-18", "com.ibm.icu.charset.CharsetLMBCS");
algorithmicCharsets.put("LMBCS-19", "com.ibm.icu.charset.CharsetLMBCS");
algorithmicCharsets.put("BOCU-1", "com.ibm.icu.charset.CharsetBOCU1" );
- algorithmicCharsets.put("SCSU", "com.ibm.icu.charset.CharsetSCSU" );
+ algorithmicCharsets.put("SCSU", "com.ibm.icu.charset.CharsetSCSU" );
algorithmicCharsets.put("US-ASCII", "com.ibm.icu.charset.CharsetASCII" );
algorithmicCharsets.put("ISO-8859-1", "com.ibm.icu.charset.Charset88591" );
algorithmicCharsets.put("UTF-16", "com.ibm.icu.charset.CharsetUTF16" );
Class<?>[] paramTypes = new Class<?>[]{ String.class, String.class, String[].class};
final Constructor<? extends CharsetICU> c = cs.getConstructor(paramTypes);
Object[] params = new Object[]{ icuCanonicalName, javaCanonicalName, aliases};
-
+
// Run constructor
try {
conv = c.newInstance(params);
}
}catch(ClassNotFoundException ex){
}catch(NoSuchMethodException ex){
- }catch (IllegalAccessException ex){
- }catch (InstantiationException ex){
+ }catch (IllegalAccessException ex){
+ }catch (InstantiationException ex){
}
- throw new UnsupportedCharsetException( icuCanonicalName+": "+"Could not load " + className);
+ throw new UnsupportedCharsetException( icuCanonicalName+": "+"Could not load " + className);
}
-
+
static final boolean isSurrogate(int c){
    // All values in 0xD800..0xDFFF share the same bits above the low 11.
    // Comparing those upper bits also rejects anything outside 16 bits,
    // including negative values.
    final int upperBits = c >>> 11;
    return upperBits == (0xd800 >>> 11);
}
-
+
/*
- * Returns the default charset name
+ * Returns the default charset name
*/
// static final String getDefaultCharsetName(){
// String defaultEncoding = new InputStreamReader(new ByteArrayInputStream(new byte[0])).getEncoding();
* available. If the ICU charset provider does not support
* the specified charset, then try other charset providers
* including the standard Java charset provider.
- *
+ *
* @param charsetName The name of the requested charset,
* may be either a canonical name or an alias
* @return A charset object for the named charset
* This follows ucnv.c method ucnv_detectUnicodeSignature() to detect the
* start of the stream for example U+FEFF (the Unicode BOM/signature
* character) that can be ignored.
- *
+ *
* Detects Unicode signature byte sequences at the start of the byte stream
* and returns number of bytes of the BOM of the indicated Unicode charset.
* 0 is returned when no Unicode signature is recognized.
- *
+ *
*/
// TODO This should be proposed as CharsetDecoderICU API.
// static String detectUnicodeSignature(ByteBuffer source) {
// /* no known Unicode signature byte sequence recognized */
// return null;
// }
-
-
+
+
abstract void getUnicodeSetImpl(UnicodeSet setFillIn, int which);
-
+
/**
* Returns the set of Unicode code points that can be converted by an ICU Converter.
*
* <p>The current implementation returns only one kind of set (UCNV_ROUNDTRIP_SET): The set of all Unicode code points that can be
- * roundtrip-converted (converted without any data loss) with the converter This set will not include code points that have fallback
+ * roundtrip-converted (converted without any data loss) with the converter This set will not include code points that have fallback
* mappings or are only the result of reverse fallback mappings. See UTR #22 "Character Mapping Markup Language" at <a href="http://www.unicode.org/reports/tr22/">http://www.unicode.org/reports/tr22/</a>
- *
+ *
* <p>In the future, there may be more UConverterUnicodeSet choices to select sets with different properties.
*
* <p>This is useful for example for
* by comparing its roundtrip set with the set of ExemplarCharacters from
* ICU's locale data or other sources</li></ul>
*
- * @param setFillIn A valid UnicodeSet. It will be cleared by this function before
+ * @param setFillIn A valid UnicodeSet. It will be cleared by this function before
* the converter's specific set is filled in.
* @param which A selector; currently ROUNDTRIP_SET is the only supported value.
- * @throws IllegalArgumentException if the parameters does not match.
+ * @throws IllegalArgumentException if the parameters does not match.
* @stable ICU 4.0
*/
public void getUnicodeSet(UnicodeSet setFillIn, int which){
// Empty the caller's set before anything is added to it.
setFillIn.clear();
// Hand off to the charset-specific implementation to fill the set.
// NOTE(review): `which` is passed through without validation here; confirm
// whether the IllegalArgumentException promised in the Javadoc is raised by
// the implementations.
getUnicodeSetImpl(setFillIn, which);
}
-
+
/**
* Returns whether or not the charset of the converter has a fixed number of bytes
* per charset character.
if (this instanceof CharsetASCII || this instanceof CharsetUTF32) {
return true;
}
-
+
if (this instanceof CharsetMBCS) {
if (((CharsetMBCS)this).sharedData.staticData.maxBytesPerChar == ((CharsetMBCS)this).sharedData.staticData.minBytesPerChar) {
return true;
}
}
-
+
return false;
}
-
+
static void getNonSurrogateUnicodeSet(UnicodeSet setFillIn){
    // Two inclusive ranges that together leave out 0xD800..0xDFFF.
    final int[][] ranges = {
        { 0, 0xd7ff },
        { 0xe000, 0x10ffff },
    };
    for (int[] range : ranges) {
        setFillIn.add(range[0], range[1]);
    }
}
-
+
static void getCompleteUnicodeSet(UnicodeSet setFillIn){
    // One inclusive range from 0 through 0x10FFFF.
    final int first = 0;
    final int last = 0x10ffff;
    setFillIn.add(first, last);
}
class CharsetISO2022 extends CharsetICU {
private UConverterDataISO2022 myConverterData;
private int variant; // one of enum {ISO_2022_JP, ISO_2022_KR, or ISO_2022_CN}
-
+
private static final byte[] SHIFT_IN_STR = { 0x0f };
// private static final byte[] SHIFT_OUT_STR = { 0x0e };
*/
private static final char HWKANA_START = 0xff61;
private static final char HWKANA_END = 0xff9f;
-
+
/*
* 94-character sets with native byte values A1..FE are encoded in ISO 2022
* as bytes 21..7E. (Subtract 0x80.)
*/
private static final char GR96_START = 0xa0;
private static final char GR96_END = 0xff;
-
+
/* for ISO-2022-JP and -CN implementations */
// typedef enum {
/* shared values */
private static final byte INVALID_STATE = -1;
private static final byte ASCII = 0;
-
+
private static final byte SS2_STATE = 0x10;
private static final byte SS3_STATE = 0x11;
-
+
/* JP */
private static final byte ISO8859_1 = 1;
private static final byte ISO8859_7 = 2;
private static final byte GB2312 = 6;
private static final byte KSC5601 = 7;
private static final byte HWKANA_7BIT = 8; /* Halfwidth Katakana 7 bit */
-
+
/* CN */
/* the first few enum constants must keep their values because they corresponds to myConverterArray[] */
private static final byte GB2312_1 = 1;
private static final byte ISO_IR_165= 2;
private static final byte CNS_11643 = 3;
-
+
/*
* these are used in StateEnum and ISO2022State variables,
* but CNS_11643 must be used to index into myConverterArray[]
private static final byte CNS_11643_6 = 0x26;
private static final byte CNS_11643_7 = 0x27;
// } StateEnum;
-
+
public CharsetISO2022(String icuCanonicalName, String javaCanonicalName, String[] aliases) {
super(icuCanonicalName, javaCanonicalName, aliases);
-
+
myConverterData = new UConverterDataISO2022();
-
+
int versionIndex = icuCanonicalName.indexOf("version=");
int version = Integer.decode(icuCanonicalName.substring(versionIndex+8, versionIndex+9)).intValue();
-
+
myConverterData.version = version;
-
+
if (icuCanonicalName.indexOf("locale=ja") > 0) {
ISO2022InitJP(version);
} else if (icuCanonicalName.indexOf("locale=zh") > 0) {
} else /* if (icuCanonicalName.indexOf("locale=ko") > 0) */ {
ISO2022InitKR(version);
}
-
+
myConverterData.currentEncoder = (CharsetEncoderMBCS)myConverterData.currentConverter.newEncoder();
myConverterData.currentDecoder = (CharsetDecoderMBCS)myConverterData.currentConverter.newDecoder();
}
-
+
private void ISO2022InitJP(int version) {
variant = ISO_2022_JP;
-
+
maxBytesPerChar = 6;
minBytesPerChar = 1;
maxCharsPerByte = 1;
- // open the required converters and cache them
+ // open the required converters and cache them
if((jpCharsetMasks[version]&CSM(ISO8859_7)) != 0) {
myConverterData.myConverterArray[ISO8859_7] = ((CharsetMBCS)CharsetICU.forNameICU("ISO8859_7")).sharedData;
}
if ((jpCharsetMasks[version]&CSM(KSC5601)) != 0) {
myConverterData.myConverterArray[KSC5601] = ((CharsetMBCS)CharsetICU.forNameICU("ksc_5601")).sharedData;
}
-
+
// create a generic CharsetMBCS object
myConverterData.currentConverter = (CharsetMBCS)CharsetICU.forNameICU("icu-internal-25546");
}
-
+
private void ISO2022InitCN(int version) {
variant = ISO_2022_CN;
-
+
maxBytesPerChar = 8;
minBytesPerChar = 1;
maxCharsPerByte = 1;
myConverterData.myConverterArray[GB2312_1] = ((CharsetMBCS)CharsetICU.forNameICU("ibm-5478")).sharedData;
if (version == 1) {
myConverterData.myConverterArray[ISO_IR_165] = ((CharsetMBCS)CharsetICU.forNameICU("iso-ir-165")).sharedData;
- }
+ }
myConverterData.myConverterArray[CNS_11643] = ((CharsetMBCS)CharsetICU.forNameICU("cns-11643-1992")).sharedData;
-
+
// create a generic CharsetMBCS object
myConverterData.currentConverter = (CharsetMBCS)CharsetICU.forNameICU("icu-internal-25546");
}
-
+
private void ISO2022InitKR(int version) {
variant = ISO_2022_KR;
-
+
maxBytesPerChar = 8;
minBytesPerChar = 1;
maxCharsPerByte = 1;
-
+
if (version == 1) {
myConverterData.currentConverter = (CharsetMBCS)CharsetICU.forNameICU("icu-internal-25546");
myConverterData.currentConverter.subChar1 = fromUSubstitutionChar[0][0];
} else {
myConverterData.currentConverter = (CharsetMBCS)CharsetICU.forNameICU("ibm-949");
}
-
+
myConverterData.currentEncoder = (CharsetEncoderMBCS)myConverterData.currentConverter.newEncoder();
myConverterData.currentDecoder = (CharsetDecoderMBCS)myConverterData.currentConverter.newDecoder();
}
-
+
/*
* ISO 2022 control codes must not be converted from Unicode
* because they would mess up the byte stream.
* The bit mask 0x0800c000 has bits set at bit positions 0xe, 0xf, 0x1b
* corresponding to SO, SI, and ESC.
*/
- private static boolean IS_2022_CONTROL(int c) {
+ private static boolean IS_2022_CONTROL(int c) {
return (c<0x20) && (((1<<c) & 0x0800c000) != 0);
}
-
+
/*
* Check that the result is a 2-byte value with each byte in the range A1..FE
* (strict EUC DBCS) before accepting it and subtracting 0x80 from each byte
* return 0 if out of range.
*/
private static int _2022FromGR94DBCS(int value) {
- if ((value <= 0xfefe && value >= 0xa1a1) &&
+ if ((value <= 0xfefe && value >= 0xa1a1) &&
((short)(value&UConverterConstants.UNSIGNED_BYTE_MASK) <= 0xfe && ((short)(value&UConverterConstants.UNSIGNED_BYTE_MASK) >= 0xa1))) {
return (value - 0x8080); /* shift down to 21..7e byte range */
} else {
return 0; /* not valid for ISO 2022 */
}
}
-
+
/*
- * Commented out because Ticket 5691: Call sites now check for validity. They can just += 0x8080 after that.
- *
+ * Commented out because Ticket 5691: Call sites now check for validity. They can just += 0x8080 after that.
+ *
* This method does the reverse of _2022FromGR94DBCS(). Given the 2022 code point, it returns the
* 2 byte value that is in the range A1..FE for each byte. Otherwise it returns the 2022 code point
- * unchanged.
- *
+ * unchanged.
+ *
private static int _2022ToGR94DBCS(int value) {
int returnValue = value + 0x8080;
-
- if ((returnValue <= 0xfefe && returnValue >= 0xa1a1) &&
+
+ if ((returnValue <= 0xfefe && returnValue >= 0xa1a1) &&
((short)(returnValue&UConverterConstants.UNSIGNED_BYTE_MASK) <= 0xfe && ((short)(returnValue&UConverterConstants.UNSIGNED_BYTE_MASK) >= 0xa1))) {
return returnValue;
} else {
return value;
}
}*/
-
+
/*
 * Tells whether a StateEnum charset value denotes a DBCS charset, i.e.
 * whether it lies in the inclusive range JISX208..KSC5601.
 */
private static boolean IS_JP_DBCS(byte cs) {
    if (cs < JISX208) {
        return false;
    }
    return cs <= KSC5601;
}
-
+
/*
 * Builds the single-bit mask for a charset number: bit 'cs' set, truncated
 * to a short.
 */
private static short CSM(short cs) {
    final int maskBit = 1 << cs;
    return (short) maskBit;
}
-
+
/* This gets the valid index of the end of buffer when decoding. */
private static int getEndOfBuffer_2022(ByteBuffer source) {
int sourceIndex = source.position();
byte mySource = 0;
mySource = source.get(sourceIndex);
-
+
while (source.hasRemaining() && mySource != ESC_2022) {
mySource = source.get();
if (mySource == ESC_2022) {
}
return sourceIndex;
}
-
+
/*
* This is a simple version of _MBCSGetNextUChar() calls the method in CharsetDecoderMBCS and returns
* the value given.
* otherwise the Unicode code point
*/
private int MBCSSimpleGetNextUChar(UConverterSharedData sharedData,
- ByteBuffer source,
+ ByteBuffer source,
boolean useFallback) {
int returnValue;
UConverterSharedData tempSharedData = myConverterData.currentConverter.sharedData;
myConverterData.currentConverter.sharedData = sharedData;
returnValue = myConverterData.currentDecoder.simpleGetNextUChar(source, useFallback);
myConverterData.currentConverter.sharedData = tempSharedData;
-
+
return returnValue;
}
return 0; /* no mapping */
}
}
-
+
/*
* Each of these charset masks (with index x) contains a bit for a charset in exact correspondence
* to whether that charset is used in the corresponding version x of ISO_2022, locale=ja,version=x
- *
+ *
* Note: The converter uses some leniency:
* - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in
* all versions, not just JIS7 and JIS8.
private byte []cs; /* Charset number for SI (G0)/SO (G1)/SS2 (G2)/SS3 (G3) */
private byte g; /* 0..3 for G0..G3 (SI/SO/SS2/SS3) */
private byte prevG; /* g before single shift (SS2 or SS3) */
-
+
ISO2022State() {
// Four slots, one per graphic set G0..G3 (see the comment on the cs field).
cs = new byte[4];
}
-
+
/* Returns this state to its initial values: all charset slots 0, g and prevG 0. */
void reset() {
    for (int slot = 0; slot < cs.length; slot++) {
        cs[slot] = (byte) 0;
    }
    g = 0;
    prevG = 0;
}
}
-
+
// private static final byte UCNV_OPTIONS_VERSION_MASK = 0xf;
private static final byte UCNV_2022_MAX_CONVERTERS = 10;
-
+
private static class UConverterDataISO2022 {
UConverterSharedData []myConverterArray;
CharsetEncoderMBCS currentEncoder;
int key;
int version;
boolean isEmptySegment;
-
+
UConverterDataISO2022() {
myConverterArray = new UConverterSharedData[UCNV_2022_MAX_CONVERTERS];
toU2022State = new ISO2022State();
version = 0;
isEmptySegment = false;
}
-
+
void reset() {
// Reset both ISO 2022 states: the to-Unicode one and the from-Unicode one.
toU2022State.reset();
fromU2022State.reset();
// Clear the empty-segment flag as well.
isEmptySegment = false;
}
}
-
+
private static final byte ESC_2022 = 0x1B; /* ESC */
-
+
// typedef enum {
private static final byte INVALID_2022 = -1; /* Doesn't correspond to a valid iso 2022 escape sequence */
private static final byte VALID_NON_TERMINAL_2022 = 0; /* so far corresponds to a valid iso 2022 escape sequence */
private static final byte VALID_MAYBE_TERMINAL_2022 = 2; /* so far matches one iso 2022 escape sequence, but by adding
more characters might match another escape sequence */
// } UCNV_TableStates_2022;
-
+
/*
* The way these state transition arrays work is:
* ex : ESC$B is the sequence for JISX208
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0
};
-
+
private static final short MAX_STATES_2022 = 74;
private static final int escSeqStateTable_Key_2022[/* MAX_STATES_2022 */] = {
/* 0 1 2 3 4 5 6 7 8 9 */
40139, 40140, 40141, 1123363, 35947624, 35947625, 35947626, 35947627, 35947629, 35947630,
35947631, 35947635, 35947636, 35947638
};
-
+
private static final byte escSeqStateTable_Value_2022[/* MAX_STATES_2022 */] = {
/* 0 1 2 3 4 */
- VALID_NON_TERMINAL_2022, VALID_NON_TERMINAL_2022, VALID_NON_TERMINAL_2022, VALID_NON_TERMINAL_2022, VALID_NON_TERMINAL_2022,
+ VALID_NON_TERMINAL_2022, VALID_NON_TERMINAL_2022, VALID_NON_TERMINAL_2022, VALID_NON_TERMINAL_2022, VALID_NON_TERMINAL_2022,
VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_NON_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022,
VALID_MAYBE_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022,
VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022,
VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022,
VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022, VALID_TERMINAL_2022
};
-
+
/* Type def for refactoring changeState_2022 code */
// typedef enum {
private static final byte ISO_2022_JP = 1;
private static final byte ISO_2022_KR = 2;
private static final byte ISO_2022_CN = 3;
// } Variant2022;
-
+
/* const UConverterSharedData _ISO2022Data; */
//private UConverterSharedData _ISO2022JPData;
//private UConverterSharedData _ISO2022KRData;
//private UConverterSharedData _ISO2022CNData;
-
+
/******************** to unicode ********************/
/****************************************************
* Recognized escape sequences are
INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE,
INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE
};
-
+
private final static byte nextStateToUnicodeCN[/* MAX_STATES_2022 */] = {
/* 0 1 2 3 4 5 6 7 8 9 */
INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, SS2_STATE, SS3_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE,
INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE,
INVALID_STATE, INVALID_STATE, INVALID_STATE, INVALID_STATE
};
-
+
/* runs through a state machine to determine the escape sequence - codepage correspondence */
@SuppressWarnings("fallthrough")
private CoderResult changeState_2022(CharsetDecoderICU decoder, ByteBuffer source, int var) {
int initialToULength = decoder.toULength;
byte c;
int malformLength = 0;
-
+
value = VALID_NON_TERMINAL_2022;
while (source.hasRemaining()) {
c = source.get();
malformLength++;
decoder.toUBytesArray[decoder.toULength++] = c;
value = getKey_2022(c, key, offset);
-
+
switch(value) {
-
+
case VALID_NON_TERMINAL_2022:
/* continue with the loop */
break;
-
+
case VALID_TERMINAL_2022:
key[0] = 0;
DONE = true;
break;
-
+
case INVALID_2022:
DONE = true;
break;
-
+
case VALID_MAYBE_TERMINAL_2022:
/* not ISO_2022 itself, finish here */
value = VALID_TERMINAL_2022;
}
// DONE:
myConverterData.key = key[0];
-
+
if (value == VALID_NON_TERMINAL_2022) {
/* indicate that the escape sequence is incomplete: key !=0 */
return err;
myConverterData.toU2022State.prevG = myConverterData.toU2022State.g;
}
myConverterData.toU2022State.g = 2;
- } else {
+ } else {
/* illegal to have SS2 before a matching designator */
err = CoderResult.malformedForLength(malformLength);
}
decoder.toULength = 1;
}
}
-
+
return err;
}
-
+
private static byte getKey_2022(byte c, int[]key, int[]offset) {
int togo;
int low = 0;
int hi = MAX_STATES_2022;
int oldmid = 0;
-
- togo = normalize_esq_chars_2022[(short)c&UConverterConstants.UNSIGNED_BYTE_MASK];
-
+
+ togo = normalize_esq_chars_2022[c&UConverterConstants.UNSIGNED_BYTE_MASK];
+
if (togo == 0) {
/* not a valid character anywhere in an escape sequence */
key[0] = 0;
return INVALID_2022;
}
togo = (key[0] << 5) + togo;
-
+
while (hi != low) { /* binary search */
int mid = (hi+low) >> 1; /* Finds median */
-
+
if (mid == oldmid) {
break;
}
-
+
if (escSeqStateTable_Key_2022[mid] > togo) {
hi = mid;
} else if (escSeqStateTable_Key_2022[mid] < togo) {
}
return INVALID_2022;
}
-
+
/*
* To Unicode Callback helper function
*/
cnv.toUBytesArray[0] = (byte)sourceChar;
cnv.toULength = 1;
}
-
+
if (targetUniChar == (UConverterConstants.missingCharMarker-1/* 0xfffe */)) {
err = CoderResult.unmappableForLength(1);
} else {
err = CoderResult.malformedForLength(1);
}
-
+
return err;
}
-
+
/****************************ISO-2022-JP************************************/
private class CharsetDecoderISO2022JP extends CharsetDecoderICU {
public CharsetDecoderISO2022JP(CharsetICU cs) {
super(cs);
}
-
+
+ @Override
protected void implReset() {
// Delegate to the superclass reset first.
super.implReset();
// Then reset the ISO 2022 data held by the enclosing charset. Note that
// UConverterDataISO2022.reset() clears both the to-Unicode and the
// from-Unicode state plus the empty-segment flag.
myConverterData.reset();
}
- /*
- * Map 00..7F to Unicode according to JIS X 0201.
+ /*
+ * Map 00..7F to Unicode according to JIS X 0201.
* */
private int jisx201ToU(int value) {
if (value < 0x5c) {
c2 = 0; /* invalid */
}
}
-
+
c1 >>=1;
if (c1 <= 0x2f) {
c1 += 0x70;
bytes[1] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & c2);
}
+ @Override
@SuppressWarnings("fallthrough")
protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
boolean gotoGetTrail = false;
int mySourceCharTemp = 0x0000; // use for getTrail label call.
byte cs; /* StateEnum */
byte csTemp= 0; // use for getTrail label call.
-
+
if (myConverterData.key != 0) {
/* continue with a partial escape sequence */
// goto escape;
mySourceCharTemp = 0x99;
gotoGetTrail = true;
}
-
+
while (source.hasRemaining() || gotoEscape || gotoGetTrail) {
// This code is here for the goto escape label call above.
if (gotoEscape) {
mySourceCharTemp = ESC_2022;
}
-
+
targetUniChar = UConverterConstants.missingCharMarker;
-
+
if (gotoEscape || gotoGetTrail || target.hasRemaining()) {
if (!gotoEscape && !gotoGetTrail) {
mySourceChar = source.get() & UConverterConstants.UNSIGNED_BYTE_MASK;
mySourceCharTemp = mySourceChar;
}
-
+
switch (mySourceCharTemp) {
case UConverterConstants.SI:
if (myConverterData.version == 3) {
myConverterData.isEmptySegment = false;
break;
}
-
+
case UConverterConstants.SO:
if (myConverterData.version == 3) {
/* JIS7: switch to G1 half-width Katakana */
myConverterData.toU2022State.cs[1] = HWKANA_7BIT;
myConverterData.toU2022State.g = 1;
- continue;
+ continue;
} else {
/* only JIS7 uses SI/SO, not ISO-2022-JP-x */
myConverterData.isEmptySegment = false; /* reset this, we have a different error */
break;
}
-
+
case ESC_2022:
if (!gotoEscape) {
source.position(source.position() - 1);
{
int mySourceBefore = source.position();
int toULengthBefore = this.toULength;
-
+
err = changeState_2022(this, source, variant);
/* If in ISO-2022-JP only and we successully completed an escape sequence, but previous segment was empty, create an error */
if (!gotoGetTrail && ((mySourceChar >= 0xa1) && (mySourceChar <= 0xdf) && myConverterData.version == 4 && !IS_JP_DBCS(cs))) {
/* 8-bit halfwidth katakana in any single-byte mode for JIS8 */
targetUniChar = mySourceChar + (HWKANA_START - 0xa1);
-
+
/* return from a single-shift state to the previous one */
if (myConverterData.toU2022State.g >= 2) {
myConverterData.toU2022State.g = myConverterData.toU2022State.prevG;
gotoGetTrail = false;
short trailByte;
boolean leadIsOk, trailIsOk;
-
+
trailByte = (short)(source.get(source.position()) & UConverterConstants.UNSIGNED_BYTE_MASK);
/*
* Ticket 5691: consistent illegal sequences:
* - We include at least the first byte in the illegal sequence.
* - If any of the non-initial bytes could be the start of a character,
* we stop the illegal sequence before the first one of those.
- *
+ *
* In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
* an ESC/SO/SI, we report only the first byte as the illegal sequence.
* Otherwise we convert or report the pair of bytes.
}
break;
} /* end of outer switch */
-
+
if (targetUniChar < (UConverterConstants.missingCharMarker-1/*0xfffe*/)) {
if (offsets != null) {
offsets.put(target.remaining(), source.remaining() - (mySourceChar <= 0xff ? 1 : 2));
}
target.get();
} else {
- charErrorBufferArray[charErrorBufferLength++] =
+ charErrorBufferArray[charErrorBufferLength++] =
(char)(0xdc00+(char)(targetUniChar&0x3ff));
}
} else {
return err;
}
} // end of class CharsetDecoderISO2022JP
-
+
/****************************ISO-2022-CN************************************/
private class CharsetDecoderISO2022CN extends CharsetDecoderICU {
public CharsetDecoderISO2022CN(CharsetICU cs) {
super(cs);
}
-
+
+ @Override
protected void implReset() {
// Delegate to the superclass reset first.
super.implReset();
// Then reset the ISO 2022 data held by the enclosing charset. Note that
// UConverterDataISO2022.reset() clears both the to-Unicode and the
// from-Unicode state plus the empty-segment flag.
myConverterData.reset();
}
+ @Override
@SuppressWarnings("fallthrough")
protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
CoderResult err = CoderResult.UNDERFLOW;
int mySourceCharTemp = 0x0000;
boolean gotoEscape = false;
boolean gotoGetTrailByte = false;
-
+
if (myConverterData.key != 0) {
/* continue with a partial escape sequence */
// goto escape;
// goto getTrailByte
gotoGetTrailByte = true;
}
-
+
while (source.hasRemaining() || gotoGetTrailByte || gotoEscape) {
targetUniChar = UConverterConstants.missingCharMarker;
-
+
if (target.hasRemaining() || gotoEscape) {
if (gotoEscape) {
mySourceChar = ESC_2022; // goto escape label
mySourceChar = UConverterConstants.UNSIGNED_BYTE_MASK & source.get();
mySourceCharTemp = mySourceChar;
}
-
+
switch (mySourceCharTemp) {
case UConverterConstants.SI:
myConverterData.toU2022State.g = 0;
return err;
}
continue;
-
+
case UConverterConstants.SO:
if (myConverterData.toU2022State.cs[1] != 0) {
myConverterData.toU2022State.g = 1;
myConverterData.isEmptySegment = false; /* Handling a different error, reset this to avoid future spurious errs */
break;
}
-
+
case ESC_2022:
if (!gotoEscape) {
source.position(source.position()-1);
return err;
}
continue;
-
+
/*ISO-2022-CN does not use single-byte (C1) SS2 and SS3 */
case CR:
/* falls through */
short trailByte;
// getTrailByte: label
gotoGetTrailByte = false; // reset gotoGetTrailByte
-
+
trailByte = (short)(source.get(source.position()) & UConverterConstants.UNSIGNED_BYTE_MASK);
/*
* Ticket 5691: consistent illegal sequences:
* - We include at least the first byte in the illegal sequence.
* - If any of the non-initial bytes could be the start of a character,
* we stop the illegal sequence before the first one of those.
- *
+ *
* In ISO-2022 DBCS, if the second byte is in the range 21..7e range or is
* an ESC/SO/SI, we report only the first byte as the illegal sequence.
* Otherwise we convert or report the pair of bytes.
tempBuffer.limit(tempBufLen);
targetUniChar = MBCSSimpleGetNextUChar(cnv, tempBuffer, false);
mySourceChar = (mySourceChar << 8) | trailByte;
-
+
} else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
/* report a pair of illegal bytes if the second byte is not a DBCS starter */
source.get();
charErrorBufferArray[charErrorBufferLength++] = (char)(0xdc00+(char)(targetUniChar&0x3ff));
}
} else {
- /* Call the callback function */
+ /* Call the callback function */
err = toUnicodeCallback(this, mySourceChar, targetUniChar);
break;
}
-
+
} else {
err = CoderResult.OVERFLOW;
break;
}
}
-
+
return err;
}
-
+
}
/************************ ISO-2022-KR ********************/
private class CharsetDecoderISO2022KR extends CharsetDecoderICU {
public CharsetDecoderISO2022KR(CharsetICU cs) {
super(cs);
}
-
+
+ @Override
protected void implReset() {
// Delegate to the superclass reset first.
super.implReset();
// KR-specific initial decoding state (helper defined elsewhere in this class).
setInitialStateToUnicodeKR();
// Finally reset the ISO 2022 data held by the enclosing charset. Note that
// UConverterDataISO2022.reset() clears both the to-Unicode and the
// from-Unicode state plus the empty-segment flag.
myConverterData.reset();
}
-
+
+ @Override
protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
CoderResult err = CoderResult.UNDERFLOW;
int mySourceChar = 0x0000;
boolean usingFallback;
boolean gotoGetTrailByte = false;
boolean gotoEscape = false;
-
+
if (myConverterData.version == 1) {
return decodeLoopIBM(myConverterData.currentDecoder, source, target, offsets, flush);
}
-
+
/* initialize state */
usingFallback = isFallbackUsed();
-
+
if (myConverterData.key != 0) {
/* continue with a partial escape sequence */
gotoEscape = true;
toULength = 0;
gotoGetTrailByte = true;
}
-
+
while (source.hasRemaining() || gotoGetTrailByte || gotoEscape) {
if (target.hasRemaining() || gotoGetTrailByte || gotoEscape) {
if (!gotoGetTrailByte && !gotoEscape) {
mySourceChar = (char)(source.get() & UConverterConstants.UNSIGNED_BYTE_MASK);
}
-
+
if (!gotoGetTrailByte && !gotoEscape && mySourceChar == UConverterConstants.SI) {
myConverterData.toU2022State.g = 0;
if (myConverterData.isEmptySegment) {
}
// escape label
gotoEscape = false; // reset gotoEscape flag
- myConverterData.isEmptySegment = false; /* Any invalid ESC sequences will be detected separately, so just reset this */
+ myConverterData.isEmptySegment = false; /* Any invalid ESC sequences will be detected separately, so just reset this */
err = changeState_2022(this, source, ISO_2022_KR);
if (err.isError()) {
return err;
short trailByte;
// getTrailByte label
gotoGetTrailByte = false; // reset gotoGetTrailByte flag
-
+
trailByte = (short)(source.get(source.position()) & UConverterConstants.UNSIGNED_BYTE_MASK);
targetUniChar = UConverterConstants.missingCharMarker;
/*
* - We include at least the first byte in the illegal sequence.
* - If any of the non-initial bytes could be the start of a character,
* we stop the illegal sequence before the first one of those.
- *
+ *
* In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
* an ESC/SO/SI, we report only the first byte as the illegal sequence.
* Otherwise we convert or report the pair of bytes.
int savedSourceLimit = source.limit();
int savedSourcePosition = source.position();
source.limit(source.position());
- source.position(source.position()-1);
+ source.position(source.position()-1);
targetUniChar = MBCSSimpleGetNextUChar(myConverterData.currentConverter.sharedData, source, usingFallback);
source.limit(savedSourceLimit);
source.position(savedSourcePosition);
break;
}
}
-
+
return err;
}
-
+
protected CoderResult decodeLoopIBM(CharsetDecoderMBCS cnv, ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
CoderResult err = CoderResult.UNDERFLOW;
int sourceStart;
int argTarget;
boolean gotoEscape = false;
int oldSourceLimit;
-
+
/* remember the original start of the input for offsets */
sourceStart = argSource = source.position();
-
+
if (myConverterData.key != 0) {
/* continue with a partial escape sequence */
gotoEscape = true;
}
-
+
while (gotoEscape || (!err.isError() && source.hasRemaining())) {
if (!gotoEscape) {
/* Find the end of the buffer e.g : Next Escape Seq | end of Buffer */
if (source.position() != sourceLimit) {
/*
* get the current partial byte sequence
- *
+ *
* it needs to be moved between the public and the subconverter
* so that the conversion frameword, which only sees the public
* converter, can handle truncated and illegal input etc.
cnv.toUBytesArray = toUBytesArray.clone();
}
cnv.toULength = toULength;
-
+
/*
* Convert up to the end of the input, or to before the next escape character.
* Does not handle conversion extensions because the preToU[] state etc.
}
}
argSource = source.position();
-
+
/* copy input/error/overflow buffers */
if (cnv.toULength > 0) {
toUBytesArray = cnv.toUBytesArray.clone();
}
toULength = cnv.toULength;
-
+
if (err.isOverflow()) {
if (cnv.charErrorBufferLength > 0) {
charErrorBufferArray = cnv.charErrorBufferArray.clone();
cnv.charErrorBufferLength = 0;
}
}
-
+
if (err.isError() || err.isOverflow() || (source.position() == source.limit())) {
return err;
}
return err;
}
}
-
+
/******************** from unicode **********************/
/* preference order of JP charsets */
private final static byte []jpCharsetPref = {
0x212B,
0x212C /* U+FF9F */
};
-
+
protected byte [][]fromUSubstitutionChar = new byte[][]{ { (byte)0x1A }, { (byte)0x2F, (byte)0x7E} };
/****************************ISO-2022-JP************************************/
private class CharsetEncoderISO2022JP extends CharsetEncoderICU {
public CharsetEncoderISO2022JP(CharsetICU cs) {
super(cs, fromUSubstitutionChar[0]);
}
-
+
+ @Override
protected void implReset() {
// Delegate to the superclass reset first.
super.implReset();
// Then reset the ISO 2022 data held by the enclosing charset. Note that
// UConverterDataISO2022.reset() clears both the to-Unicode and the
// from-Unicode state plus the empty-segment flag.
myConverterData.reset();
}
return (int)(UConverterConstants.UNSIGNED_INT_MASK & 0xfffe);
}
-
+
/*
* Take a valid Shift-JIS byte pair, check that it is in the range corresponding
* to JIS X 0208, and convert it to a pair of 21..7E bytes.
*/
private int _2022FromSJIS(int value) {
short trail;
-
+
if (value > 0xEFFC) {
return 0; /* beyond JIS X 0208 */
}
-
+
trail = (short)(value & UConverterConstants.UNSIGNED_BYTE_MASK);
-
+
value &= 0xff00; /* lead byte */
if (value <= 0x9f00) {
value -= 0x7000;
} else { /* 0xe000 <= value <= 0xef00 */
value -= 0xb000;
}
-
+
value <<= 1;
-
+
if (trail <= 0x9e) {
value -= 0x100;
if (trail <= 0x7e) {
} else { /* trail <= 0xfc */
value |= ((trail - 0x7e) & UConverterConstants.UNSIGNED_BYTE_MASK);
}
-
+
return value;
}
/* This overrides the cbFromUWriteSub method in CharsetEncoderICU */
- CoderResult cbFromUWriteSub (CharsetEncoderICU encoder,
+ @Override
+ CoderResult cbFromUWriteSub (CharsetEncoderICU encoder,
CharBuffer source, ByteBuffer target, IntBuffer offsets){
CoderResult err = CoderResult.UNDERFLOW;
byte[] buffer = new byte[8];
int i = 0;
byte[] subchar;
subchar = encoder.replacement();
-
+
byte cs;
if (myConverterData.fromU2022State.g == 1) {
/* JIS7: switch from G1 to G0 */
buffer[i++] = UConverterConstants.SI;
}
cs = myConverterData.fromU2022State.cs[0];
-
+
if (cs != ASCII && cs != JISX201) {
/* not in ASCII or JIS X 0201: switch to ASCII */
myConverterData.fromU2022State.cs[0] = ASCII;
buffer[i++] = 0x28;
buffer[i++] = 0x42;
}
-
+
buffer[i++] = subchar[0];
-
+
err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, i, target, offsets, source.position() - 1);
return err;
}
-
+
+ @Override
protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
CoderResult err = CoderResult.UNDERFLOW;
int sourceChar;
byte[] buffer = new byte[8];
boolean getTrail = false; // use for getTrail label
int oldSourcePos; // for proper error handling
-
+
choiceCount = 0;
-
+
/* check if the last codepoint of previous buffer was a lead surrogate */
if ((sourceChar = fromUChar32) != 0 && target.hasRemaining()) {
getTrail = true;
}
-
+
while (getTrail || source.hasRemaining()) {
if (getTrail || target.hasRemaining()) {
oldSourcePos = source.position();
/* check if the char is a First surrogate */
if (getTrail || UTF16.isSurrogate((char)sourceChar)) {
if (getTrail || UTF16.isLeadSurrogate((char)sourceChar)) {
-// getTrail:
+// getTrail:
if (getTrail) {
getTrail = false;
}
break;
}
}
-
+
/* do not convert SO/SI/ESC */
if (IS_2022_CONTROL(sourceChar)) {
/* callback(illegal) */
fromUChar32 = sourceChar;
break;
}
-
+
/* do the conversion */
-
+
if (choiceCount == 0) {
char csm;
/*
*/
csm = (char)jpCharsetMasks[myConverterData.version];
choiceCount = 0;
-
+
/* JIS7/8: try single-byte half-width Katakana before JISX208 */
if (myConverterData.version == 3 || myConverterData.version == 4) {
choices[choiceCount++] = HWKANA_7BIT;
}
/* Do not try single-bit half-width Katakana for other versions. */
csm &= ~CSM(HWKANA_7BIT);
-
+
/* try the current G0 charset */
choices[choiceCount++] = cs = myConverterData.fromU2022State.cs[0];
csm &= ~CSM(cs);
-
+
/* try the current G2 charset */
if ((cs = myConverterData.fromU2022State.cs[2]) != 0) {
choices[choiceCount++] = cs;
}
}
}
-
+
cs = g = 0;
- /*
+ /*
* len==0: no mapping found yet
* len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks
* len>0: found a roundtrip result, done
* an early fallback with a later one.
*/
usingFallback = useFallback;
-
+
for (int i = 0; i < choiceCount && len <= 0; i++) {
int[] value = new int[1];
int len2;
/* Shift U+FF61..U+FF9F to bytes A1..DF. */
targetValue = (int)(UConverterConstants.UNSIGNED_INT_MASK & (sourceChar - (HWKANA_START - 0xa1)));
len = 1;
-
+
cs = myConverterData.fromU2022State.cs[0];
if (IS_JP_DBCS(cs)) {
/* switch from a DBCS charset to JISX201 */
break;
}
}
-
+
if (len != 0) {
if (len < 0) {
len = -len; /* fallback */
}
outLen = 0;
-
+
/* write SI if necessary (only for JIS7 */
if (myConverterData.fromU2022State.g == 1 && g == 0) {
buffer[outLen++] = UConverterConstants.SI;
myConverterData.fromU2022State.g = 0;
}
-
+
/* write the designation sequence if necessary */
if (cs != myConverterData.fromU2022State.cs[g]) {
for (int i = 0; i < escSeqChars[cs].length; i++) {
buffer[outLen++] = escSeqChars[cs][i];
}
myConverterData.fromU2022State.cs[g] = cs;
-
+
/* invalidate the choices[] */
choiceCount = 0;
}
-
+
/* write the shift sequence if necessary */
if (g != myConverterData.fromU2022State.g) {
switch (g) {
/* case 3: no SS3 in ISO-2022-JP-x */
}
}
-
+
/* write the output bytes */
if (len == 1) {
buffer[outLen++] = (byte)targetValue;
fromUChar32 = sourceChar;
break;
}
-
+
if (sourceChar == CR || sourceChar == LF) {
/* reset the G2 state at the end of a line (conversion got use into ASCII or JISX201 already) */
myConverterData.fromU2022State.cs[2] = 0;
choiceCount = 0;
}
-
+
/* output outLen>0 bytes in buffer[] */
if (outLen == 1) {
target.put(buffer[0]);
break;
}
}
-
+
/*
* the end of the input stream and detection of truncated input
* are handled by the framework, but for ISO-2022-JP conversion
* we need to be in ASCII mode at the very end
- *
+ *
* conditions:
* successful
* in SO mode or not in ASCII mode
(myConverterData.fromU2022State.g != 0 || myConverterData.fromU2022State.cs[0] != ASCII) &&
flush && !source.hasRemaining() && fromUChar32 == 0) {
int sourceIndex;
-
+
outLen = 0;
-
+
if (myConverterData.fromU2022State.g != 0) {
buffer[outLen++] = UConverterConstants.SI;
myConverterData.fromU2022State.g = 0;
}
-
+
if (myConverterData.fromU2022State.cs[0] != ASCII) {
for (int i = 0; i < escSeqChars[ASCII].length; i++) {
buffer[outLen++] = escSeqChars[ASCII][i];
}
myConverterData.fromU2022State.cs[0] = ASCII;
}
-
+
/* get the source index of the last input character */
sourceIndex = source.position();
if (sourceIndex > 0) {
} else {
sourceIndex = -1;
}
-
+
err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, outLen, target, offsets, sourceIndex);
}
return err;
* SO <SO> CNS-11643-1992 Plane 1, GB2312, ISO-IR-165
* SS2 <ESC>N CNS-11643-1992 Plane 2
* SS3 <ESC>O CNS-11643-1992 Planes 3-7
- * vi)
+ * vi)
* SOdesignator : ESC "$" ")" finalchar_for_SO
* SS2designator : ESC "$" "*" finalchar_for_SS2
* SS3designator : ESC "$" "+" finalchar_for_SS3
- *
+ *
* ESC $ ) A Indicates the bytes following SO are Chinese
* characters as defined in GB 2312-80, until
* another SOdesignation appears
- *
+ *
* ESC $ ) E Indicates the bytes following SO are as defined
* in ISO-IR-165 (for details, see section 2.1),
* until another SOdesignation appears
- *
+ *
* ESC $ ) G Indicates the bytes following SO are as defined
* in CNS 11643-plane-1, until another SOdesignation appears
- *
+ *
* ESC $ * H Indicates the two bytes immediately following
* SS2 is a Chinese character as defined in CNS
* 11643-plane-2, until another SS2designation
* appears
* (Meaning <ESC>N must precede every 2 byte sequence.)
- *
+ *
* ESC $ + I Indicates the immediate two bytes following SS3
* is a Chinese character as defined in CNS
* 11643-plane-3, until another SS3designation
* appears
* (Meaning <ESC>O must precede every 2 byte sequence.)
- *
+ *
* ESC $ + J Indicates the immediate two bytes following SS3
* is a Chinese character as defined in CNS
* 11643-plane-4, until another SS3designation
* appears
* (In English: <ESC>O must precede every 2 byte sequence.)
- *
+ *
* ESC $ + K Indicates the immediate two bytes following SS3
* is a Chinese character as defined in CNS
* 11643-plane-5, until another SS3designation
* appears
- *
+ *
* ESC $ + L Indicates the immediate two bytes following SS3
* is a Chinese character as defined in CNS
* 11643-plane-6, until another SS3designation
* appears
- *
+ *
* ESC $ + M Indicates the immediate two bytes following SS3
* is a Chinese character as defined in CNS
* 11643-plane-7, until another SS3designation
* appears
- *
+ *
* As in ISO-2022-CN, each line starts in ASCII, and ends in ASCII, and
* has its own designation information before any Chinese characters
* appear
*/
-
+
/* The following are defined this way to make strings truly read-only */
private final static byte[] GB_2312_80_STR = { 0x1B, 0x24, 0x29, 0x41 };
private final static byte[] ISO_IR_165_STR = { 0x1B, 0x24, 0x29, 0x45 };
private final static byte[] CNS_11643_1992_Plane_5_STR = { 0x1B, 0x24, 0x2B, 0x4B };
private final static byte[] CNS_11643_1992_Plane_6_STR = { 0x1B, 0x24, 0x2B, 0x4C };
private final static byte[] CNS_11643_1992_Plane_7_STR = { 0x1B, 0x24, 0x2B, 0x4D };
-
+
/************************ ISO2022-CN Data *****************************/
private final static byte[][] escSeqCharsCN = {
SHIFT_IN_STR,
CNS_11643_1992_Plane_6_STR,
CNS_11643_1992_Plane_7_STR,
};
-
+
private class CharsetEncoderISO2022CN extends CharsetEncoderICU {
public CharsetEncoderISO2022CN(CharsetICU cs) {
super(cs, fromUSubstitutionChar[0]);
}
-
+
+ @Override
protected void implReset() {
super.implReset();
myConverterData.reset();
}
-
+
/* This overrides the cbFromUWriteSub method in CharsetEncoderICU */
- CoderResult cbFromUWriteSub (CharsetEncoderICU encoder,
+ @Override
+ CoderResult cbFromUWriteSub (CharsetEncoderICU encoder,
CharBuffer source, ByteBuffer target, IntBuffer offsets){
CoderResult err = CoderResult.UNDERFLOW;
byte[] buffer = new byte[8];
int i = 0;
byte[] subchar;
subchar = encoder.replacement();
-
+
if (myConverterData.fromU2022State.g != 0) {
/* not in ASCII mode: switch to ASCII */
myConverterData.fromU2022State.g = 0;
buffer[i++] = UConverterConstants.SI;
}
buffer[i++] = subchar[0];
-
+
err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, i, target, offsets, source.position() - 1);
return err;
}
-
+
+ @Override
protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
CoderResult err = CoderResult.UNDERFLOW;
int sourceChar;
boolean usingFallback;
boolean gotoGetTrail = false;
int oldSourcePos; // For proper error handling
-
+
choiceCount = 0;
-
+
/* check if the last codepoint of previous buffer was a lead surrogate */
if ((sourceChar = fromUChar32) != 0 && target.hasRemaining()) {
// goto getTrail label
- gotoGetTrail = true;
+ gotoGetTrail = true;
}
-
+
while (source.hasRemaining() || gotoGetTrail) {
if (target.hasRemaining() || gotoGetTrail) {
oldSourcePos = source.position();
// getTrail label
/* reset gotoGetTrail flag*/
gotoGetTrail = false;
-
+
/* look ahead to find the trail surrogate */
if (source.hasRemaining()) {
/* test the following code unit */
break;
}
}
-
+
/* do the conversion */
if (sourceChar <= 0x007f) {
/* do not convert SO/SI/ESC */
fromUChar32 = sourceChar;
break;
}
-
+
/* US-ASCII */
if (myConverterData.fromU2022State.g == 0) {
buffer[0] = (byte)sourceChar;
myConverterData.fromU2022State.g = 0;
choiceCount = 0;
}
-
+
if (sourceChar == CR || sourceChar == LF) {
/* reset the state at the end of a line */
myConverterData.fromU2022State.reset();
/* convert U+0080..U+10ffff */
int i;
byte cs, g;
-
+
if (choiceCount == 0) {
/* try the current SO/G1 converter first */
choices[0] = myConverterData.fromU2022State.cs[1];
-
+
/* default to GB2312_1 if none is designated yet */
if (choices[0] == 0) {
choices[0] = GB2312_1;
} else {
choices[1] = GB2312_1;
}
-
+
choiceCount = 2;
} else if (myConverterData.version == 1) {
/* ISO-2022-CN-EXT */
-
+
/* try one of the other converters */
switch (choices[0]) {
case GB2312_1:
choices[2] = ISO_IR_165;
break;
}
-
+
choiceCount = 3;
} else {
/* ISO-2022-CN-CNS */
choices[0] = CNS_11643_1;
choices[1] = GB2312_1;
-
+
choiceCount = 2;
}
}
-
+
cs = g = 0;
/*
* len==0: no mapping found yet
* an early fallback with a later one.
*/
usingFallback = useFallback;
-
+
for (i = 0; i < choiceCount && len <= 0; ++i) {
byte cs0 = choices[i];
if (cs0 > 0) {
}
}
}
-
+
if (len != 0) {
len = 0; /* count output bytes; it must have ben abs(len) == 2 */
-
+
/* write the designation sequence if necessary */
if (cs != myConverterData.fromU2022State.cs[g]) {
if (cs < CNS_11643) {
choiceCount = 0;
}
}
-
+
/* write the shift sequence if necessary */
if (g != myConverterData.fromU2022State.g) {
switch (g) {
case 1:
buffer[len++] = UConverterConstants.SO;
-
+
/* set the new state only if it is the locking shift SO/G1, not for SS2 or SS3 */
myConverterData.fromU2022State.g = 1;
break;
break;
}
}
-
+
/* write the two output bytes */
buffer[len++] = (byte)(targetValue >> 8);
buffer[len++] = (byte)targetValue;
break;
}
} /* end while (source.hasRemaining() */
-
+
/*
* the end of the input stream and detection of truncated input
* are handled by the framework, but for ISO-2022-CN conversion
* we need to be in ASCII mode at the very end
- *
+ *
* conditions:
* successful
* not in ASCII mode
*/
if (!err.isError() && myConverterData.fromU2022State.g != 0 && flush && !source.hasRemaining() && fromUChar32 == 0) {
int sourceIndex;
-
+
/* we are switching to ASCII */
myConverterData.fromU2022State.g = 0;
-
+
/* get the source index of the last input character */
sourceIndex = source.position();
if (sourceIndex > 0) {
--sourceIndex;
- if (UTF16.isTrailSurrogate(source.get(sourceIndex)) &&
+ if (UTF16.isTrailSurrogate(source.get(sourceIndex)) &&
(sourceIndex == 0 || UTF16.isLeadSurrogate(source.get(sourceIndex-1)))) {
--sourceIndex;
}
} else {
sourceIndex = -1;
}
-
+
err = CharsetEncoderICU.fromUWriteBytes(this, SHIFT_IN_STR, 0, 1, target, offsets, sourceIndex);
}
-
+
return err;
}
}
public CharsetEncoderISO2022KR(CharsetICU cs) {
super(cs, fromUSubstitutionChar[myConverterData.version]);
}
-
+
+ @Override
protected void implReset() {
super.implReset();
myConverterData.reset();
setInitialStateFromUnicodeKR(this);
}
-
+
/* This overrides the cbFromUWriteSub method in CharsetEncoderICU */
- CoderResult cbFromUWriteSub (CharsetEncoderICU encoder,
+ @Override
+ CoderResult cbFromUWriteSub (CharsetEncoderICU encoder,
CharBuffer source, ByteBuffer target, IntBuffer offsets){
CoderResult err = CoderResult.UNDERFLOW;
byte[] buffer = new byte[8];
int length, i = 0;
byte[] subchar;
-
+
subchar = encoder.replacement();
length = subchar.length;
-
+
if (myConverterData.version == 0) {
if (length == 1) {
if (encoder.fromUnicodeStatus != 0) {
buffer[i++] = subchar[1];
}
err = CharsetEncoderICU.fromUWriteBytes(this, buffer, 0, i, target, offsets, source.position() - 1);
- } else {
+ } else {
/* save the subconverter's substitution string */
byte[] currentSubChars = myConverterData.currentEncoder.replacement();
-
+
/* set our substitution string into the subconverter */
myConverterData.currentEncoder.replaceWith(subchar);
myConverterData.currentConverter.subChar1 = fromUSubstitutionChar[0][0];
myConverterData.currentEncoder.fromUChar32 = encoder.fromUChar32;
err = myConverterData.currentEncoder.cbFromUWriteSub(myConverterData.currentEncoder, source, target, offsets);
encoder.fromUChar32 = myConverterData.currentEncoder.fromUChar32;
-
+
/* restore the subconverter's substitution string */
myConverterData.currentEncoder.replaceWith(currentSubChars);
-
+
if (err.isOverflow()) {
if (myConverterData.currentEncoder.errorBufferLength > 0) {
encoder.errorBuffer = myConverterData.currentEncoder.errorBuffer.clone();
myConverterData.currentEncoder.errorBufferLength = 0;
}
}
-
+
return err;
}
-
+
private CoderResult encodeLoopIBM(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
CoderResult err = CoderResult.UNDERFLOW;
myConverterData.currentEncoder.fromUChar32 = fromUChar32;
err = myConverterData.currentEncoder.cnvMBCSFromUnicodeWithOffsets(source, target, offsets, flush);
fromUChar32 = myConverterData.currentEncoder.fromUChar32;
-
+
if (err.isOverflow()) {
if (myConverterData.currentEncoder.errorBufferLength > 0) {
errorBuffer = myConverterData.currentEncoder.errorBuffer.clone();
errorBufferLength = myConverterData.currentEncoder.errorBufferLength;
myConverterData.currentEncoder.errorBufferLength = 0;
}
-
+
return err;
}
-
+
+ @Override
protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
CoderResult err = CoderResult.UNDERFLOW;
int[] targetByteUnit = { 0x0000 };
boolean usingFallback;
int length = 0;
boolean gotoGetTrail = false; // for goto getTrail label call
-
+
/*
* if the version is 1 then the user is requesting
* conversion with ibm-25546 pass the argument to
if (myConverterData.version == 1) {
return encodeLoopIBM(source, target, offsets, flush);
}
-
+
usingFallback = useFallback;
isTargetByteDBCS = fromUnicodeStatus == 0 ? false : true;
if ((sourceChar = fromUChar32) != 0 && target.hasRemaining()) {
gotoGetTrail = true;
}
-
+
while (source.hasRemaining() || gotoGetTrail) {
targetByteUnit[0] = UConverterConstants.missingCharMarker;
-
+
if (target.hasRemaining() || gotoGetTrail) {
if (!gotoGetTrail) {
sourceChar = source.get();
-
+
/* do not convert SO/SI/ESC */
if (IS_2022_CONTROL(sourceChar)) {
/* callback(illegal) */
}
myConverterData.currentConverter.sharedData.mbcs.outputType = CharsetMBCS.MBCS_OUTPUT_2;
length = myConverterData.currentEncoder.fromUChar32(sourceChar, targetByteUnit, usingFallback);
- //length = MBCSFromUChar32_ISO2022(myConverterData.currentConverter.sharedData, sourceChar, targetByteUnit, usingFallback, CharsetMBCS.MBCS_OUTPUT_2);
+ //length = MBCSFromUChar32_ISO2022(myConverterData.currentConverter.sharedData, sourceChar, targetByteUnit, usingFallback, CharsetMBCS.MBCS_OUTPUT_2);
if (length < 0) {
length = -length; /* fallback */
}
errorBuffer[errorBufferLength++] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & (targetByteUnit[0] - 0x80));
err = CoderResult.OVERFLOW;
}
-
+
} else {
errorBuffer[errorBufferLength++] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & ((targetByteUnit[0]>>8) - 0x80));
errorBuffer[errorBufferLength++] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & (targetByteUnit[0]- 0x80));
/* oops.. the code point is unassigned
* set the error and reason
*/
-
+
/* check if the char is a First surrogate */
if (gotoGetTrail || UTF16.isSurrogate((char)sourceChar)) {
if (gotoGetTrail || UTF16.isLeadSurrogate((char)sourceChar)) {
// getTrail label
// reset gotoGetTrail flag
gotoGetTrail = false;
-
+
/* look ahead to find the trail surrogate */
if (source.hasRemaining()) {
/* test the following code unit */
/* callback(unassigned) for a BMP code point */
err = CoderResult.unmappableForLength(1);
}
-
+
fromUChar32 = sourceChar;
break;
}
* the end of the input stream and detection of truncated input
* are handled by the framework, but for ISO-2022-KR conversion
* we need to be in ASCII mode at the very end
- *
+ *
* conditions:
* successful
* not in ASCII mode
*/
if (!err.isError() && isTargetByteDBCS && flush && !source.hasRemaining() && fromUChar32 == 0) {
int sourceIndex;
-
+
/* we are switching to ASCII */
isTargetByteDBCS = false;
-
+
/* get the source index of the last input character */
sourceIndex = source.position();
if (sourceIndex > 0) {
} else {
sourceIndex = -1;
}
-
+
CharsetEncoderICU.fromUWriteBytes(this, SHIFT_IN_STR, 0, 1, target, offsets, sourceIndex);
}
/*save the state and return */
fromUnicodeStatus = isTargetByteDBCS ? 1 : 0;
-
+
return err;
}
}
-
+
+ @Override
public CharsetDecoder newDecoder() {
switch (variant) {
case ISO_2022_JP:
return new CharsetDecoderISO2022JP(this);
-
+
case ISO_2022_CN:
return new CharsetDecoderISO2022CN(this);
-
+
case ISO_2022_KR:
setInitialStateToUnicodeKR();
return new CharsetDecoderISO2022KR(this);
-
+
default: /* should not happen */
return null;
}
}
-
+
+ @Override
public CharsetEncoder newEncoder() {
CharsetEncoderICU cnv;
-
+
switch (variant) {
case ISO_2022_JP:
return new CharsetEncoderISO2022JP(this);
-
+
case ISO_2022_CN:
return new CharsetEncoderISO2022CN(this);
-
+
case ISO_2022_KR:
cnv = new CharsetEncoderISO2022KR(this);
setInitialStateFromUnicodeKR(cnv);
return cnv;
-
+
default: /* should not happen */
return null;
}
}
-
+
private void setInitialStateToUnicodeKR() {
if (myConverterData.version == 1) {
myConverterData.currentDecoder.toUnicodeStatus = 0; /* offset */
myConverterData.currentEncoder.fromUnicodeStatus = 1; /* prevLength */
}
}
-
+
+ @Override
void getUnicodeSetImpl(UnicodeSet setFillIn, int which) {
int i;
/*open a set and initialize it with code points that are algorithmically round-tripped */
-
+
switch(variant){
case ISO_2022_JP:
/*include JIS X 0201 which is hardcoded */
if((jpCharsetMasks[myConverterData.version]&CSM(ISO8859_1))!=0){
/* include Latin-1 for some variants of JP */
setFillIn.add(0, 0xff);
-
+
}
else {
/* include ASCII for JP */
default:
break;
}
-
+
//TODO Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until
for(i=0; i<UCNV_2022_MAX_CONVERTERS;i++){
int filter;
if(myConverterData.myConverterArray[i]!=null){
if(variant==ISO_2022_CN && myConverterData.version==0 && i==CNS_11643){
/*
- *
+ *
* version -specific for CN:
* CN version 0 does not map CNS planes 3..7 although
* they are all available in the CNS conversion table;
*/
filter=CharsetMBCS.UCNV_SET_FILTER_2022_CN;
} else if(variant==ISO_2022_JP && i == JISX208){
- /*
+ /*
* Only add code points that map to Shift-JIS codes
* corresponding to JIS X 208
*/
} else {
filter=CharsetMBCS.UCNV_SET_FILTER_NONE;
}
-
+
myConverterData.currentConverter.MBCSGetFilteredUnicodeSetForUnicode(myConverterData.myConverterArray[i],setFillIn, which, filter);
}
}
setFillIn.remove(0x0e);
setFillIn.remove(0x0f);
setFillIn.remove(0x1b);
-
+
/* ISO 2022 converters do not convert C1 controls either */
setFillIn.remove(0x80, 0x9f);
}
/*
* LMBCS
- *
+ *
* (Lotus Multi-Byte Character Set)
- *
+ *
* LMBCS was invented in the late 1980's and is primarily used in Lotus Notes
* databases and in Lotus 1-2-3 files. Programmers who work with the APIs
* into these products will sometimes need to deal with strings in this format.
- *
+ *
* The code in this file provides an implementation for an ICU converter of
* LMBCS to and from Unicode.
- *
+ *
* Since the LMBCS character set is only sparsely documented in existing
* printed or online material, we have added extensive annotation to this
* file to serve as a guide to understanding LMBCS.
- *
+ *
* LMBCS was originally designed with these four sometimes-competing design goals:
* -Provide encodings for characters in 12 existing national standards
* (plus a few other characters)
* -Minimal memory footprint
* -Maximal speed of conversion into the existing national character sets
* -No need to track a changing state as you interpret a string.
- *
+ *
* All of the national character sets LMBCS was trying to encode are 'ANSI'
* based, in that the bytes from 0x20 - 0x7F are almost exactly the
* same common Latin unaccented characters and symbols in all character sets.
- *
+ *
* So, in order to help meet the speed & memory design goals, the common ANSI
* bytes from 0x20-0x7F are represented by the same single-byte values in LMBCS.
*/
private static final short ULMBCS_GRP_KO = 0x11; /* Korean :ibm-1261 */
private static final short ULMBCS_GRP_TW = 0x12; /* Chinese TC (Traditional) :ibm-950 */
private static final short ULMBCS_GRP_CN = 0x13; /* Chinese SC (Simplified) :ibm-1386 */
- /*
+ /*
* So, the beginning of understanding LMBCS is that IF the first byte of a LMBCS
- * character is one of those 12 values, you can interpret the remaining bytes of
+ * character is one of those 12 values, you can interpret the remaining bytes of
* that character as coming from one of those character sets. Since the lower
* ANSI bytes already are represented in single bytes, one of the character
* set announcers is used to announce a character that starts with a byte of
* 0x80 or greater.
- *
+ *
* The character sets are arranged so that the single byte sets all appear
* before the multi-byte character sets. When we need to tell whether a
* group byte is for a single byte char set or not we use this definition:
/*
* However, to fully understand LMBCS, you must also understand a series of
* exceptions & optimizations made in service of the design goals.
- *
+ *
* First, those of you who are character set mavens may have noticed that
* the 'double-byte' character sets are actually multi-byte character sets
* that can have 1 or two bytes, even in upper-ascii range. To force
* to introduce any single-byte character > 0x80 in an otherwise double-byte
* character set. So, for example, the LMBCS sequence x10 x10 xAE is the
* same as '0xAE' in the Japanese code page 943.
- *
+ *
* Next, you will notice that the list of group bytes has some gaps.
* These are used in various ways.
- *
+ *
* We reserve a few special single byte values for common control
* characters. These are in the same place as their ANSI equivalents for speed.
*/
private static final short ULMBCS_GRP_UNICODE = 0x14;
/*
* The two bytes appearing after a 0x14 are interpreted as UTF-16 BE
- * (Big Endian) characters. The exception comes when UTF16
+ * (Big Endian) characters. The exception comes when UTF16
* representation would have a zero as the second byte. In that case,
* 'F6' is used in its place, and the bytes are swapped. (This prevents
* LMBCS from encoding any Unicode values of the form U+F6xx, but that's OK:
* translations even faster, sometimes the optimization group byte can be dropped
* from a LMBCS character. This is decided on a process-by-process basis. The
* group byte that is dropped is called the 'optimization group.'
- *
+ *
* For Notes, the optimization group is always 0x1.
*/
//private static final short ULMBCS_DEFAULTOPTGROUP = 0x01;
* etc.). Using plain 'LMBCS' as the name of the converter will give you
* LMBCS-1.
*/
-
+
/* Implementation strategy */
- /*
+ /*
* Because of the extensive use of other character sets, the LMBCS converter
* keeps a mapping between optimization groups and IBM character sets, so that
* ICU converters can be created and used as needed.
- *
+ *
* As you can see, even though any byte below 0x20 could be an optimization
* byte, only those at 0x13 or below can map to an actual converter. To limit
* some loops and searches, we define a value for that last group converter:
*/
private static final short ULMBCS_GRP_LAST = 0x13; /* last LMBCS group that has a converter */
-
+
private static final String[] OptGroupByteToCPName = {
/* 0x0000 */ "lmb-excp", /* internal home for the LOTUS exceptions list */
/* 0x0001 */ "ibm-850",
/* 0x0013 */ "windows-936",
/* The rest are null, including the 0x0014 Unicode compatibility region
* and 0x0019, the 1-2-3 system range control char */
- /* 0x0014 */ null
+ /* 0x0014 */ null
};
-
+
/* That's approximately all the data that's needed for translating
* LMBCS to Unicode.
- *
+ *
* However, to translate Unicode to LMBCS, we need some more support.
- *
+ *
* That's because there is often more than one possible mapping from a Unicode
* code point back into LMBCS. The first thing we do is look up into a table
* to figure out if there is more than one possible mapping. This table,
LMBCS mbcs native encoding
(example: Unihan) */
private static final short ULMBCS_AMBIGUOUS_ALL = 0x82;
-
+
/* And here's a simple way to see if a group falls in an appropriate range */
private boolean ULMBCS_AMBIGUOUS_MATCH(short agroup, short xgroup) {
return (((agroup == ULMBCS_AMBIGUOUS_SBCS) &&
(xgroup >= ULMBCS_DOUBLEOPTGROUP_START)) ||
((agroup) == ULMBCS_AMBIGUOUS_ALL));
}
-
+
/* The table & some code to use it: */
private static class _UniLMBCSGrpMap {
int uniStartRange;
this.GrpType = GrpType;
}
}
-
+
private static final _UniLMBCSGrpMap[] UniLMBCSGrpMap = {
new _UniLMBCSGrpMap(0x0001, 0x001F, ULMBCS_GRP_CTRL),
new _UniLMBCSGrpMap(0x0080, 0x009F, ULMBCS_GRP_CTRL),
new _UniLMBCSGrpMap(0xFF01, 0xFFEE, ULMBCS_AMBIGUOUS_MBCS),
new _UniLMBCSGrpMap(0xFFFF, 0xFFFF, ULMBCS_GRP_UNICODE)
};
-
+
static short FindLMBCSUniRange(char uniChar) {
int index = 0;
-
+
while (uniChar > UniLMBCSGrpMap[index].uniEndRange) {
index++;
}
-
+
if (uniChar >= UniLMBCSGrpMap[index].uniStartRange) {
return UniLMBCSGrpMap[index].GrpType;
}
return ULMBCS_GRP_UNICODE;
}
-
+
/*
* We also ask the creator of a converter to send in a preferred locale
* that we can use in resolving ambiguous mappings. They send the locale
* in as a string, and we map it, if possible, to one of the
* LMBCS groups. We use this table, and the associated code, to
* do the lookup:
- *
+ *
* This table maps locale ID's to LMBCS opt groups.
* The default return is group 0x01. Note that for
* performance reasons, the table is sorted in
};
static short FindLMBCSLocale(String LocaleID) {
int index = 0;
-
+
if (LocaleID == null) {
return 0;
}
-
+
while (LocaleLMBCSGrpMap[index].LocaleID != null) {
if (LocaleLMBCSGrpMap[index].LocaleID == LocaleID) {
return LocaleLMBCSGrpMap[index].OptGroup;
}
return ULMBCS_GRP_L1;
}
-
+
/*
* Before we get to the main body of code, here's how we hook up the rest
* of ICU. ICU converters are required to define a structure that includes
decoder = (CharsetDecoderMBCS)charset.newDecoder();
}
}
-
+
private UConverterDataLMBCS extraInfo; /* extraInfo in ICU4C implementation */
-
+
public CharsetLMBCS(String icuCanonicalName, String javaCanonicalName, String[] aliases) {
super(icuCanonicalName, javaCanonicalName, aliases);
- maxBytesPerChar = ULMBCS_CHARSIZE_MAX;
+ maxBytesPerChar = ULMBCS_CHARSIZE_MAX;
minBytesPerChar = 1;
maxCharsPerByte = 1;
-
+
extraInfo = new UConverterDataLMBCS();
-
+
for (int i = 0; i <= ULMBCS_GRP_LAST; i++) {
if (OptGroupByteToCPName[i] != null) {
extraInfo.OptGrpConverter[i] = ((CharsetMBCS)CharsetICU.forNameICU(OptGroupByteToCPName[i])).sharedData;
}
}
-
+
//get the Opt Group number for the LMBCS converter
int option = Integer.parseInt(icuCanonicalName.substring(6));
extraInfo.OptGroup = (short)option;
extraInfo.localeConverterIndex = FindLMBCSLocale(ULocale.getDefault().getBaseName());
}
-
+
class CharsetDecoderLMBCS extends CharsetDecoderICU {
public CharsetDecoderLMBCS(CharsetICU cs) {
super(cs);
implReset();
}
-
+
+ @Override
protected void implReset() {
super.implReset();
}
-
+
/* A function to call when we are looking at the Unicode group byte in LMBCS */
private char GetUniFromLMBCSUni(ByteBuffer ppLMBCSin) {
short HighCh = (short)(ppLMBCSin.get() & UConverterConstants.UNSIGNED_BYTE_MASK);
short LowCh = (short)(ppLMBCSin.get() & UConverterConstants.UNSIGNED_BYTE_MASK);
-
+
if (HighCh == ULMBCS_UNICOMPATZERO) {
HighCh = LowCh;
LowCh = 0; /* zero-byte in LSB special character */
}
-
+
return (char)((HighCh << 8) | LowCh);
}
-
+
private int LMBCS_SimpleGetNextUChar(UConverterSharedData cnv, ByteBuffer source, int positionOffset, int length) {
int uniChar;
int oldSourceLimit;
int oldSourcePos;
-
+
extraInfo.charset.sharedData = cnv;
-
+
oldSourceLimit = source.limit();
oldSourcePos = source.position();
-
+
source.position(oldSourcePos + positionOffset);
source.limit(source.position() + length);
-
+
uniChar = extraInfo.decoder.simpleGetNextUChar(source, false);
-
+
source.limit(oldSourceLimit);
source.position(oldSourcePos);
private int LMBCSGetNextUCharWorker(ByteBuffer source, CoderResult[] err) {
int uniChar = 0; /* an output Unicode char */
short CurByte; /* A byte from the input stream */
-
+
/* error check */
if (!source.hasRemaining()) {
err[0] = CoderResult.malformedForLength(0);
}
/* Grab first byte & save address for error recovery */
CurByte = (short)(source.get() & UConverterConstants.UNSIGNED_BYTE_MASK);
-
+
/*
* at entry of each if clause:
* 1. 'CurByte' points at the first byte of a LMBCS character
* 2. 'source' points to the next byte of the source stream after 'CurByte'
- *
+ *
* the job of each if clause is:
* 1. set 'source' to the point at the beginning of the next char (not if LMBCS char is only 1 byte)
* 2. set 'uniChar' up with the right Unicode value, or set 'err' appropriately
if ((CurByte > ULMBCS_C0END && CurByte < ULMBCS_C1START) /* ascii range */ ||
CurByte == 0 || CurByte == ULMBCS_HT || CurByte == ULMBCS_CR || CurByte == ULMBCS_LF ||
CurByte == ULMBCS_123SYSTEMRANGE) {
-
+
uniChar = CurByte;
} else {
short group;
UConverterSharedData cnv;
-
+
if (CurByte == ULMBCS_GRP_CTRL) { /* Control character group - no opt group update */
short C0C1byte;
/* CHECK_SOURCE_LIMIT(1) */
source.position(source.limit());
return 0xFFFF;
}
-
+
/* don't check for error indicators fffe/ffff below */
return GetUniFromLMBCSUni(source);
} else if (CurByte <= ULMBCS_CTRLOFFSET) {
source.position(source.limit());
return 0xFFFF;
}
-
+
/* check for LMBCS doubled-group-byte case */
if (source.get(source.position()) == group) {
/* single byte */
return 0xFFFF;
}
CurByte = (short)(source.get() & UConverterConstants.UNSIGNED_BYTE_MASK);
-
+
if (CurByte >= ULMBCS_C1START) {
uniChar = CharsetMBCS.MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(cnv.mbcs, CurByte);
} else {
* AND the second byte is not in the upper ascii range
*/
byte[] bytes = new byte[2];
-
+
cnv = extraInfo.OptGrpConverter[ULMBCS_GRP_EXCEPT];
-
+
/* Lookup value must include opt group */
bytes[0] = (byte)group;
bytes[1] = (byte)CurByte;
uniChar = LMBCS_SimpleGetNextUChar(cnv, ByteBuffer.wrap(bytes), 0, 2);
}
}
-
+
} else if (CurByte >= ULMBCS_C1START) { /* group byte is implicit */
group = extraInfo.OptGroup;
cnv = extraInfo.OptGrpConverter[group];
source.position(source.limit());
return 0xFFFF;
}
-
+
/* let the MBCS conversion consume CurByte again */
uniChar = LMBCS_SimpleGetNextUChar(cnv, source, -1, 1);
} else {
source.position(source.limit());
return 0xFFFF;
}
-
+
/* let the MBCS conversion consume CurByte again */
uniChar = LMBCS_SimpleGetNextUChar(cnv, source, -1, 2);
source.get();
}
}
}
-
+
return uniChar;
}
-
- protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
+
+ @Override
+ protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
CoderResult[] err = new CoderResult[1];
err[0] = CoderResult.UNDERFLOW;
byte[] LMBCS = new byte[ULMBCS_CHARSIZE_MAX * 2]; /* Increase the size for proper handling in subsequent calls to MBCS functions */
int saveSource; /* beginning of current code point */
int errSource = 0; /* index to actual input in case an error occurs */
byte savebytes = 0;
-
+
/* Process from source to limit, or until error */
while (err[0].isUnderflow() && source.hasRemaining() && target.hasRemaining()) {
saveSource = source.position(); /* beginning of current code point */
if (toULength > 0) { /* reassemble char from previous call */
int size_old = toULength;
ByteBuffer tmpSourceBuffer;
-
+
/* limit from source is either remainder of temp buffer, or user limit on source */
int size_new_maybe_1 = ULMBCS_CHARSIZE_MAX - size_old;
int size_new_maybe_2 = source.remaining();
uniChar = (char)LMBCSGetNextUCharWorker(tmpSourceBuffer, err);
source.position(saveSource + tmpSourceBuffer.position() - size_old);
errSource = saveSource - size_old;
-
- if (err[0].isOverflow()) { /* err == U_TRUNCATED_CHAR_FOUND */
+
+ if (err[0].isOverflow()) { /* err == U_TRUNCATED_CHAR_FOUND */
/* evil special case: source buffers so small a char spans more than 2 buffers */
toULength = savebytes;
for (int i = 0; i < savebytes; i++) {
uniChar = (char)LMBCSGetNextUCharWorker(source, err);
savebytes = (byte)(source.position() - saveSource);
}
-
+
if (err[0].isUnderflow()) {
if (uniChar < 0x0fffe) {
target.put(uniChar);
return err[0];
}
}
-
+
class CharsetEncoderLMBCS extends CharsetEncoderICU {
public CharsetEncoderLMBCS(CharsetICU cs) {
super(cs, fromUSubstitution);
implReset();
}
-
+
+ @Override
protected void implReset() {
super.implReset();
}
private int LMBCSConversionWorker(short group, byte[] LMBCS, char pUniChar, short[] lastConverterIndex, boolean[] groups_tried) {
byte pLMBCS = 0;
UConverterSharedData xcnv = extraInfo.OptGrpConverter[group];
-
+
int bytesConverted;
int[] value = new int[1];
short firstByte;
-
+
extraInfo.charset.sharedData = xcnv;
bytesConverted = extraInfo.encoder.fromUChar32(pUniChar, value, false);
-
+
/* get the first result byte */
if (bytesConverted > 0) {
firstByte = (short)((value[0] >> ((bytesConverted - 1) * 8)) & UConverterConstants.UNSIGNED_BYTE_MASK);
groups_tried[group] = true;
return 0;
}
-
+
lastConverterIndex[0] = group;
-
- /*
+
+ /*
* All initial byte values in lower ascii range should have been caught by now,
* except with the exception group.
*/
-
+
/* use converted data: first write 0, 1 or two group bytes */
if (group != ULMBCS_GRP_EXCEPT && extraInfo.OptGroup != group) {
LMBCS[pLMBCS++] = (byte)group;
LMBCS[pLMBCS++] = (byte)group;
}
}
-
+
/* don't emit control chars */
if (bytesConverted == 1 && firstByte < 0x20) {
return 0;
}
-
+
/* then move over the converted data */
switch (bytesConverted) {
case 4:
/* will never occur */
break;
}
-
+
return pLMBCS;
}
/*
int index = 0;
short LowCh = (short)(uniChar & UConverterConstants.UNSIGNED_BYTE_MASK);
short HighCh = (short)((uniChar >> 8) & UConverterConstants.UNSIGNED_BYTE_MASK);
-
+
LMBCS[index++] = (byte)ULMBCS_GRP_UNICODE;
-
+
if (LowCh == 0) {
LMBCS[index++] = (byte)ULMBCS_UNICOMPATZERO;
LMBCS[index++] = (byte)HighCh;
return ULMBCS_UNICODE_SIZE;
}
/* The main Unicode to LMBCS conversion function */
+ @Override
protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
CoderResult err = CoderResult.UNDERFLOW;
short[] lastConverterIndex = new short[1];
int bytes_written;
boolean[] groups_tried = new boolean[ULMBCS_GRP_LAST+1];
int sourceIndex = 0;
-
+
/*
* Basic strategy: attempt to fill in local LMBCS 1-char buffer.(LMBCS)
* If that succeeds, see if it will all fit into the target & copy it over
* if it does.
- *
+ *
* We try conversions in the following order:
* 1. Single-byte ascii & special fixed control chars (&null)
* 2. Look up group in table & try that (could b
* E) If its single-byte ambiguous, try the exceptions group
* 4. And as a grand fallback: Unicode
*/
-
+
short OldConverterIndex = 0;
-
+
while (source.hasRemaining() && err.isUnderflow()) {
OldConverterIndex = extraInfo.localeConverterIndex;
-
+
if (!target.hasRemaining()) {
err = CoderResult.OVERFLOW;
break;
}
-
+
uniChar = source.get(source.position());
bytes_written = 0;
pLMBCS = 0;
-
+
/* check cases in rough order of how common they are, for speed */
-
+
/* single-byte matches: strategy 1 */
if((uniChar>=0x80) && (uniChar<=0xff) && (uniChar!=0xB1) && (uniChar!=0xD7) && (uniChar!=0xF7) &&
(uniChar!=0xB0) && (uniChar!=0xB4) && (uniChar!=0xB6) && (uniChar!=0xA7) && (uniChar!=0xA8)) {
LMBCS[pLMBCS++] = (byte)uniChar;
bytes_written = 1;
}
-
+
if (bytes_written == 0) {
/* Check by Unicode range (Strategy 2) */
short group = FindLMBCSUniRange(uniChar);
}
if (bytes_written == 0) { /* the ambiguous group cases (Strategy 3) */
groups_tried = new boolean[ULMBCS_GRP_LAST+1];
-
+
/* check for non-default optimization group (Strategy 3A) */
if (extraInfo.OptGroup != 1 && ULMBCS_AMBIGUOUS_MATCH(group, extraInfo.OptGroup)) {
if(extraInfo.localeConverterIndex < ULMBCS_DOUBLEOPTGROUP_START) {
bytes_written = LMBCSConversionWorker (ULMBCS_GRP_L1, LMBCS, uniChar, lastConverterIndex, groups_tried);
-
+
if(bytes_written == 0) {
bytes_written = LMBCSConversionWorker (ULMBCS_GRP_EXCEPT, LMBCS, uniChar, lastConverterIndex, groups_tried);
}
}
/* check for locale optimization group (Strategy 3B) */
if (bytes_written == 0 && extraInfo.localeConverterIndex > 0 && ULMBCS_AMBIGUOUS_MATCH(group, extraInfo.localeConverterIndex)) {
-
+
bytes_written = LMBCSConversionWorker(extraInfo.localeConverterIndex, LMBCS, uniChar, lastConverterIndex, groups_tried);
}
/* check for last optimization group used for this string (Strategy 3C) */
short grp_start;
short grp_end;
short grp_ix;
-
+
grp_start = (group == ULMBCS_AMBIGUOUS_MBCS) ? ULMBCS_DOUBLEOPTGROUP_START : ULMBCS_GRP_L1;
grp_end = (group == ULMBCS_AMBIGUOUS_MBCS) ? ULMBCS_GRP_LAST : ULMBCS_GRP_TH;
-
+
if(group == ULMBCS_AMBIGUOUS_ALL) {
grp_start = ULMBCS_GRP_L1;
grp_end = ULMBCS_GRP_LAST;
}
-
+
for (grp_ix = grp_start; grp_ix <= grp_end && bytes_written == 0; grp_ix++) {
if (extraInfo.OptGrpConverter[grp_ix] != null && !groups_tried[grp_ix]) {
bytes_written = LMBCSConversionWorker(grp_ix, LMBCS, uniChar, lastConverterIndex, groups_tried);
}
}
- /*
+ /*
* a final conversion fallback to the exceptions group if it's likely
- * to be single byte (Strategy 3E)
+ * to be single byte (Strategy 3E)
*/
if (bytes_written == 0 && grp_start == ULMBCS_GRP_L1) {
bytes_written = LMBCSConversionWorker(ULMBCS_GRP_EXCEPT, LMBCS, uniChar, lastConverterIndex, groups_tried);
}
extraInfo.localeConverterIndex = OldConverterIndex;
}
-
+
return err;
}
}
+ @Override
public CharsetDecoder newDecoder() {
// Every call creates a new CharsetDecoderLMBCS, constructed with this charset.
return new CharsetDecoderLMBCS(this);
}
-
+
+ @Override
public CharsetEncoder newEncoder() {
// Every call creates a new CharsetEncoderLMBCS, constructed with this charset.
return new CharsetEncoderLMBCS(this);
}
-
+
+ @Override
void getUnicodeSetImpl(UnicodeSet setFillIn, int which){
// Delegates to getCompleteUnicodeSet(); the 'which' selector is not consulted here.
getCompleteUnicodeSet(setFillIn);
}
private static List<Charset> icuCharsets = Collections.<Charset>emptyList();
/**
- * Default constructor
+ * Default constructor
* @stable ICU 3.6
*/
public CharsetProviderICU() {
}
/**
- * Constructs a Charset for the given charset name.
+ * Constructs a Charset for the given charset name.
* Implements the abstract method of super class.
* @param charsetName charset name
* @return Charset object for the given charset name, null if unsupported
* @stable ICU 3.6
*/
+ @Override
public final Charset charsetForName(String charsetName){
try{
// extract the options from the charset name
charsetName = charsetName.substring(0, charsetName.length() - optionsString.length());
}
// get the canonical name
- String icuCanonicalName = getICUCanonicalName(charsetName);
+ String icuCanonicalName = getICUCanonicalName(charsetName);
// create the converter object and return it
if(icuCanonicalName==null || icuCanonicalName.length()==0){
- // Try the original name, may be something added and not in the alias table.
+ // Try the original name, may be something added and not in the alias table.
// Will get an unsupported encoding exception if it doesn't work.
icuCanonicalName = charsetName;
}
}
return null;
}
-
+
/**
* Constructs a charset for the given ICU conversion table from the specified class path.
* Example use: <code>cnv = CharsetProviderICU.charsetForName("myConverter", "com/myCompany/myDataPackage");</code>.
public final Charset charsetForName(String charsetName, String classPath) {
return charsetForName(charsetName, classPath, null);
}
-
+
/**
* Constructs a charset for the given ICU conversion table from the specified class path.
* This function is similar to {@link #charsetForName(String, String)}.
}
return cs;
}
-
+
/**
* Gets the canonical name of the converter as defined by Java
* @param enc converter name
} else {
ret = "";
}
-
+
}else{
/* unsupported encoding */
ret = "";
return ret;
}catch(IOException ex){
throw new UnsupportedCharsetException(enc);
- }
+ }
}
private static final Charset getCharset(String icuCanonicalName, String optionsString)
throws IOException {
@Deprecated
public static String getJavaCanonicalName(String charsetName){
/*
- If a charset listed in the IANA Charset Registry is supported by an implementation
- of the Java platform then its canonical name must be the name listed in the registry.
- Many charsets are given more than one name in the registry, in which case the registry
- identifies one of the names as MIME-preferred. If a charset has more than one registry
- name then its canonical name must be the MIME-preferred name and the other names in
- the registry must be valid aliases. If a supported charset is not listed in the IANA
+ If a charset listed in the IANA Charset Registry is supported by an implementation
+ of the Java platform then its canonical name must be the name listed in the registry.
+ Many charsets are given more than one name in the registry, in which case the registry
+ identifies one of the names as MIME-preferred. If a charset has more than one registry
+ name then its canonical name must be the MIME-preferred name and the other names in
+ the registry must be valid aliases. If a supported charset is not listed in the IANA
registry then its canonical name must begin with one of the strings "X-" or "x-".
*/
if(charsetName==null ){
return null;
- }
+ }
try{
String cName = null;
/* find out the alias with MIME tag */
/* find out the alias with IANA tag */
}else if((cName=UConverterAlias.getStandardName(charsetName, "IANA"))!=null){
}else {
- /*
- check to see if an alias already exists with x- prefix, if yes then
+ /*
+ check to see if an alias already exists with x- prefix, if yes then
make that the canonical name
*/
int aliasNum = UConverterAlias.countAliases(charsetName);
break;
}
}
- /* last resort just append x- to any of the alias and
+ /* last resort just append x- to any of the alias and
make it the canonical name */
if((cName==null || cName.length()==0)){
name = UConverterAlias.getStandardName(charsetName, "UTR22");
}
return cName;
}catch (IOException ex){
-
+
}
return null;
}
- /**
+ /**
* Gets the aliases associated with the converter name
* @param encName converter name
* @return converter names as elements in an object array
int i=0;
int j=0;
String aliasArray[/*50*/] = new String[50];
-
+
if(encName != null){
aliasNum = UConverterAlias.countAliases(encName);
for(i=0,j=0;i<aliasNum;i++){
for(;--j>=0;) {
ret[j] = aliasArray[j];
}
-
+
}
return (ret);
-
+
}
/**
* @return the Charset iterator
* @stable ICU 3.6
*/
+ @Override
public final Iterator<Charset> charsets() {
loadAvailableICUCharsets();
return icuCharsets.iterator();
}
/**
- * Gets the canonical names of available ICU converters
+ * Gets the canonical names of available ICU converters
* @return array of available converter names
* @internal
* @deprecated This API is ICU internal only.
private static final short SC7=0x17; /* Select window 7 */
private static final short SD0=0x18; /* Define and select window 0 */
//private static final short SD7=0x1F; /* Define and select window 7 */
-
+
private static final short UC0=0xE0; /* Select window 0 */
private static final short UC7=0xE7; /* Select window 7 */
private static final short UD0=0xE8; /* Define and select window 0 */
private static final short UDX=0xF1; /* Define a Window as extended */
private static final short Urs=0xF2; /* reserved */
// };
-
+
// enum {
/*
* Unicode code points from 3400 to E000 are not addressable by
/* use table of predefined fixed offsets for values from fixedThreshold */
private static final int fixedThreshold=0xF9;
//};
-
+
protected byte[] fromUSubstitution = new byte[]{(byte)0x0E,(byte)0xFF, (byte)0xFD};
-
+
/* constant offsets for the 8 static windows */
private static final int staticOffsets[]={
0x0000, /* ASCII for quoted tags */
private static final int definePairTwo=5;
private static final int defineOne=6;
// };
-
+
private final static class SCSUData {
/* dynamic window offsets, initialized to default values from initialDynamicOffsets */
int toUDynamicOffsets[] = new int[8] ;
- int fromUDynamicOffsets[] = new int[8] ;
+ int fromUDynamicOffsets[] = new int[8] ;
/* state machine state - toUnicode */
boolean toUIsSingleByteMode;
byte locale;
byte nextWindowUseIndex;
byte windowUse[] = new byte[8];
-
+
SCSUData(){
// All state setup lives in initialize(), so construction and reset share one code path.
initialize();
}
-
+
void initialize(){
for(int i=0;i<8;i++){
this.toUDynamicOffsets[i] = initialDynamicOffsets[i];
for(int i=0;i<8;i++){
this.fromUDynamicOffsets[i] = initialDynamicOffsets[i];
}
- this.nextWindowUseIndex = 0;
+ this.nextWindowUseIndex = 0;
switch(this.locale){
/* Not being used right now because "SCSU,locale=ja" does not work in ICU4J. */
/* case l_ja:
for(int i=0;i<8;i++){
this.windowUse[i] = initialWindowUse[i];
}
-
+
}
}
}
-
+
static final byte initialWindowUse[]={ 7, 0, 3, 2, 4, 5, 6, 1 };
/* Not being used right now because "SCSU,locale=ja" does not work in ICU4J. */
// static final byte initialWindowUse_ja[]={ 3, 2, 4, 1, 0, 7, 5, 6 };
/* Not being used right now because "SCSU,locale=ja" does not work in ICU4J. */
// private static final int l_ja = 1;
//};
-
- private SCSUData extraInfo = null;
-
+
+ private SCSUData extraInfo = null;
+
public CharsetSCSU(String icuCanonicalName, String javaCanonicalName, String[] aliases){
super(icuCanonicalName, javaCanonicalName, aliases);
- maxBytesPerChar = 3;
+ maxBytesPerChar = 3;
minBytesPerChar = 1;
maxCharsPerByte = 1;
extraInfo = new SCSUData();
}
-
- class CharsetDecoderSCSU extends CharsetDecoderICU {
+
+ class CharsetDecoderSCSU extends CharsetDecoderICU {
/* label values for supporting behavior similar to goto in C */
private static final int FastSingle=0;
private static final int SingleByteMode=1;
private static final int EndLoop=2;
-
+
/* Mode Type */
private static final int ByteMode = 0;
- private static final int UnicodeMode =1;
-
+ private static final int UnicodeMode =1;
+
public CharsetDecoderSCSU(CharsetICU cs) {
super(cs);
// Start from a clean converter state; implReset() also re-initializes extraInfo.
implReset();
}
-
+
//private SCSUData data ;
+ @Override
protected void implReset(){
// Reset the base decoder first, then this converter's own state.
super.implReset();
// No partially read byte sequence is pending.
toULength = 0;
// NOTE(review): extraInfo appears to be declared on the enclosing charset rather than on
// this decoder, and initialize() resets both the toU and fromU window offsets, so this
// reset would be visible to any other coder sharing it - confirm this is intended.
extraInfo.initialize();
}
-
+
short b;
-
- //Get the state machine state
+
+ //Get the state machine state
private boolean isSingleByteMode ;
private short state ;
private byte quoteWindow ;
private byte dynamicWindow ;
private short byteOne;
-
-
+
+
//sourceIndex=-1 if the current character began in the previous buffer
private int sourceIndex ;
private int nextSourceIndex ;
-
+
CoderResult cr;
SCSUData data ;
private boolean LabelLoop;// used to break the while loop
-
+
+ @Override
protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets,
boolean flush){
data = extraInfo;
-
- //Get the state machine state
+
+ //Get the state machine state
isSingleByteMode = data.toUIsSingleByteMode;
state = data.toUState;
quoteWindow = data.toUQuoteWindow;
dynamicWindow = data.toUDynamicWindow;
byteOne = data.toUByteOne;
-
+
LabelLoop = true;
-
+
//sourceIndex=-1 if the current character began in the previous buffer
sourceIndex = data.toUState == readCommand ? 0: -1 ;
nextSourceIndex = 0;
-
+
cr = CoderResult.UNDERFLOW;
int labelType = 0;
while(LabelLoop){
}
return cr;
}
-
+
private int fastSingle(ByteBuffer source, CharBuffer target, IntBuffer offsets, int modeType){
int label = 0;
if(modeType==ByteMode){
-
+
if(state==readCommand){
while(source.hasRemaining() && target.hasRemaining() && (b=(short)(source.get(source.position()) & UConverterConstants.UNSIGNED_BYTE_MASK)) >= 0x20){
source.position(source.position()+1);
label = SingleByteMode;
return label;
}
-
+
private int singleByteMode(ByteBuffer source, CharBuffer target, IntBuffer offsets, int modeType){
int label = SingleByteMode;
if(modeType == ByteMode){
label = EndLoop;
return label;
}
-
+
/* Store the first byte of a multibyte sequence in toUByte[] */
toUBytesArray[0] = (byte)b;
toULength = 1;
return label;
}
}
-
+
}else if(modeType==UnicodeMode){
while(source.hasRemaining()){
if(!target.hasRemaining()){
label = EndLoop;
return label;
}
-
+
private void endLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets){
if(cr==CoderResult.OVERFLOW){
state = readCommand;
LabelLoop = false;
}
}
-
+
class CharsetEncoderSCSU extends CharsetEncoderICU{
public CharsetEncoderSCSU(CharsetICU cs) {
// fromUSubstitution is passed to the base encoder as the substitution byte sequence.
super(cs, fromUSubstitution);
// Start from a clean converter state; implReset() also re-initializes extraInfo.
implReset();
}
-
+
//private SCSUData data;
+ @Override
protected void implReset() {
// Reset the base encoder first, then the SCSU window state.
super.implReset();
// NOTE(review): extraInfo appears to be declared on the enclosing charset rather than on
// this encoder, and initialize() resets both the toU and fromU window offsets, so this
// reset would be visible to any other coder sharing it - confirm this is intended.
extraInfo.initialize();
}
-
+
/* label values for supporting behavior similar to goto in C */
- private static final int Loop=0;
+ private static final int Loop=0;
private static final int GetTrailUnicode=1;
private static final int OutputBytes=2;
private static final int EndLoop =3;
-
+
private int delta;
private int length;
-
+
///variables of compression heuristics
private int offset;
private char lead, trail;
private int code;
private byte window;
-
- //Get the state machine state
+
+ //Get the state machine state
private boolean isSingleByteMode;
private byte dynamicWindow ;
private int currentOffset;
int c;
-
+
SCSUData data ;
-
+
//sourceIndex=-1 if the current character began in the previous buffer
private int sourceIndex ;
private int nextSourceIndex;
private int targetCapacity;
-
+
private boolean LabelLoop;//used to break the while loop
private boolean AfterGetTrail;// its value is set to true in order to ignore the code before getTrailSingle:
private boolean AfterGetTrailUnicode;// is value is set to true in order to ignore the code before getTrailUnicode:
-
+
CoderResult cr;
-
+
+ @Override
protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
data = extraInfo;
cr = CoderResult.UNDERFLOW;
-
- //Get the state machine state
+
+ //Get the state machine state
isSingleByteMode = data.fromUIsSingleByteMode;
dynamicWindow = data.fromUDynamicWindow;
currentOffset = data.fromUDynamicOffsets[dynamicWindow];
c = fromUChar32;
-
+
sourceIndex = c== 0 ? 0: -1 ;
nextSourceIndex = 0;
-
-
+
+
targetCapacity = target.limit()-target.position();
-
+
//sourceIndex=-1 if the current character began in the previous buffer
sourceIndex = c== 0 ? 0: -1 ;
nextSourceIndex = 0;
-
+
int labelType = Loop; // set to Loop so that the code starts from loop:
- LabelLoop = true;
- AfterGetTrail = false;
- AfterGetTrailUnicode = false;
-
+ LabelLoop = true;
+ AfterGetTrail = false;
+ AfterGetTrailUnicode = false;
+
while(LabelLoop){
switch(labelType){
case Loop:
}
return cr;
}
-
+
private byte getWindow(int[] offsets){
int i;
for (i=0;i<8;i++){
}
return -1;
}
-
+
/**
 * Tests whether {@code a} lies in the window that starts at {@code offsetValue},
 * or is one of the low values accepted directly.
 * <p>
 * The result is true only if {@code a <= offsetValue + 0x7f} and, in addition, either
 * {@code a >= offsetValue}, or {@code a <= 0x7f} and {@code a} is at least 0x20 or is
 * one of the values whose bit is set in the mask 0x2601 (0x00, 0x09, 0x0A, 0x0D).
 * <p>
 * Every operand is masked with {@code UConverterConstants.UNSIGNED_INT_MASK} before it
 * is compared; presumably this makes the comparisons unsigned (confirm against the
 * constant's definition). The outer conjunction uses the non-short-circuit {@code &},
 * so both operands are always evaluated.
 *
 * @param offsetValue start of the window to test against
 * @param a value to test
 * @return true if the conditions described above hold
 */
private boolean isInOffsetWindowOrDirect(int offsetValue, int a){
- return (a & UConverterConstants.UNSIGNED_INT_MASK)<=(offsetValue & UConverterConstants.UNSIGNED_INT_MASK)+0x7f &
- ((a & UConverterConstants.UNSIGNED_INT_MASK)>=(offsetValue & UConverterConstants.UNSIGNED_INT_MASK) ||
- ((a & UConverterConstants.UNSIGNED_INT_MASK)<=0x7f && ((a & UConverterConstants.UNSIGNED_INT_MASK)>=0x20
+ return (a & UConverterConstants.UNSIGNED_INT_MASK)<=(offsetValue & UConverterConstants.UNSIGNED_INT_MASK)+0x7f &
+ ((a & UConverterConstants.UNSIGNED_INT_MASK)>=(offsetValue & UConverterConstants.UNSIGNED_INT_MASK) ||
+ ((a & UConverterConstants.UNSIGNED_INT_MASK)<=0x7f && ((a & UConverterConstants.UNSIGNED_INT_MASK)>=0x20
|| ((1L<<(a & UConverterConstants.UNSIGNED_INT_MASK))&0x2601)!=0)));
}
-
+
private byte getNextDynamicWindow(){
byte windowValue = data.windowUse[data.nextWindowUseIndex];
if(++data.nextWindowUseIndex==8){
}
return windowValue;
}
-
+
private void useDynamicWindow(byte windowValue){
/*first find the index of the window*/
int i,j;
i=7;
}
}while(data.windowUse[i]!=windowValue);
-
+
/*now copy each window[i+1] to [i]*/
j= i+1;
if(j==8){
j=0;
}
}
-
+
/*finally, set the window into the most recently used index*/
data.windowUse[i]= windowValue;
}
-
-
+
+
private int getDynamicOffset(){
int i;
for(i=0;i<7;++i){
if((c&UConverterConstants.UNSIGNED_INT_MASK)<0x80){
/*No dynamic window for US-ASCII*/
return -1;
- }else if((c&UConverterConstants.UNSIGNED_INT_MASK)<0x3400 || ((c-0x10000)&UConverterConstants.UNSIGNED_INT_MASK)<(0x14000-0x10000) ||
+ }else if((c&UConverterConstants.UNSIGNED_INT_MASK)<0x3400 || ((c-0x10000)&UConverterConstants.UNSIGNED_INT_MASK)<(0x14000-0x10000) ||
((c-0x1d000)&UConverterConstants.UNSIGNED_INT_MASK)<=(0x1ffff-0x1d000)){
/*This character is in the code range for a "small", i.e, reasonably windowable, script*/
offset = c&0x7fffff80;
return -1;
}
}
-
+
private int loop(CharBuffer source, ByteBuffer target, IntBuffer offsets){
int label = 0;
if(isSingleByteMode){
if(!AfterGetTrail){
c = source.get();
++nextSourceIndex;
-
+
}
if(((c -0x20)&UConverterConstants.UNSIGNED_INT_MASK)<=0x5f && !AfterGetTrail){
/*pass US-ASCII graphic character through*/
return label;
}
}
-
-
+
+
if(AfterGetTrail){
AfterGetTrail = false;
}
-
+
/*Compress supplementary character U+10000...U+10ffff */
if(((delta=(c-currentOffset))&UConverterConstants.UNSIGNED_INT_MASK)<=0x7f){
/*use the current dynamic window*/
length = 3;
label = OutputBytes;
return label;
- } else if(((int)((c-0x3400)&UConverterConstants.UNSIGNED_INT_MASK))<(0xd800-0x3400) && (source.position()>=source.limit() ||
+ } else if(((int)((c-0x3400)&UConverterConstants.UNSIGNED_INT_MASK))<(0xd800-0x3400) && (source.position()>=source.limit() ||
((int)((source.get(source.position())-0x3400)&UConverterConstants.UNSIGNED_INT_MASK))< (0xd800 - 0x3400))){
-
+
/*
* this character is not compressible (a BMP ideograph or similar)
* switch to Unicode mode if this is the last character in the block
label = GetTrailUnicode;
return label;
}
-
+
/*state machine for Unicode*/
/*unicodeByteMode*/
while(AfterGetTrailUnicode || source.hasRemaining()){
c = source.get();
++nextSourceIndex;
}
-
+
if((((c-0x3400)& UConverterConstants.UNSIGNED_INT_MASK))<(0xd800-0x3400) && !AfterGetTrailUnicode){
/*not compressible, write character directly */
if(targetCapacity>=2){
} else if((((c-0x3400)& UConverterConstants.UNSIGNED_INT_MASK))>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300*/&& !AfterGetTrailUnicode){
/*compress BMP character if the following one is not an uncompressible ideograph*/
if(!(source.hasRemaining() && (((source.get(source.position())-0x3400)& UConverterConstants.UNSIGNED_INT_MASK))<(0xd800-0x3400))){
- if(((((c-0x30)&UConverterConstants.UNSIGNED_INT_MASK))<10 || (((c-0x61)&UConverterConstants.UNSIGNED_INT_MASK))<26
+ if(((((c-0x30)&UConverterConstants.UNSIGNED_INT_MASK))<10 || (((c-0x61)&UConverterConstants.UNSIGNED_INT_MASK))<26
|| (((c-0x41)&UConverterConstants.UNSIGNED_INT_MASK))<26)){
/*ASCII digit or letter*/
isSingleByteMode = true;
dynamicWindow = getNextDynamicWindow();
currentOffset = data.fromUDynamicOffsets[dynamicWindow]=offset;
useDynamicWindow(dynamicWindow);
- c = ((UD0+dynamicWindow)<<16) | (code<<8)
+ c = ((UD0+dynamicWindow)<<16) | (code<<8)
|(c - currentOffset) | 0x80;
length = 3;
label = OutputBytes;
return label;
}
}
-
+
/*don't know how to compress these character, just write it directly*/
length = 2;
label = OutputBytes;
label = OutputBytes;
return label;
}
-
+
if(AfterGetTrailUnicode){
AfterGetTrailUnicode = false;
}
label = EndLoop;
return label;
}
-
+
private int getTrail(CharBuffer source, ByteBuffer target, IntBuffer offsets){
lead = (char)c;
int label = Loop;
AfterGetTrail = true;
return label;
}
-
+
private int getTrailUnicode(CharBuffer source, ByteBuffer target, IntBuffer offsets){
int label = EndLoop;
AfterGetTrailUnicode = true;
/*c is surrogate*/
if(UTF16.isLeadSurrogate((char)c)){
- // getTrailUnicode:
+ // getTrailUnicode:
lead = (char)c;
if(source.hasRemaining()){
/*test the following code unit*/
label = EndLoop;
return label;
}
-
+
/*compress supplementary character*/
- if((window=getWindow(data.fromUDynamicOffsets))>=0 &&
- !(source.hasRemaining() && ((source.get(source.position())-0x3400)&UConverterConstants.UNSIGNED_INT_MASK) <
+ if((window=getWindow(data.fromUDynamicOffsets))>=0 &&
+ !(source.hasRemaining() && ((source.get(source.position())-0x3400)&UConverterConstants.UNSIGNED_INT_MASK) <
(0xd800 - 0x3400))){
/*
* this is the dynamic window that contains this character and the following
label = OutputBytes;
return label;
}
-
+
}
-
+
private void endLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets){
/*set the converter state back to UConverter*/
data.fromUIsSingleByteMode = isSingleByteMode;
break;
}
targetCapacity-=length;
-
+
/*normal end of conversion: prepare for a new character*/
c = 0;
sourceIndex = nextSourceIndex;
/*
* We actually do this backwards here:
* In order to save an intermediate variable, we output
- * first to the overflow buffer what does not fit into the
+ * first to the overflow buffer what does not fit into the
* regular target
*/
/* we know that 0<=targetCapacity<length<=4 */
break;
}
errorBufferLength = length;
-
+
/*now output what fits into the regular target*/
c>>=8*length; //length was reduced by targetCapacity
switch(targetCapacity){
default:
break;
}
-
+
/*target overflow*/
targetCapacity = 0;
cr = CoderResult.OVERFLOW;
return label;
}
}
-
+
}
-
+
+ @Override
public CharsetDecoder newDecoder() {
// Every call creates a new CharsetDecoderSCSU, constructed with this charset.
return new CharsetDecoderSCSU(this);
}
+ @Override
public CharsetEncoder newEncoder() {
// Every call creates a new CharsetEncoderSCSU, constructed with this charset.
return new CharsetEncoderSCSU(this);
}
-
+
+ @Override
void getUnicodeSetImpl( UnicodeSet setFillIn, int which){
// Delegates to CharsetICU.getCompleteUnicodeSet(); 'which' is not consulted here.
CharsetICU.getCompleteUnicodeSet(setFillIn);
}
-
+
}
private int endianXOR;
private byte[] bom;
private byte[] fromUSubstitution;
-
+
private int version;
public CharsetUTF16(String icuCanonicalName, String javaCanonicalName, String[] aliases) {
} else {
version = 0;
}
-
+
this.isEndianSpecified = (this instanceof CharsetUTF16BE || this instanceof CharsetUTF16LE);
this.isBigEndian = !(this instanceof CharsetUTF16LE);
this.fromUSubstitution = fromUSubstitution_LE;
this.endianXOR = ENDIAN_XOR_LE;
}
-
+
/* UnicodeBig and UnicodeLittle requires maxBytesPerChar set to 4 in Java 5 or less */
if ((VersionInfo.javaVersion().getMajor() == 1 && VersionInfo.javaVersion().getMinor() <= 5)
- && (isEndianSpecified && version == 1)) {
- maxBytesPerChar = 4;
- } else {
- maxBytesPerChar = 2;
- }
+ && (isEndianSpecified && version == 1)) {
+ maxBytesPerChar = 4;
+ } else {
+ maxBytesPerChar = 2;
+ }
minBytesPerChar = 2;
maxCharsPerByte = 1;
}
class CharsetDecoderUTF16 extends CharsetDecoderICU {
-
+
private boolean isBOMReadYet;
private int actualEndianXOR;
private byte[] actualBOM;
super(cs);
}
+ @Override
protected void implReset() {
super.implReset();
// Clear the BOM bookkeeping: no BOM has been read and none is remembered.
isBOMReadYet = false;
actualBOM = null;
}
+ @Override
protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
/*
* If we detect a BOM in this buffer, then we must add the BOM size to the offsets because the actual
return CoderResult.UNDERFLOW;
toUBytesArray[toULength++] = source.get();
}
-
+
if (isEndianSpecified && version == 1 && (toUBytesArray[toULength - 1] == actualBOM[toULength - 2] && toUBytesArray[toULength - 2] == actualBOM[toULength - 1])) {
return CoderResult.malformedForLength(2);
} else if (isEndianSpecified && version == 1 && (toUBytesArray[toULength - 1] == actualBOM[toULength - 1] && toUBytesArray[toULength - 2] == actualBOM[toULength - 2])) {
fromUnicodeStatus = (isEndianSpecified && version != 1) ? 0 : NEED_TO_WRITE_BOM;
}
+ @Override
protected void implReset() {
    super.implReset();
    // The BOM flag is cleared only for an endian-specific charset whose version is not 1;
    // in every other case a BOM is still owed to the output.
    if (isEndianSpecified && version != 1) {
        fromUnicodeStatus = 0;
    } else {
        fromUnicodeStatus = NEED_TO_WRITE_BOM;
    }
}
+ @Override
protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
CoderResult cr;
}
}
+ @Override
public CharsetDecoder newDecoder() {
// Every call creates a new CharsetDecoderUTF16, constructed with this charset.
return new CharsetDecoderUTF16(this);
}
+ @Override
public CharsetEncoder newEncoder() {
// Every call creates a new CharsetEncoderUTF16, constructed with this charset.
return new CharsetEncoderUTF16(this);
}
-
+
+ @Override
void getUnicodeSetImpl( UnicodeSet setFillIn, int which){
- getNonSurrogateUnicodeSet(setFillIn);
+ getNonSurrogateUnicodeSet(setFillIn);
}
}
super(cs);
}
+ @Override
protected void implReset() {
super.implReset();
// Clear the BOM bookkeeping: no BOM has been read and none is remembered.
isBOMReadYet = false;
actualBOM = null;
}
+ @Override
protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
/*
* If we detect a BOM in this buffer, then we must add the BOM size to the offsets because the actual
fromUnicodeStatus = isEndianSpecified ? 0 : NEED_TO_WRITE_BOM;
}
+ @Override
protected void implReset() {
    super.implReset();
    // An endian-specific charset never owes a BOM; otherwise one is pending again.
    if (isEndianSpecified) {
        fromUnicodeStatus = 0;
    } else {
        fromUnicodeStatus = NEED_TO_WRITE_BOM;
    }
}
+ @Override
protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
CoderResult cr;
}
}
+ @Override
public CharsetDecoder newDecoder() {
// Every call creates a new CharsetDecoderUTF32, constructed with this charset.
return new CharsetDecoderUTF32(this);
}
+ @Override
public CharsetEncoder newEncoder() {
// Every call creates a new CharsetEncoderUTF32, constructed with this charset.
return new CharsetEncoderUTF32(this);
}
-
-
+
+
+ @Override
void getUnicodeSetImpl( UnicodeSet setFillIn, int which){
- getNonSurrogateUnicodeSet(setFillIn);
+ getNonSurrogateUnicodeSet(setFillIn);
}
}
private final static String IMAP_NAME="IMAP-mailbox-name";
private boolean useIMAP;
protected byte[] fromUSubstitution=new byte[]{0x3F};
-
+
public CharsetUTF7(String icuCanonicalName, String javaCanonicalName, String[] aliases) {
    super(icuCanonicalName, javaCanonicalName, aliases);

    // Size limits reported for this converter.
    minBytesPerChar = 1;
    maxBytesPerChar = 4; /* max 3 bytes per code unit from UTF-7 (base64) */
    maxCharsPerByte = 1;

    // The IMAP variant is selected solely by the ICU canonical name.
    useIMAP = icuCanonicalName.equals(IMAP_NAME);
}
-
+
//private static boolean inSetD(char c) {
// return (
// (char)(c - 97) < 26 || (char)(c - 65) < 26 || /* letters */
// (c==58) || (c==63) /* :? */
// );
//}
-
+
//private static boolean inSetO(char c) {
// return (
// (char)(c - 33) < 6 || /* !"#$%& */
// (c==58) || (c==63) /* *@[ */
// );
//}
-
+
// True for carriage return (13), line feed (10) and horizontal tab (9); false otherwise.
private static boolean isCRLFTAB(char c) {
    switch (c) {
    case '\t':
    case '\n':
    case '\r':
        return true;
    default:
        return false;
    }
}
-
+
//private static boolean isCRLFSPTAB(char c) {
// return (
// (c==32) || (c==13) || (c==10) || (c==9)
// );
//}
-
+
private static final byte PLUS=43;
private static final byte MINUS=45;
private static final byte BACKSLASH=92;
private static final byte AMPERSAND=0x26;
private static final byte COMMA=0x2c;
private static final byte SLASH=0x2f;
-
+
// legal byte values: all US-ASCII graphic characters 0x20..0x7e
private static boolean isLegal(char c, boolean useIMAP) {
if (useIMAP) {
);
}
}
-
+
// IMAP direct set: every character that is legal in IMAP mode (printable ASCII
// 0x20..0x7e) except the ampersand 0x26.
private static boolean inSetDIMAP(char c) {
    if (!isLegal(c, true)) {
        return false;
    }
    return c != AMPERSAND;
}
-
+
// IMAP variant of the base64 lookup: indices below 63 are read from TO_BASE_64,
// any other index yields a comma.
private static byte TO_BASE64_IMAP(int n) {
    if (n < 63) {
        return TO_BASE_64[n];
    }
    return COMMA;
}
-
+
// IMAP variant of the reverse base64 lookup: a comma maps to 63, a slash to -1,
// and every other character is looked up in FROM_BASE_64.
private static byte FROM_BASE64_IMAP(char c) {
    if (c == COMMA) {
        return 63;
    }
    if (c == SLASH) {
        return -1;
    }
    return FROM_BASE_64[c];
}
-
+
/* encode directly sets D and O and CR LF SP TAB */
private static final byte ENCODE_DIRECTLY_MAXIMUM[] =
{
/*0 1 2 3 4 5 6 7 8 9 a b c d e f*/
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
-
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0
};
-
+
/* encode directly set D and CR LF SP TAB but not set O */
private static final byte ENCODE_DIRECTLY_RESTRICTED[] =
{
/*0 1 2 3 4 5 6 7 8 9 a b c d e f*/
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-
+
1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
-
+
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
-
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
+
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0
};
-
+
private static final byte TO_BASE_64[] =
{
/* A-Z */
/* +/ */
43, 47
};
-
+
private static final byte FROM_BASE_64[] =
{
/* C0 controls, -1 for legal ones (CR LF TAB), -3 for illegal ones */
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
/* A-Z */
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
- 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -3, -1, -1, -1,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -3, -1, -1, -1,
/* a-z*/
-1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -3, -3
};
-
+
class CharsetDecoderUTF7 extends CharsetDecoderICU {
public CharsetDecoderUTF7(CharsetICU cs) {
super(cs);
// Put the status word into its initial state right after construction.
implReset();
}
-
+
+ @Override
protected void implReset() {
    super.implReset();
    // Keep only the top four bits of the old status word and set bit 24 (the position
    // decodeLoop uses for inDirectMode); the counter and bit fields below it become zero.
    final int preservedTopBits = toUnicodeStatus & 0xf0000000;
    toUnicodeStatus = preservedTopBits | 0x1000000;
}
-
- protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
+
+ @Override
+ protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
CoderResult cr=CoderResult.UNDERFLOW;
byte base64Value;
byte base64Counter;
char bits;
int byteIndex;
int sourceIndex, nextSourceIndex;
-
+
int length;
-
+
char b;
char c;
-
+
int sourceArrayIndex=source.position();
-
+
//get the state of the machine state
{
int status=toUnicodeStatus;
byteIndex=toULength;
/* sourceIndex=-1 if the current character began in the previous buffer */
sourceIndex=byteIndex==0 ? 0 : -1;
- nextSourceIndex=0;
-
+ nextSourceIndex=0;
+
directMode: while (true) {
if (inDirectMode==1) {
- /*
+ /*
* In Direct Mode, most US-ASCII characters are encoded directly, i.e.,
* with their US-ASCII byte values.
* Backslash and Tilde and most control characters are not allowed in UTF-7.
* A plus sign starts Unicode (or "escape") Mode.
* An ampersand starts Unicode Mode for IMAP.
- *
+ *
* In Direct Mode, only the sourceIndex is used.
*/
byteIndex=0;
length=source.remaining();
//targetCapacity=target.remaining();
- //Commented out because length of source may be larger than target when it comes to bytes
+ //Commented out because length of source may be larger than target when it comes to bytes
/*if (useIMAP && length > targetCapacity) {
length=targetCapacity;
}*/
}
break directMode;
} else { /* Unicode Mode*/
- /*
+ /*
* In Unicode Mode, UTF-16BE is base64-encoded.
* The base64 sequence ends with any character that is not in the base64 alphabet.
* A terminating minus sign is consumed.
- *
+ *
* In Unicode Mode, the sourceIndex has the index to the start of the current
* base64 bytes, while nextSourceIndex is precisely parallel to source,
* keeping the index to the following byte.
* 2.2.2. Else if the current char is illegal, we might as well deal with it here.
*/
inDirectMode=1;
-
+
if(base64Counter==-1) {
/* illegal: + immediately followed by something other than base64 or minus sign */
/* include the plus sign in the reported sequence, but not the subsequent char */
bits=0;
base64Counter=0;
break;
- //default:
+ //default:
/* will never occur */
- //break;
+ //break;
}//end of switch
} else if (!useIMAP || (useIMAP && base64Value==-2)) {
/* minus sign terminates the base64 sequence */
}
sourceIndex=nextSourceIndex;
continue directMode;
- } else if (useIMAP) {
+ } else if (useIMAP) {
if (base64Counter==-1) {
// illegal: & immediately followed by something other than base64 or minus sign
// include the ampersand in the reported sequence
inDirectMode=1;
cr=CoderResult.malformedForLength(sourceIndex);
}
-
+
} else {
if (!cr.isError() && flush && !source.hasRemaining() && bits ==0) {
/*
* if we are in Unicode Mode, then the byteIndex might not be 0,
* but that is ok if bits == 0
- * -> we set byteIndex=0 at the end of the stream to avoid a truncated error
+ * -> we set byteIndex=0 at the end of the stream to avoid a truncated error
* (not true for IMAP-mailbox-name where we must end in direct mode)
*/
if (!cr.isOverflow()) {
}
}
/* set the converter state */
- toUnicodeStatus=(inDirectMode<<24 | (((short)base64Counter & UConverterConstants.UNSIGNED_BYTE_MASK)<<16) | (int)bits);
+ toUnicodeStatus=(inDirectMode<<24 | ((base64Counter & UConverterConstants.UNSIGNED_BYTE_MASK)<<16) | bits);
toULength=byteIndex;
-
+
return cr;
}
}
-
+
class CharsetEncoderUTF7 extends CharsetEncoderICU {
public CharsetEncoderUTF7(CharsetICU cs) {
// fromUSubstitution is passed to the base encoder as the substitution byte sequence.
super(cs, fromUSubstitution);
// Put the status word into its initial state right after construction.
implReset();
}
-
+
+ @Override
protected void implReset() {
    super.implReset();
    // Keep only the top four bits of the old status word and set bit 24 (the position
    // encodeLoop uses for inDirectMode); the counter and bit fields below it become zero.
    final int preservedTopBits = fromUnicodeStatus & 0xf0000000;
    fromUnicodeStatus = preservedTopBits | 0x1000000;
}
-
+
+ @Override
protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
CoderResult cr=CoderResult.UNDERFLOW;
byte inDirectMode;
byte encodeDirectly[];
int status;
-
+
int length, targetCapacity, sourceIndex;
-
+
byte base64Counter;
char bits;
char c;
}
/* UTF-7 always encodes UTF-16 code units, therefore we need only a simple sourceIndex */
sourceIndex=0;
-
+
directMode: while(true) {
if(inDirectMode==1) {
length=source.remaining();
cr=CoderResult.OVERFLOW;
}
break directMode;
- } else {
+ } else {
/* Unicode Mode */
while (source.hasRemaining()) {
if (target.hasRemaining()) {
if ((!useIMAP && c<=127 && encodeDirectly[c]==1) || (useIMAP && isLegal(c, useIMAP))) {
/* encode directly */
inDirectMode=1;
-
+
/* trick: back out this character to make this easier */
source.position(source.position() - 1);
-
+
/* terminate the base64 sequence */
if (base64Counter!=0) {
/* write remaining bits for the previous character */
* base64 this character:
* Output 2 or 3 base64 bytes for the remaining bits of the previous character
* and the bits of this character, each implicitly in UTF-16BE.
- *
+ *
* Here, bits is an 8-bit variable because only 6 bits need to be kept from one
* character to the next. The actual 2 or 4 bits are shifted to the left edge
* of the 6-bit field 5..0 to make the termination of the base64 sequence easier.
//default:
/* will never occur */
//break;
- } //end of switch
- }
+ } //end of switch
+ }
} else {
/* target is full */
cr=CoderResult.OVERFLOW;
break directMode;
}
} //end of directMode label
-
+
if (flush && !source.hasRemaining()) {
/* flush remaining bits to the target */
if (inDirectMode==0) {
cr=CoderResult.OVERFLOW;
}
}
-
+
/* need to terminate with a minus */
if (target.hasRemaining()) {
target.put(MINUS);
fromUnicodeStatus=((status&0xf0000000) | 0x1000000); /* keep version, inDirectMode=TRUE */
} else {
/* set the converter state back */
- fromUnicodeStatus=((status&0xf0000000) | (inDirectMode<<24) | (((short)base64Counter & UConverterConstants.UNSIGNED_BYTE_MASK)<<16) | ((int)bits));
+ fromUnicodeStatus=((status&0xf0000000) | (inDirectMode<<24) | ((base64Counter & UConverterConstants.UNSIGNED_BYTE_MASK)<<16) | (bits));
}
-
+
return cr;
}
}
-
+
+ @Override
public CharsetDecoder newDecoder() {
    // Every call produces a fresh UTF-7 decoder bound to this charset instance.
    final CharsetDecoder decoder = new CharsetDecoderUTF7(this);
    return decoder;
}
-
+
+ @Override
public CharsetEncoder newEncoder() {
    // Every call produces a fresh UTF-7 encoder bound to this charset instance.
    final CharsetEncoder encoder = new CharsetEncoderUTF7(this);
    return encoder;
}
-
+
+ @Override
void getUnicodeSetImpl( UnicodeSet setFillIn, int which){
// Delegates entirely to getCompleteUnicodeSet(); the 'which' selector is not consulted here.
getCompleteUnicodeSet(setFillIn);
}
super(cs);
}
+ @Override
protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets,
boolean flush) {
if (!source.hasRemaining()) {
implReset();
}
+ @Override
protected void implReset() {
// No state of its own is reset here; the work is left to the superclass.
super.implReset();
}
+ @Override
protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets,
boolean flush) {
if (!source.hasRemaining()) {
CoderResult cr = handleSurrogates(sourceArray, sourceIndex, sourceLimit, (char)char32);
if (cr != null)
return cr;
-
+
sourceIndex++;
char32 = fromUChar32;
fromUChar32 = 0;
CoderResult cr = handleSurrogates(source, (char)char32);
if (cr != null)
return cr;
-
+
char32 = fromUChar32;
fromUChar32 = 0;
-
+
/* the rest is routine -- encode four bytes, stopping on overflow */
target.put(encodeHeadOf4(char32));
// UConverterConstants.UNSIGNED_BYTE_MASK)<0x3e);}
/*
* Is this code unit (byte) a UTF-8 trail byte?
- *
+ *
* @param c
* 8-bit code unit (byte)
* @return TRUE or FALSE
return (((c) & 0xc0) == 0x80);
}*/
+ @Override
public CharsetDecoder newDecoder() {
    // Every call produces a fresh UTF-8 decoder bound to this charset instance.
    final CharsetDecoder decoder = new CharsetDecoderUTF8(this);
    return decoder;
}
+ @Override
public CharsetEncoder newEncoder() {
    // Every call produces a fresh UTF-8 encoder bound to this charset instance.
    final CharsetEncoder encoder = new CharsetEncoderUTF8(this);
    return encoder;
}
-
-
+
+
+ @Override
void getUnicodeSetImpl( UnicodeSet setFillIn, int which){
// Delegates entirely to getNonSurrogateUnicodeSet(); the 'which' selector is not consulted here.
getNonSurrogateUnicodeSet(setFillIn);
}
return ICUBinary.getInts(byteBuffer, n, 0);
}
+ @Override
public boolean isDataVersionAcceptable(byte version[])
{
return version.length >= DATA_FORMAT_VERSION.length
&& version[1] == DATA_FORMAT_VERSION[1]
&& version[2] == DATA_FORMAT_VERSION[2];
}
-
+
/*byte[] getUnicodeVersion(){
- return ICUBinary.getVersionByteArrayFromCompactInt(unicodeVersion);
+ return ICUBinary.getVersionByteArrayFromCompactInt(unicodeVersion);
}*/
// private data members -------------------------------------------------
* -- normal base table with optional extension
*
* int32_t stateTable[countStates][256];
- *
+ *
* struct _MBCSToUFallback { (fallbacks are sorted by offset)
* uint32_t offset;
* UChar32 codePoint;
* } toUFallbacks[countToUFallbacks];
- *
+ *
* uint16_t unicodeCodeUnits[(offsetFromUTable-offsetToUCodeUnits)/2];
* (padded to an even number of units)
- *
+ *
* -- stage 1 tables
* if(staticData.unicodeMask&UCNV_HAS_SUPPLEMENTARY) {
* -- stage 1 table for all of Unicode
* -- BMP-only tables have a smaller stage 1 table
* uint16_t fromUTable[0x40]; (32-bit-aligned)
* }
- *
+ *
* -- stage 2 tables
* length determined by top of stage 1 and bottom of stage 3 tables
* if(outputType==MBCS_OUTPUT_1) {
* -- DBCS, MBCS, EBCDIC_STATEFUL, ...: roundtrip flags and indexes
* uint32_t stage 2 flags and indexes[?];
* }
- *
+ *
* -- stage 3 tables with byte results
* if(outputType==MBCS_OUTPUT_1) {
* -- SBCS: each 16-bit result contains flags and the result byte, see ucnvmbcs.c
* the result bytes in fromUBytes[]; (0 indexes fromUBytes[0])
* }
* }
- *
+ *
* The first pair in a section contains the number of following pairs in the
* UChar position (16 bits, number=1..0xffff).
* The value of the initial pair is used when the current UChar is not found
private static final class IsAcceptable implements ICUBinary.Authenticate {
// @Override when we switch to Java 6
+ @Override
public boolean isDataVersionAcceptable(byte formatVersion[]) {
    // Only the first byte of the format version is inspected; it must equal 6.
    final byte leadingByte = formatVersion[0];
    return leadingByte == 6;
}
private static final boolean DEBUG = false;
private static final class BundleImporter implements CollationRuleParser.Importer {
BundleImporter() {}
+ @Override
public String getRules(String localeID, String collationType) {
    // Turn the locale ID string into a ULocale, then let CollationLoader fetch the rules.
    final ULocale locale = new ULocale(localeID);
    return CollationLoader.loadRules(locale, collationType);
}
}
/** Implements CollationRuleParser.Sink. */
- // Java 6: @Override
+ @Override
void addRelation(int strength, CharSequence prefix, CharSequence str, CharSequence extension) {
String nfdPrefix;
if(prefix.length() == 0) {
CEFinalizer(long[] ces) {
finalCEs = ces;
}
+ @Override
public long modifyCE32(int ce32) {
assert(!Collation.isSpecialCE32(ce32));
if(CollationBuilder.isTempCE32(ce32)) {
return Collation.NO_CE;
}
}
+ @Override
public long modifyCE(long ce) {
if(CollationBuilder.isTempCE(ce)) {
// retain case bits
}
private static final class IsAcceptable implements ICUBinary.Authenticate {
- // @Override when we switch to Java 6
+ @Override
public boolean isDataVersionAcceptable(byte version[]) {
    // Only the first version byte is inspected; it must equal 5.
    final byte leadingByte = version[0];
    return leadingByte == 5;
}
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
-/*
+/*
*******************************************************************************
*
* Copyright (C) 1999-2015, International Business Machines
long start, end;
int length, count;
- // Java 6: @Override
+ @Override
public int compareTo(WeightRange other) {
long l=start;
long r=other.start;
* <h2>Direct Use</h2>
* <p>The following shows an example of building an index directly.
* The "show..." methods below are just to illustrate usage.
- *
+ *
* <pre>
* // Create a simple index where the values for the strings are Integers, and add the strings
- *
+ *
* AlphabeticIndex<Integer> index = new AlphabeticIndex<Integer>(desiredLocale).addLabels(additionalLocale);
* int counter = 0;
* for (String item : test) {
- * index.addRecord(item, counter++);
+ * index.addRecord(item, counter++);
* }
* ...
* // Show index at top. We could skip or gray out empty buckets
- *
+ *
* for (AlphabeticIndex.Bucket<Integer> bucket : index) {
* if (showAll || bucket.size() != 0) {
* showLabelAtTop(UI, bucket.getLabel());
* }
* ...
* // Show the buckets with their contents, skipping empty buckets
- *
+ *
* for (AlphabeticIndex.Bucket<Integer> bucket : index) {
* if (bucket.size() != 0) {
* showLabelInList(UI, bucket.getLabel());
// Comparator for records, so that the Record class can be static.
private final Comparator<Record<V>> recordComparator = new Comparator<Record<V>>() {
+ @Override
public int compare(Record<V> o1, Record<V> o2) {
// Records are ordered solely by name, as judged by collatorOriginal; the data payload is not compared.
return collatorOriginal.compare(o1.name, o2.name);
}
* {@inheritDoc}
* @stable ICU 51
*/
+ @Override
public Iterator<Bucket<V>> iterator() {
    // Iteration is handed off directly to the underlying bucket collection.
    final Iterator<Bucket<V>> bucketIterator = buckets.iterator();
    return bucketIterator;
}
/**
* Create the index object.
- *
+ *
* @param locale
* The locale for the index.
* @stable ICU 4.8
/**
* Create the index object.
- *
+ *
* @param locale
* The locale for the index.
* @stable ICU 4.8
this(ULocale.forLocale(locale), null);
}
- /**
+ /**
* Create an AlphabeticIndex that uses a specific collator.
- *
+ *
* <p>The index will be created with no labels; the addLabels() function must be called
* after creation to add the desired labels to the index.
- *
- * <p>The index will work directly with the supplied collator. If the caller will need to
+ *
+ * <p>The index will work directly with the supplied collator. If the caller will need to
* continue working with the collator it should be cloned first, so that the
* collator provided to the AlphabeticIndex remains unchanged after creation of the index.
- *
+ *
* @param collator The collator to use to order the contents of this index.
* @stable ICU 51
*/
/**
* Get the default label used in the IndexCharacters' locale for underflow, eg the last item in: X Y Z ...
- *
+ *
* @return underflow label
* @stable ICU 4.8
*/
/**
* Get the default label used in the IndexCharacters' locale for overflow, eg the first item in: ... A B C
- *
+ *
* @return overflow label
* @stable ICU 4.8
*/
/**
* Get the default label used for abbreviated buckets <i>between</i> other labels. For example, consider the labels
* for Latin and Greek are used: X Y Z ... Α Β Γ.
- *
+ *
* @return inflow label
* @stable ICU 4.8
*/
/**
* Get the limit on the number of labels in the index. The number of buckets can be slightly larger: see getBucketCount().
- *
+ *
* @return maxLabelCount maximum number of labels.
* @stable ICU 4.8
*/
/**
* Get the labels.
- *
+ *
* @return The list of bucket labels, after processing.
* @stable ICU 4.8
*/
* <p>
* <b><i>Don't use this method across threads if you are changing the settings on the collator, at least not without
* synchronizing.</i></b>
- *
+ *
* @return a clone of the collator used internally
* @stable ICU 4.8
*/
* Add a record (name and data) to the index. The name will be used to sort the items into buckets, and to sort
* within the bucket. Two records may have the same name. When they do, the sort order is according to the order added:
* the first added comes first.
- *
+ *
* @param name
* Name, such as a name
* @param data
* <p>
* Note that the bucket number (and sort key) are only valid for the settings of the current AlphabeticIndex; if
* those are changed, then the bucket number and sort key must be regenerated.
- *
+ *
* @param name
* Name, such as a name
* @return the bucket index for the name
/**
* Clear the index.
- *
+ *
* @return this, for chaining
* @stable ICU 4.8
*/
/**
* Return the number of buckets in the index. This will be the same as the number of labels, plus buckets for the underflow, overflow, and inflow(s).
- *
+ *
* @return number of buckets
* @stable ICU 4.8
*/
/**
* Return the number of records in the index: that is, the total number of distinct <name,data> pairs added with addRecord(...), over all the buckets.
- *
+ *
* @return total number of records in buckets
* @stable ICU 4.8
*/
/**
* Return an iterator over the buckets.
- *
+ *
* @return iterator over buckets.
* @stable ICU 4.8
*/
+ @Override
public Iterator<Bucket<V>> iterator() {
initBuckets();
return buckets.iterator();
/**
* A (name, data) pair, to be sorted by name into one of the index buckets.
* The user data is not used by the index implementation.
- *
+ *
* @stable ICU 4.8
*/
public static class Record<V> {
/**
* Get the name
- *
+ *
* @return the name
* @stable ICU 4.8
*/
/**
* Get the data
- *
+ *
* @return the data
* @stable ICU 4.8
*/
* Standard toString()
* @stable ICU 4.8
*/
+ @Override
public String toString() {
    // Rendered as "<name>=<data>".
    final String prefix = name + "=";
    return prefix + data;
}
* returned by {@link AlphabeticIndex.ImmutableIndex#getBucket(int)},
* and {@link AlphabeticIndex#addRecord(CharSequence, Object)} adds a record
* into a bucket according to the record's name.
- *
+ *
* @param <V>
* Data type
* @stable ICU 4.8
/**
* Type of the label
- *
+ *
* @stable ICU 4.8
*/
public enum LabelType {
/**
* Set up the bucket.
- *
+ *
* @param label
* label for the bucket
* @param labelType
/**
* Get the label
- *
+ *
* @return label for the bucket
* @stable ICU 4.8
*/
/**
* Is a normal, underflow, overflow, or inflow bucket
- *
+ *
* @return is an underflow, overflow, or inflow bucket
* @stable ICU 4.8
*/
/**
* Get the number of records in the bucket.
- *
+ *
* @return number of records in bucket
* @stable ICU 4.8
*/
* Iterator over the records in the bucket
* @stable ICU 4.8
*/
+ @Override
public Iterator<Record<V>> iterator() {
if (records == null) {
return Collections.<Record<V>>emptyList().iterator();
/**
* Iterator over just the visible buckets.
*/
+ @Override
public Iterator<Bucket<V>> iterator() {
    // Iterate over the immutable list so that callers cannot remove() buckets.
    final Iterator<Bucket<V>> visibleBuckets = immutableVisibleList.iterator();
    return visibleBuckets;
}
final class CollatorServiceShim extends Collator.ServiceShim {
+ @Override
Collator getInstance(ULocale locale) {
// use service cache, it's faster than instantiation
// if (service.isDefault()) {
}
}
+ @Override
Object registerInstance(Collator collator, ULocale locale) {
// Set the collator locales while registering so that getInstance()
// need not guess whether the collator's locales are already set properly
return service.registerObject(collator, locale);
}
+ @Override
Object registerFactory(CollatorFactory f) {
class CFactory extends LocaleKeyFactory {
CollatorFactory delegate;
this.delegate = fctry;
}
+ @Override
public Object handleCreate(ULocale loc, int kind, ICUService srvc) {
    // kind and srvc are not consulted; the wrapped CollatorFactory creates the collator.
    return delegate.createCollator(loc);
}
+ @Override
public String getDisplayName(String id, ULocale displayLocale) {
    // The id string is converted to a ULocale before asking the wrapped factory for its display name.
    return delegate.getDisplayName(new ULocale(id), displayLocale);
}
+ @Override
public Set<String> getSupportedIDs() {
    // The supported IDs are exactly the locale IDs reported by the wrapped factory.
    final Set<String> localeIds = delegate.getSupportedLocaleIDs();
    return localeIds;
}
return service.registerFactory(new CFactory(f));
}
+ @Override
boolean unregister(Object registryKey) {
    // The key must be a Factory; any other non-null object fails the cast with ClassCastException.
    final Factory factory = (Factory) registryKey;
    return service.unregisterFactory(factory);
}
+ @Override
Locale[] getAvailableLocales() {
// TODO rewrite this to just wrap getAvailableULocales later
Locale[] result;
return result;
}
+ @Override
ULocale[] getAvailableULocales() {
ULocale[] result;
if (service.isDefault()) {
return result;
}
+ @Override
String getDisplayName(ULocale objectLocale, ULocale displayLocale) {
String id = objectLocale.getName();
return service.getDisplayName(id, displayLocale);
///CLOVER:OFF
// The following method can not be reached by testing
+ @Override
protected Object handleDefault(Key key, String[] actualIDReturn) {
if (actualIDReturn != null) {
actualIDReturn[0] = "root";
// - Other utilities?
public class GlobalizationPreferences implements Freezable<GlobalizationPreferences> {
-
+
/**
* Default constructor
* @draft ICU 3.6
* @draft ICU 3.6
* @provisional This API might change or be removed in a future release.
*/
- public static final int
+ public static final int
NF_NUMBER = 0, // NumberFormat.NUMBERSTYLE
NF_CURRENCY = 1, // NumberFormat.CURRENCYSTYLE
NF_PERCENT = 2, // NumberFormat.PERCENTSTYLE
ID_LANGUAGE = 1,
ID_SCRIPT = 2,
ID_TERRITORY = 3,
- ID_VARIANT = 4,
+ ID_VARIANT = 4,
ID_KEYWORD = 5,
ID_KEYWORD_VALUE = 6,
ID_CURRENCY = 7,
* for the appropriate territory, currency, timezone, etc. The
* user should be given the opportunity to correct those defaults
* in case they are incorrect.
- *
- * @param inputLocales list of locales in priority order, eg {"be", "fr"}
+ *
+ * @param inputLocales list of locales in priority order, eg {"be", "fr"}
* for Breton first, then French if that fails.
* @return this, for chaining
* @draft ICU 3.6
/**
* Get a copy of the language/locale priority list
- *
+ *
* @return a copy of the language/locale priority list.
* @draft ICU 3.6
* @provisional This API might change or be removed in a future release.
if (locales == null) {
result = guessLocales();
} else {
- result = new ArrayList<ULocale>();
+ result = new ArrayList<ULocale>();
result.addAll(locales);
}
return result;
/**
* Convenience routine for setting the language/locale priority
* list from an array.
- *
+ *
* @see #setLocales(List locales)
* @param uLocales list of locales in an array
* @return this, for chaining
/**
* Convenience routine for setting the language/locale priority
* list from a single locale/language.
- *
+ *
* @see #setLocales(List locales)
* @param uLocale single locale
* @return this, for chaining
* Convenience routine for setting the locale priority list from
* an Accept-Language string.
* @see #setLocales(List locales)
- * @param acceptLanguageString Accept-Language list, as defined by
+ * @param acceptLanguageString Accept-Language list, as defined by
* Section 14.4 of the RFC 2616 (HTTP 1.1)
* @return this, for chaining
* @draft ICU 3.6
* Convenience function to get a ResourceBundle instance using
* the specified base name based on the language/locale priority list
* stored in this object.
- *
+ *
* @param baseName the base name of the resource bundle, a fully qualified
* class name
* @return a resource bundle for the given base name and locale based on the
* Convenience function to get a ResourceBundle instance using
* the specified base name and class loader based on the language/locale
* priority list stored in this object.
- *
+ *
* @param baseName the base name of the resource bundle, a fully qualified
* class name
* @param loader the class object from which to load the resource bundle
}
return urb;
}
-
+
/**
* Sets the territory, which is a valid territory according to
* RFC 3066 (or successor). If not otherwise set, default
* currency and timezone values will be set from this. The user
* should be given the opportunity to correct those defaults in
* case they are incorrect.
- *
+ *
* @param territory code
* @return this, for chaining
* @draft ICU 3.6
/**
* Gets the territory setting. If it wasn't explicitly set, it is
* computed from the general locale setting.
- *
+ *
* @return territory code, explicit or implicit.
* @draft ICU 3.6
* @provisional This API might change or be removed in a future release.
/**
* Sets the currency code. If this has not been set, uses default for territory.
- *
+ *
* @param currency Valid ISO 4217 currency code.
* @return this, for chaining
* @draft ICU 3.6
/**
* Get a copy of the currency computed according to the settings.
- *
+ *
* @return currency code, explicit or implicit.
* @draft ICU 3.6
* @provisional This API might change or be removed in a future release.
/**
* Sets the calendar. If this has not been set, uses default for territory.
- *
+ *
* @param calendar arbitrary calendar
* @return this, for chaining
* @draft ICU 3.6
}
/**
- * Get a copy of the calendar according to the settings.
- *
+ * Get a copy of the calendar according to the settings.
+ *
* @return calendar explicit or implicit.
* @draft ICU 3.6
* @provisional This API might change or be removed in a future release.
/**
* Sets the timezone ID. If this has not been set, uses default for territory.
- *
+ *
* @param timezone a valid TZID (see UTS#35).
* @return this, for chaining
* @draft ICU 3.6
/**
* Get the timezone. It was either explicitly set, or is
* heuristically computed from other settings.
- *
+ *
* @return timezone, either implicitly or explicitly set
* @draft ICU 3.6
* @provisional This API might change or be removed in a future release.
}
/**
- * Get a copy of the collator according to the settings.
- *
+ * Get a copy of the collator according to the settings.
+ *
* @return collator explicit or implicit.
* @draft ICU 3.6
* @provisional This API might change or be removed in a future release.
throw new UnsupportedOperationException("Attempt to modify immutable object");
}
try {
- this.collator = (Collator) collator.clone(); // clone for safety
+ this.collator = (Collator) collator.clone(); // clone for safety
} catch (CloneNotSupportedException e) {
throw new ICUCloneNotSupportedException("Error in cloning collator", e);
}
/**
* Get a copy of the break iterator for the specified type according to the
* settings.
- *
+ *
* @param type break type - BI_CHARACTER or BI_WORD, BI_LINE, BI_SENTENCE, BI_TITLE
* @return break iterator explicit or implicit
* @draft ICU 3.6
/**
* Explicitly set the break iterator for this object.
- *
+ *
* @param type break type - BI_CHARACTER or BI_WORD, BI_LINE, BI_SENTENCE, BI_TITLE
* @param iterator a break iterator
* @return this, for chaining
/**
* Get the display name for an ID: language, script, territory, currency, timezone...
* Uses the language priority list to do so.
- *
+ *
* @param id language code, script code, ...
* @param type specifies the type of the ID: ID_LANGUAGE, etc.
* @return the display name
}
switch (type) {
case ID_LOCALE:
- result = ULocale.getDisplayName(id, locale);
+ result = ULocale.getDisplayName(id, locale);
break;
case ID_LANGUAGE:
- result = ULocale.getDisplayLanguage(id, locale);
+ result = ULocale.getDisplayLanguage(id, locale);
break;
case ID_SCRIPT:
- result = ULocale.getDisplayScript("und-" + id, locale);
+ result = ULocale.getDisplayScript("und-" + id, locale);
break;
case ID_TERRITORY:
- result = ULocale.getDisplayCountry("und-" + id, locale);
+ result = ULocale.getDisplayCountry("und-" + id, locale);
break;
case ID_VARIANT:
// TODO fix variant parsing
- result = ULocale.getDisplayVariant("und-QQ-" + id, locale);
+ result = ULocale.getDisplayVariant("und-QQ-" + id, locale);
break;
case ID_KEYWORD:
- result = ULocale.getDisplayKeyword(id, locale);
+ result = ULocale.getDisplayKeyword(id, locale);
break;
case ID_KEYWORD_VALUE:
String[] parts = new String[2];
case ID_CURRENCY_SYMBOL:
case ID_CURRENCY:
Currency temp = new Currency(id);
- result =temp.getName(locale, type==ID_CURRENCY
- ? Currency.LONG_NAME
+ result =temp.getName(locale, type==ID_CURRENCY
+ ? Currency.LONG_NAME
: Currency.SYMBOL_NAME, new boolean[1]);
// TODO: have method that doesn't take parameter. Add
// function to determine whether string is choice
- // format.
+ // format.
// TODO: have method that doesn't require us
// to create a currency
break;
// TODO, have method that doesn't require us to create a timezone
// fix other hacks
// hack for couldn't match
-
+
boolean isBadStr = false;
// Matcher badTimeZone = Pattern.compile("[A-Z]{2}|.*\\s\\([A-Z]{2}\\)").matcher("");
// badtzstr = badTimeZone.reset(result).matches();
* a particular combination of dateStyle and timeStyle. DF_NONE should
* be used if for the style, where only the date or time format individually
* is being set.
- *
+ *
* @param dateStyle DF_FULL, DF_LONG, DF_MEDIUM, DF_SHORT or DF_NONE
* @param timeStyle DF_FULL, DF_LONG, DF_MEDIUM, DF_SHORT or DF_NONE
* @param format The date format
* is returned. Otherwise, the language priority list is used.
* DF_NONE should be used for the style, where only the date or
* time format individually is being gotten.
- *
+ *
* @param dateStyle DF_FULL, DF_LONG, DF_MEDIUM, DF_SHORT or DF_NONE
* @param timeStyle DF_FULL, DF_LONG, DF_MEDIUM, DF_SHORT or DF_NONE
* @return a DateFormat, according to the above description
* there is an explicit (non-null) number format set, a copy of
* that is returned. Otherwise, the language priority list is
* used.
- *
+ *
* @param style NF_NUMBER, NF_CURRENCY, NF_PERCENT, NF_SCIENTIFIC, NF_INTEGER
* @draft ICU 3.6
* @provisional This API might change or be removed in a future release.
/**
* Sets a number format explicitly. Overrides the general locale settings.
- *
+ *
* @param style NF_NUMBER, NF_CURRENCY, NF_PERCENT, NF_SCIENTIFIC, NF_INTEGER
* @param format The number format
* @return this, for chaining
/**
* Restore the object to the initial state.
- *
+ *
* @return this, for chaining
* @draft ICU 3.6
* @provisional This API might change or be removed in a future release.
* Before: en_US, en, en_GB, en, en, fr_FR, fr
* <br>
* After: en_US, en_GB, en, fr_FR, fr
- * <br>
+ * <br>
* <br>
* The final locale list is used to produce a default value for the appropriate territory,
* currency, timezone, etc. The list also represents the lookup order used in
* <code>getResourceBundle</code> for this object. A subclass may override this method
* to customize the algorithm used for populating the locale list.
- *
+ *
* @param inputLocales The list of input locales
* @draft ICU 3.6
* @provisional This API might change or be removed in a future release.
// We want to see zh_Hant_HK before zh_HK
result.add(j, uloc);
bInserted = true;
- break;
+ break;
} else if (script.length() == 0 && country.length() > 0 && c.length() == 0) {
// We want to see zh_HK before zh_Hant
result.add(j, uloc);
/*
* Step 3: Remove earlier occurrence of duplicated locales
- *
+ *
* Example:
* Before - en_US_Boston, en_US, en, en_US, en, fr_FR, fr,
* zh_TW, zn, zh_Hant, zh, zh, fr_CA, fr
return result;
}
-
+
/**
* This function can be overridden by subclasses to use different heuristics.
* <b>It MUST return a 'safe' value,
* one whose modification will not affect this object.</b>
- *
+ *
* @param dateStyle
* @param timeStyle
* @draft ICU 3.6
* This function can be overridden by subclasses to use different heuristics.
* <b>It MUST return a 'safe' value,
* one whose modification will not affect this object.</b>
- *
+ *
* @param style
* @draft ICU 3.6
* @provisional This API might change or be removed in a future release.
/**
* This function can be overridden by subclasses to use different heuristics.
- *
+ *
* @draft ICU 3.6
* @provisional This API might change or be removed in a future release.
*/
/**
* This function can be overridden by subclasses to use different heuristics
- *
+ *
* @draft ICU 3.6
* @provisional This API might change or be removed in a future release.
*/
* This function can be overridden by subclasses to use different heuristics
* <b>It MUST return a 'safe' value,
* one whose modification will not affect this object.</b>
- *
+ *
* @draft ICU 3.6
* @provisional This API might change or be removed in a future release.
*/
* This function can be overridden by subclasses to use different heuristics.
* <b>It MUST return a 'safe' value,
* one whose modification will not affect this object.</b>
- *
+ *
* @draft ICU 3.6
* @provisional This API might change or be removed in a future release.
*/
* This function can be overridden by subclasses to use different heuristics.
* <b>It MUST return a 'safe' value,
* one whose modification will not affect this object.</b>
- *
+ *
* @param type
* @draft ICU 3.6
* @provisional This API might change or be removed in a future release.
* This function can be overridden by subclasses to use different heuristics.
* <b>It MUST return a 'safe' value,
* one whose modification will not affect this object.</b>
- *
+ *
* @draft ICU 3.6
* @provisional This API might change or be removed in a future release.
*/
// for single-zone countries, pick that zone
// for others, pick the most populous zone
// for now, just use fixed value
- // NOTE: in a few cases can do better by looking at language.
+ // NOTE: in a few cases can do better by looking at language.
// Eg haw+US should go to Pacific/Honolulu
// fr+CA should go to America/Montreal
String timezoneString = territory_tzid_hack_map.get(getTerritory());
* This function can be overridden by subclasses to use different heuristics.
* <b>It MUST return a 'safe' value,
* one whose modification will not affect this object.</b>
- *
+ *
* @draft ICU 3.6
* @provisional This API might change or be removed in a future release.
*/
}
return Calendar.getInstance(getTimeZone(), calLocale);
}
-
+
// PRIVATES
-
+
private List<ULocale> locales;
private String territory;
private Currency currency;
private DateFormat[][] dateFormats;
private NumberFormat[] numberFormats;
private List<ULocale> implicitLocales;
-
+
{
reset();
}
if (bits != null && bits.get(type)) {
return true;
}
- return false;
+ return false;
}
-
+
/*
* Available locales for service types
*/
}
/** WARNING: All of this data is temporary, until we start importing from CLDR!!!
- *
+ *
*/
private static final Map<String, String> language_territory_hack_map = new HashMap<String, String>();
private static final String[][] language_territory_hack = {
{"MH", "Pacific/Majuro"},
{"MN", "Asia/Ulaanbaatar"},
{"SJ", "Arctic/Longyearbyen"},
- {"UM", "Pacific/Midway"},
+ {"UM", "Pacific/Midway"},
};
static {
for (int i = 0; i < territory_tzid_hack.length; ++i) {
}
// Freezable implementation
-
+
private volatile boolean frozen;
/**
* @draft ICU 3.6
* @provisional This API might change or be removed in a future release.
*/
+ @Override
public boolean isFrozen() {
    // One read of the volatile flag.
    final boolean frozenNow = frozen;
    return frozenNow;
}
* @draft ICU 4.4
* @provisional This API might change or be removed in a future release.
*/
+ @Override
public GlobalizationPreferences freeze() {
frozen = true;
return this;
* @draft ICU 4.4
* @provisional This API might change or be removed in a future release.
*/
+ @Override
public GlobalizationPreferences cloneAsThawed() {
try {
GlobalizationPreferences result = (GlobalizationPreferences) clone();
* @internal
*/
public class CalendarAstronomer {
-
+
//-------------------------------------------------------------------------
// Astronomical constants
//-------------------------------------------------------------------------
* @internal
*/
public static final double SIDEREAL_DAY = 23.93446960027;
-
+
/**
* The number of sidereal hours in one mean solar day.
* Approximately 24.07.
* @internal
*/
public static final double SOLAR_DAY = 24.065709816;
-
+
/**
* The average number of solar days from one new moon to the next. This is the time
* it takes for the moon to return the same ecliptic longitude as the sun.
* @internal
*/
public static final double SYNODIC_MONTH = 29.530588853;
-
+
/**
* The average number of days it takes
* for the moon to return to the same ecliptic longitude relative to the
* @internal
*/
public static final double SIDEREAL_MONTH = 27.32166;
-
+
/**
* The average number of days between successive vernal equinoxes.
* Due to the precession of the earth's
* @internal
*/
public static final double TROPICAL_YEAR = 365.242191;
-
+
/**
* The average number of days it takes
* for the sun to return to the same position against the fixed stellar
// Time-related constants
//-------------------------------------------------------------------------
- /**
- * The number of milliseconds in one second.
+ /**
+ * The number of milliseconds in one second.
* @internal
*/
public static final int SECOND_MS = 1000;
- /**
- * The number of milliseconds in one minute.
+ /**
+ * The number of milliseconds in one minute.
* @internal
*/
public static final int MINUTE_MS = 60*SECOND_MS;
- /**
- * The number of milliseconds in one hour.
+ /**
+ * The number of milliseconds in one hour.
* @internal
*/
public static final int HOUR_MS = 60*MINUTE_MS;
- /**
- * The number of milliseconds in one day.
+ /**
+ * The number of milliseconds in one day.
* @internal
*/
public static final long DAY_MS = 24*HOUR_MS;
* @internal
*/
public static final long JULIAN_EPOCH_MS = -210866760000000L;
-
+
// static {
// Calendar cal = new GregorianCalendar(TimeZone.getTimeZone("GMT"));
// cal.clear();
// cal.add(cal.DATE, -1);
// System.out.println("0.0 Jan 2000 = " + cal.getTime().getTime());
// }
-
+
/**
* Milliseconds value for 0.0 January 2000 AD.
*/
static private final double RAD_HOUR = 12 / PI; // radians -> hours
static private final double DEG_RAD = PI / 180; // degrees -> radians
static private final double RAD_DEG = 180 / PI; // radians -> degrees
-
+
//-------------------------------------------------------------------------
// Constructors
//-------------------------------------------------------------------------
public CalendarAstronomer() {
// Delegates to another constructor, passing the wall-clock time (epoch milliseconds) read at construction.
this(System.currentTimeMillis());
}
-
+
/**
* Construct a new <code>CalendarAstronomer</code> object that is initialized to
* the specified date and time.
public CalendarAstronomer(Date d) {
// Delegates using the date's epoch-millisecond value; a null d fails here with NullPointerException.
this(d.getTime());
}
-
+
/**
* Construct a new <code>CalendarAstronomer</code> object that is initialized to
* the specified time. The time is expressed as a number of milliseconds since
public CalendarAstronomer(long aTime) {
    // Record the supplied time as this object's current time.
    this.time = aTime;
}
-
+
/**
* Construct a new <code>CalendarAstronomer</code> object with the given
* latitude and longitude. The object's time is set to the current
fLatitude = normPI(latitude * DEG_RAD);
fGmtOffset = (long)(fLongitude * 24 * HOUR_MS / PI2);
}
-
-
+
+
//-------------------------------------------------------------------------
// Time and date getters and setters
//-------------------------------------------------------------------------
-
+
/**
* Set the current date and time of this <code>CalendarAstronomer</code> object. All
* astronomical calculations are performed based on this time setting.
time = aTime;
clearCache();
}
-
+
/**
* Set the current date and time of this <code>CalendarAstronomer</code> object. All
* astronomical calculations are performed based on this time setting.
public void setDate(Date date) {
    // Convert to milliseconds and reuse setTime, which also resets the cached results.
    final long millis = date.getTime();
    setTime(millis);
}
-
+
/**
* Set the current date and time of this <code>CalendarAstronomer</code> object. All
* astronomical calculations are performed based on this time setting.
*
* @param jdn the desired time, expressed as a "julian day number",
- * which is the number of elapsed days since
+ * which is the number of elapsed days since
* 1/1/4713 BC (Julian), 12:00 GMT. Note that julian day
* numbers start at <em>noon</em>. To get the jdn for
* the corresponding midnight, subtract 0.5.
clearCache();
julianDay = jdn;
}
-
+
/**
* Get the current time of this <code>CalendarAstronomer</code> object,
* represented as the number of milliseconds since
public long getTime() {
// 'time' holds this object's current instant, in milliseconds since 1/1/1970 AD.
return time;
}
-
+
/**
* Get the current time of this <code>CalendarAstronomer</code> object,
* represented as a <code>Date</code> object.
public Date getDate() {
    // Wrap the millisecond timestamp in a fresh Date on every call.
    final Date current = new Date(time);
    return current;
}
-
+
/**
* Get the current time of this <code>CalendarAstronomer</code> object,
* expressed as a "julian day number", which is the number of elapsed
}
return julianDay;
}
-
+
/**
* Return this object's time expressed in julian centuries:
* the number of centuries after 1/1/1900 AD, 12:00 GMT
if (siderealTime == INVALID) {
// See page 86 of "Practical Astronomy with your Calculator",
// by Peter Duffett-Smith, for details on the algorithm.
-
+
double UT = normalize((double)time/HOUR_MS, 24);
-
+
siderealTime = normalize(getSiderealOffset() + UT*1.002737909, 24);
}
return siderealTime;
}
-
+
private double getSiderealOffset() {
if (siderealT0 == INVALID) {
double JD = Math.floor(getJulianDay() - 0.5) + 0.5;
}
return siderealT0;
}
-
+
/**
* Returns the current local sidereal time, measured in hours
* @internal
public double getLocalSidereal() {
    // Greenwich sidereal time, shifted by the observer's GMT offset expressed in hours,
    // then wrapped back into the range [0, 24).
    final double greenwichHours = getGreenwichSidereal();
    final double offsetHours = (double) fGmtOffset / HOUR_MS;
    return normalize(greenwichHours + offsetHours, 24);
}
-
+
/**
* Converts local sidereal time to Universal Time.
*
* on this object's current date.
*
* @return The corresponding Universal Time, in milliseconds since
- * 1 Jan 1970, GMT.
+ * 1 Jan 1970, GMT.
*/
private long lstToUT(double lst) {
// Convert to local mean time
double lt = normalize((lst - getSiderealOffset()) * 0.9972695663, 24);
-
+
// Then find local midnight on this day
long base = DAY_MS * ((time + fGmtOffset)/DAY_MS) - fGmtOffset;
-
+
//out(" lt =" + lt + " hours");
//out(" base=" + new Date(base));
-
+
return base + (long)(lt * HOUR_MS);
}
-
-
+
+
//-------------------------------------------------------------------------
// Coordinate transformations, all based on the current time of this object
//-------------------------------------------------------------------------
double obliq = eclipticObliquity();
double sinE = Math.sin(obliq);
double cosE = Math.cos(obliq);
-
+
double sinL = Math.sin(eclipLong);
double cosL = Math.cos(eclipLong);
-
+
double sinB = Math.sin(eclipLat);
double cosB = Math.cos(eclipLat);
double tanB = Math.tan(eclipLat);
-
+
return new Equatorial(Math.atan2(sinL*cosE - tanB*sinE, cosL),
Math.asin(sinB*cosE + cosB*sinE*sinL) );
}
public Horizon eclipticToHorizon(double eclipLong)
{
Equatorial equatorial = eclipticToEquatorial(eclipLong);
-
+
double H = getLocalSidereal()*PI/12 - equatorial.ascension; // Hour-angle
-
+
double sinH = Math.sin(H);
double cosH = Math.cos(H);
double sinD = Math.sin(equatorial.declination);
double cosD = Math.cos(equatorial.declination);
double sinL = Math.sin(fLatitude);
double cosL = Math.cos(fLatitude);
-
+
double altitude = Math.asin(sinD*sinL + cosD*cosL*cosH);
double azimuth = Math.atan2(-cosD*cosL*sinH, sinD - sinL * Math.sin(altitude));
return new Horizon(azimuth, altitude);
}
-
+
//-------------------------------------------------------------------------
// The Sun
//-------------------------------------------------------------------------
{
// See page 86 of "Practical Astronomy with your Calculator",
// by Peter Duffett-Smith, for details on the algorithm.
-
+
if (sunLongitude == INVALID) {
double[] result = getSunLongitude(getJulianDay());
sunLongitude = result[0];
}
return sunLongitude;
}
-
+
/**
* TODO Make this public when the entire class is package-private.
*/
{
// See page 86 of "Practical Astronomy with your Calculator",
// by Peter Duffett-Smith, for details on the algorithm.
-
+
double day = julian - JD_EPOCH; // Days since epoch
-
+
// Find the angular distance the sun in a fictitious
// circular orbit has travelled since the epoch.
double epochAngle = norm2PI(PI2/TROPICAL_YEAR*day);
-
+
// The epoch wasn't at the sun's perigee; find the angular distance
// since perigee, which is called the "mean anomaly"
double meanAnomaly = norm2PI(epochAngle + SUN_ETA_G - SUN_OMEGA_G);
-
+
// Now find the "true anomaly", i.e. the real solar longitude
// by solving Kepler's equation for an elliptical orbit
// NOTE: The 3rd ed. of the book lists omega_g and eta_g in different
public Equatorial getSunPosition() {
    // Convert the sun's ecliptic longitude to equatorial coordinates,
    // passing 0 as the ecliptic latitude.
    final double solarLongitude = getSunLongitude();
    return eclipticToEquatorial(solarLongitude, 0);
}
-
+
/**
 * Typed wrapper around a solar ecliptic longitude, used as the argument type of
 * {@link #getSunTime(SolarLongitude, boolean) getSunTime}.
 */
private static class SolarLongitude {
    /** The wrapped longitude value. */
    double value;

    /**
     * @param longitude the longitude to wrap
     */
    SolarLongitude(double longitude) {
        this.value = longitude;
    }
}
-
+
/**
* Constant representing the vernal equinox.
- * For use with {@link #getSunTime(SolarLongitude, boolean) getSunTime}.
+ * For use with {@link #getSunTime(SolarLongitude, boolean) getSunTime}.
* Note: In this case, "vernal" refers to the northern hemisphere's seasons.
* @internal
*/
public static final SolarLongitude VERNAL_EQUINOX = new SolarLongitude(0);
-
+
/**
* Constant representing the summer solstice.
* For use with {@link #getSunTime(SolarLongitude, boolean) getSunTime}.
* @internal
*/
public static final SolarLongitude SUMMER_SOLSTICE = new SolarLongitude(PI/2);
-
+
/**
* Constant representing the autumnal equinox.
* For use with {@link #getSunTime(SolarLongitude, boolean) getSunTime}.
* @internal
*/
public static final SolarLongitude AUTUMN_EQUINOX = new SolarLongitude(PI);
-
+
/**
* Constant representing the winter solstice.
* For use with {@link #getSunTime(SolarLongitude, boolean) getSunTime}.
* @internal
*/
public static final SolarLongitude WINTER_SOLSTICE = new SolarLongitude((PI*3)/2);
-
+
/**
* Find the next or previous time at which the sun's ecliptic longitude will have
- * the desired value.
+ * the desired value.
* @internal
*/
public long getSunTime(double desired, boolean next)
{
- return timeOfAngle( new AngleFunc() { public double eval() { return getSunLongitude(); } },
+ return timeOfAngle( new AngleFunc() { @Override
+ public double eval() { return getSunLongitude(); } },
desired,
TROPICAL_YEAR,
MINUTE_MS,
next);
}
-
+
/**
* Find the next or previous time at which the sun's ecliptic longitude will have
- * the desired value.
+ * the desired value.
* @internal
*/
public long getSunTime(SolarLongitude desired, boolean next) {
    // Unwrap the typed constant and defer to the double-based overload.
    final double targetLongitude = desired.value;
    return getSunTime(targetLongitude, next);
}
-
+
/**
* Returns the time (GMT) of sunrise or sunset on the local date to which
* this calendar is currently set.
* official time zone and the geographic longitude, the
* computation can flop over into an adjacent day if this object
* is set to a time near local midnight.
- *
+ *
* @internal
*/
- public long getSunRiseSet(boolean rise)
- {
+ public long getSunRiseSet(boolean rise) {
long t0 = time;
// Make a rough guess: 6am or 6pm local time on the current day
long noon = ((time + fGmtOffset)/DAY_MS)*DAY_MS - fGmtOffset + 12*HOUR_MS;
-
+
setTime(noon + (rise ? -6L : 6L) * HOUR_MS);
-
+
long t = riseOrSet(new CoordFunc() {
- public Equatorial eval() { return getSunPosition(); }
- },
- rise,
- .533 * DEG_RAD, // Angular Diameter
- 34 /60.0 * DEG_RAD, // Refraction correction
- MINUTE_MS / 12); // Desired accuracy
+ @Override
+ public Equatorial eval() { return getSunPosition(); }
+ },
+ rise,
+ .533 * DEG_RAD, // Angular Diameter
+ 34 /60.0 * DEG_RAD, // Refraction correction
+ MINUTE_MS / 12); // Desired accuracy
- setTime(t0);
- return t;
- }
+ setTime(t0);
+ return t;
+ }
// Commented out - currently unused. ICU 2.6, Alan
// //-------------------------------------------------------------------------
// if (lst1s > 24) lst1s -= 24;
// if (lst2r > 24) lst2r -= 24;
// if (lst2s > 24) lst2s -= 24;
-//
+//
// // 4. Convert LSTs to GSTs. If GST1 > GST2, add 24 to GST2.
// double gst1r = lstToGst(lst1r);
// double gst1s = lstToGst(lst1s);
//
// // 5. Calculate GST at 0h UT of this date
// double t00 = utToGst(0);
-//
+//
// // 6. Calculate GST at 0h on the observer's longitude
// double offset = Math.round(fLongitude*12/PI); // p.95 step 6; he _rounds_ to nearest 15 deg.
// double t00p = t00 - offset*1.002737909;
// if (t00p < 0) t00p += 24; // do NOT normalize
-//
+//
// // 7. Adjust
// if (gst1r < t00p) {
// gst1r += 24;
// double delta = fLongitude * 24 / PI2;
// return normalize(lst - delta, 24);
// }
-
+
// Commented out - currently unused. ICU 2.6, Alan
// /**
// * Convert UT to GST on this date.
// //-------------------------------------------------------------------------
//
// // Note: This method appears to produce inferior accuracy as
-// // compared to getSunRiseSet().
+// // compared to getSunRiseSet().
//
// /**
// * TODO Make this public when the entire class is package-private.
// double d = (double)(time - EPOCH_2000_MS) / DAY_MS;
//
// // Now compute the Local Sidereal Time, LST:
-// //
+// //
// double LST = 98.9818 + 0.985647352 * d + /*UT*15 + long*/
// fLongitude*RAD_DEG;
-// //
+// //
// // (east long. positive). Note that LST is here expressed in degrees,
// // where 15 degrees corresponds to one hour. Since LST really is an angle,
// // it's convenient to use one unit---degrees---throughout.
//
// // COMPUTING THE SUN'S POSITION
// // ----------------------------
-// //
+// //
// // To be able to compute the Sun's rise/set times, you need to be able to
// // compute the Sun's position at any time. First compute the "day
// // number" d as outlined above, for the desired moment. Next compute:
-// //
+// //
// double oblecl = 23.4393 - 3.563E-7 * d;
-// //
+// //
// double w = 282.9404 + 4.70935E-5 * d;
// double M = 356.0470 + 0.9856002585 * d;
// double e = 0.016709 - 1.151E-9 * d;
-// //
+// //
// // This is the obliquity of the ecliptic, plus some of the elements of
// // the Sun's apparent orbit (i.e., really the Earth's orbit): w =
// // argument of perihelion, M = mean anomaly, e = eccentricity.
// // Semi-major axis is here assumed to be exactly 1.0 (while not strictly
// // true, this is still an accurate approximation). Next compute E, the
// // eccentric anomaly:
-// //
+// //
// double E = M + e*(180/PI) * Math.sin(M*DEG_RAD) * ( 1.0 + e*Math.cos(M*DEG_RAD) );
-// //
+// //
// // where E and M are in degrees. This is it---no further iterations are
// // needed because we know e has a sufficiently small value. Next compute
// // the true anomaly, v, and the distance, r:
-// //
+// //
// /* r * cos(v) = */ double A = Math.cos(E*DEG_RAD) - e;
// /* r * sin(v) = */ double B = Math.sqrt(1 - e*e) * Math.sin(E*DEG_RAD);
-// //
+// //
// // and
-// //
+// //
// // r = sqrt( A*A + B*B )
// double v = Math.atan2( B, A )*RAD_DEG;
-// //
+// //
// // The Sun's true longitude, slon, can now be computed:
-// //
+// //
// double slon = v + w;
-// //
+// //
// // Since the Sun is always at the ecliptic (or at least very very close to
// // it), we can use simplified formulae to convert slon (the Sun's ecliptic
// // longitude) to sRA and sDec (the Sun's RA and Dec):
-// //
+// //
// // sin(slon) * cos(oblecl)
// // tan(sRA) = -------------------------
// // cos(slon)
-// //
+// //
// // sin(sDec) = sin(oblecl) * sin(slon)
-// //
+// //
// // As was the case when computing az, the Azimuth, if possible use an
// // atan2() function to compute sRA.
//
//
// // COMPUTING RISE AND SET TIMES
// // ----------------------------
-// //
+// //
// // To compute when an object rises or sets, you must compute when it
// // passes the meridian and the HA of rise/set. Then the rise time is
// // the meridian time minus HA for rise/set, and the set time is the
// // meridian time plus the HA for rise/set.
-// //
+// //
// // To find the meridian time, compute the Local Sidereal Time at 0h local
// // time (or 0h UT if you prefer to work in UT) as outlined above---name
// // that quantity LST0. The Meridian Time, MT, will now be:
-// //
+// //
// // MT = RA - LST0
// double MT = normalize(sRA - LST, 360);
-// //
+// //
// // where "RA" is the object's Right Ascension (in degrees!). If negative,
// // add 360 deg to MT. If the object is the Sun, leave the time as it is,
// // but if it's stellar, multiply MT by 365.2422/366.2422, to convert from
// // sidereal to solar time. Now, compute HA for rise/set, name that
// // quantity HA0:
-// //
+// //
// // sin(h0) - sin(lat) * sin(Dec)
// // cos(HA0) = ---------------------------------
// // cos(lat) * cos(Dec)
-// //
+// //
// // where h0 is the altitude selected to represent rise/set. For a purely
// // mathematical horizon, set h0 = 0 and simplify to:
-// //
+// //
// // cos(HA0) = - tan(lat) * tan(Dec)
-// //
+// //
// // If you want to account for refraction on the atmosphere, set h0 = -35/60
// // degrees (-35 arc minutes), and if you want to compute the rise/set times
// // for the Sun's upper limb, set h0 = -50/60 (-50 arc minutes).
-// //
+// //
// double h0 = -50/60 * DEG_RAD;
//
// double HA0 = Math.acos(
// // When HA0 has been computed, leave it as it is for the Sun but multiply
// // by 365.2422/366.2422 for stellar objects, to convert from sidereal to
// // solar time. Finally compute:
-// //
+// //
// // Rise time = MT - HA0
// // Set time = MT + HA0
-// //
+// //
// // convert the times from degrees to hours by dividing by 15.
-// //
+// //
// // If you'd like to check that your calculations are accurate or just
// // need a quick result, check the USNO's Sun or Moon Rise/Set Table,
// // <URL:http://aa.usno.navy.mil/AA/data/docs/RS_OneYear.html>.
//-------------------------------------------------------------------------
// The Moon
//-------------------------------------------------------------------------
-
+
static final double moonL0 = 318.351648 * PI/180; // Mean long. at epoch
static final double moonP0 = 36.340410 * PI/180; // Mean long. of perigee
static final double moonN0 = 318.510107 * PI/180; // Mean long. of node
static final double moonI = 5.145366 * PI/180; // Inclination of orbit
static final double moonE = 0.054900; // Eccentricity of orbit
-
+
// These aren't used right now
static final double moonA = 3.84401e5; // semi-major axis (km)
static final double moonT0 = 0.5181 * PI/180; // Angular size at distance A
static final double moonPi = 0.9507 * PI/180; // Parallax at distance A
-
+
/**
* The position of the moon at the time set on this
* object, in equatorial coordinates.
// Calculate the solar longitude. Has the side effect of
// filling in "meanAnomalySun" as well.
double sunLong = getSunLongitude();
-
+
//
// Find the # of days since the epoch of our orbital parameters.
// TODO: Convert the time of day portion into ephemeris time
//
double day = getJulianDay() - JD_EPOCH; // Days since epoch
-
+
// Calculate the mean longitude and anomaly of the moon, based on
// a circular orbit. Similar to the corresponding solar calculation.
double meanLongitude = norm2PI(13.1763966*PI/180*day + moonL0);
double meanAnomalyMoon = norm2PI(meanLongitude - 0.1114041*PI/180 * day - moonP0);
-
+
//
// Calculate the following corrections:
// Evection: the sun's gravity affects the moon's eccentricity
double a3 = 0.3700*PI/180 * Math.sin(meanAnomalySun);
meanAnomalyMoon += evection - annual - a3;
-
+
//
// More correction factors:
// center equation of the center correction
//
double center = 6.2886*PI/180 * Math.sin(meanAnomalyMoon);
double a4 = 0.2140*PI/180 * Math.sin(2 * meanAnomalyMoon);
-
+
// Now find the moon's corrected longitude
moonLongitude = meanLongitude + evection + center - annual + a4;
// the earth the moon is on
//
double variation = 0.6583*PI/180 * Math.sin(2*(moonLongitude - sunLong));
-
+
moonLongitude += variation;
-
+
//
// What we've calculated so far is the moon's longitude in the plane
// of its own orbit. Now map to the ecliptic to get the latitude
double y = Math.sin(moonLongitude - nodeLongitude);
double x = Math.cos(moonLongitude - nodeLongitude);
-
+
moonEclipLong = Math.atan2(y*Math.cos(moonI), x) + nodeLongitude;
double moonEclipLat = Math.asin(y * Math.sin(moonI));
}
return moonPosition;
}
-
+
/**
* The "age" of the moon at the time specified in this object.
* This is really the angle between the
// some the intermediate results cached during that calculation.
//
getMoonPosition();
-
+
return norm2PI(moonEclipLong - sunLongitude);
}
-
+
/**
* Calculate the phase of the moon at the time set in this object.
* The returned phase is a <code>double</code> in the range
// by Peter Duffett-Smith, for details on the algorithm.
return 0.5 * (1 - Math.cos(getMoonAge()));
}
-
+
/**
 * Typed wrapper around a moon "age" angle, used as the argument type of
 * {@link #getMoonTime(MoonAge, boolean) getMoonTime}.
 */
private static class MoonAge {
    /** The wrapped age value. */
    double value;

    /**
     * @param age the age value to wrap
     */
    MoonAge(double age) {
        this.value = age;
    }
}
-
+
/**
* Constant representing a new moon.
* For use with {@link #getMoonTime(MoonAge, boolean) getMoonTime}
* @internal
*/
public static final MoonAge FIRST_QUARTER = new MoonAge(PI/2);
-
+
/**
* Constant representing a full moon.
* For use with {@link #getMoonTime(MoonAge, boolean) getMoonTime}
* @internal
*/
public static final MoonAge FULL_MOON = new MoonAge(PI);
-
+
/**
* Constant representing the moon's last quarter.
* For use with {@link #getMoonTime(MoonAge, boolean) getMoonTime}
* @internal
*/
public static final MoonAge LAST_QUARTER = new MoonAge((PI*3)/2);
-
+
/**
* Find the next or previous time at which the Moon's ecliptic
- * longitude will have the desired value.
+ * longitude will have the desired value.
* <p>
* @param desired The desired longitude.
* @param next <tt>true</tt> if the next occurrence of the phase
- * is desired, <tt>false</tt> for the previous occurrance.
+ * is desired, <tt>false</tt> for the previous occurrance.
* @internal
*/
public long getMoonTime(double desired, boolean next)
{
return timeOfAngle( new AngleFunc() {
+ @Override
public double eval() { return getMoonAge(); } },
desired,
SYNODIC_MONTH,
MINUTE_MS,
next);
}
-
+
/**
* Find the next or previous time at which the moon will be in the
* desired phase.
* <p>
* @param desired The desired phase of the moon.
* @param next <tt>true</tt> if the next occurrence of the phase
- * is desired, <tt>false</tt> for the previous occurrance.
+ * is desired, <tt>false</tt> for the previous occurrance.
* @internal
*/
public long getMoonTime(MoonAge desired, boolean next) {
    // Unwrap the typed constant and defer to the double-based overload.
    final double targetAge = desired.value;
    return getMoonTime(targetAge, next);
}
-
+
/**
* Returns the time (GMT) of moonrise or moonset on the local date to which
* this calendar is currently set.
public long getMoonRiseSet(boolean rise)
{
return riseOrSet(new CoordFunc() {
+ @Override
public Equatorial eval() { return getMoonPosition(); }
},
rise,
//-------------------------------------------------------------------------
// Interpolation methods for finding the time at which a given event occurs
//-------------------------------------------------------------------------
-
+
/**
 * Callback that yields an angle for this object's current time setting;
 * timeOfAngle re-evaluates it each time it adjusts the time.
 */
private interface AngleFunc {
    /** @return the angle at the current time */
    double eval();
}
-
+
private long timeOfAngle(AngleFunc func, double desired,
double periodDays, long epsilon, boolean next)
{
// Find the value of the function at the current time
double lastAngle = func.eval();
-
+
// Find out how far we are from the desired angle
double deltaAngle = norm2PI(desired - lastAngle) ;
-
+
// Using the average period, estimate the next (or previous) time at
// which the desired angle occurs.
double deltaT = (deltaAngle + (next ? 0 : -PI2)) * (periodDays*DAY_MS) / PI2;
-
+
double lastDeltaT = deltaT; // Liu
long startTime = time; // Liu
-
+
setTime(time + (long)deltaT);
// Now iterate until we get the error below epsilon. Throughout
// Correct the time estimate based on how far off the angle is
deltaT = normPI(desired - angle) * factor;
-
+
// HACK:
- //
+ //
// If abs(deltaT) begins to diverge we need to quit this loop.
// This only appears to happen when attempting to locate, for
// example, a new moon on the day of the new moon. E.g.:
- //
+ //
// This result is correct:
// newMoon(7508(Mon Jul 23 00:00:00 CST 1990,false))=
// Sun Jul 22 10:57:41 CST 1990
- //
+ //
// But attempting to make the same call a day earlier causes deltaT
// to diverge:
// CalendarAstronomer.timeOfAngle() diverging: 1.348508727575625E9 ->
setTime(time + (long)deltaT);
}
while (Math.abs(deltaT) > epsilon);
-
+
return time;
}
-
+
/**
 * Callback that yields an equatorial position for this object's current time
 * setting; riseOrSet evaluates it to locate the body being tracked.
 */
private interface CoordFunc {
    /** @return the position at the current time */
    Equatorial eval();
}
-
+
private long riseOrSet(CoordFunc func, boolean rise,
- double diameter, double refraction,
+ double diameter, double refraction,
long epsilon)
- {
+ {
Equatorial pos = null;
double tanL = Math.tan(fLatitude);
long deltaT = Long.MAX_VALUE;
int count = 0;
-
+
//
// Calculate the object's position at the current time, then use that
// position to calculate the time of rising or setting. The position
pos = func.eval();
double angle = Math.acos(-tanL * Math.tan(pos.declination));
double lst = ((rise ? PI2-angle : angle) + pos.ascension ) * 24 / PI2;
-
+
// Convert from LST to Universal Time.
long newTime = lstToUT( lst );
-
+
deltaT = newTime - time;
setTime(newTime);
}
double x = diameter / 2 + refraction;
double y = Math.asin(Math.sin(x) / Math.sin(psi));
long delta = (long)((240 * y * RAD_DEG / cosD)*SECOND_MS);
-
+
return time + (rise ? -delta : delta);
}
-
+
//-------------------------------------------------------------------------
// Other utility methods
//-------------------------------------------------------------------------
private static final double normalize(double value, double range) {
    // Number of whole ranges at or below value (floor, so negative values wrap upward).
    final double wholeRanges = Math.floor(value / range);
    // Removing them leaves the remainder in [0, range) for a positive range.
    return value - range * wholeRanges;
}
-
+
/**
* Normalize an angle so that it's in the range 0 - 2pi.
* For positive angles this is just (angle % 2pi), but the Java
private static final double norm2PI(double angle) {
    // Same arithmetic as normalize(angle, PI2), written out in place:
    // subtract the whole number of 2*pi turns at or below the angle.
    final double wholeTurns = Math.floor(angle / PI2);
    return angle - PI2 * wholeTurns;
}
-
+
/**
* Normalize an angle into the range -PI - PI
*/
private static final double normPI(double angle) {
    // Shift by +pi, wrap into [0, 2*pi), then shift back by -pi.
    final double shifted = angle + PI;
    final double wrapped = normalize(shifted, PI2);
    return wrapped - PI;
}
-
+
/**
* Find the "true anomaly" (longitude) of an object from
* its mean anomaly and the eccentricity of its orbit. This uses
*
* @param meanAnomaly The object's longitude calculated as if it were in
* a regular, circular orbit, measured in radians
- * from the point of perigee.
+ * from the point of perigee.
*
* @param eccentricity The eccentricity of the orbit
*
do {
delta = E - eccentricity * Math.sin(E) - meanAnomaly;
E = E - delta / (1 - eccentricity * Math.cos(E));
- }
+ }
while (Math.abs(delta) > 1e-5); // epsilon = 1e-5 rad
return 2.0 * Math.atan( Math.tan(E/2) * Math.sqrt( (1+eccentricity)
/(1-eccentricity) ) );
}
-
+
/**
* Return the obliquity of the ecliptic (the angle between the ecliptic
* and the earth's equator) at the current time. This varies due to
final double epoch = 2451545.0; // 2000 AD, January 1.5
double T = (getJulianDay() - epoch) / 36525;
-
+
eclipObliquity = 23.439292
- 46.815/3600 * T
- 0.0006/3600 * T*T
+ 0.00181/3600 * T*T*T;
-
+
eclipObliquity *= DEG_RAD;
}
return eclipObliquity;
}
-
-
+
+
//-------------------------------------------------------------------------
// Private data
//-------------------------------------------------------------------------
-
+
/**
* Current time in milliseconds since 1/1/1970 AD
* @see java.util.Date#getTime
*/
private long time;
-
+
/* Observer's longitude and latitude (in radians) and the matching offset from
* GMT (in milliseconds); used by the local sidereal time, rise/set
* and equatorial to horizon coordinate calculations
*/
private double fLongitude = 0.0;
private double fLatitude = 0.0;
private long fGmtOffset = 0;
-
+
//
// The following fields are used to cache calculated results for improved
// performance. These values all depend on the current time setting
// of this object, so the clearCache method is provided.
//
static final private double INVALID = Double.MIN_VALUE;
-
+
private transient double julianDay = INVALID;
private transient double julianCentury = INVALID;
private transient double sunLongitude = INVALID;
private transient double eclipObliquity = INVALID;
private transient double siderealT0 = INVALID;
private transient double siderealTime = INVALID;
-
+
private transient Equatorial moonPosition = null;
private void clearCache() {
siderealT0 = INVALID;
moonPosition = null;
}
-
+
//private static void out(String s) {
// System.out.println(s);
//}
-
+
//private static String deg(double rad) {
// return Double.toString(rad * RAD_DEG);
//}
-
+
//private static String hours(long ms) {
// return Double.toString((double)ms / HOUR_MS) + " hours";
//}
public String local(long localMillis) {
    // Subtract the default time zone's raw (non-DST) offset before formatting.
    final int rawOffsetMs = TimeZone.getDefault().getRawOffset();
    final Date shifted = new Date(localMillis - rawOffsetMs);
    return shifted.toString();
}
-
-
+
+
/**
* Represents the position of an object in the sky relative to the ecliptic,
- * the plane of the earth's orbit around the Sun.
+ * the plane of the earth's orbit around the Sun.
* This is a spherical coordinate system in which the latitude
* specifies the position north or south of the plane of the ecliptic.
* The longitude specifies the position along the ecliptic plane
* Return a string representation of this object
* @internal
*/
+ @Override
public String toString() {
    // Both fields are scaled by RAD_DEG (radians -> degrees) and joined as "longitude,latitude".
    final double longitudeDegrees = longitude * RAD_DEG;
    final double latitudeDegrees = latitude * RAD_DEG;
    return longitudeDegrees + "," + latitudeDegrees;
}
-
+
/**
* The ecliptic latitude, in radians. This specifies an object's
* position north or south of the plane of the ecliptic,
* @internal
*/
public final double latitude;
-
+
/**
* The ecliptic longitude, in radians.
* This specifies an object's position along the ecliptic plane
}
/**
- * Represents the position of an
- * object in the sky relative to the plane of the earth's equator.
+ * Represents the position of an
+ * object in the sky relative to the plane of the earth's equator.
* The <i>Right Ascension</i> specifies the position east or west
* along the equator, relative to the sun's position at the vernal
* equinox. The <i>Declination</i> is the position north or south
* angles measured in degrees.
* @internal
*/
+ @Override
public String toString() {
    // Both fields are scaled by RAD_DEG (radians -> degrees) and joined as "ascension,declination".
    final double ascensionDegrees = ascension * RAD_DEG;
    final double declinationDegrees = declination * RAD_DEG;
    return ascensionDegrees + "," + declinationDegrees;
}
-
+
/**
* Return a string representation of this object with the right ascension
* measured in hours, minutes, and seconds.
public String toHmsString() {
    // Right ascension goes through radToHms, declination through radToDms.
    final String ascensionText = radToHms(ascension);
    final String declinationText = radToDms(declination);
    return ascensionText + "," + declinationText;
}
-
+
/**
- * The right ascension, in radians.
+ * The right ascension, in radians.
* This is the position east or west along the equator
* relative to the sun's position at the vernal equinox,
* with positive angles representing East.
* @internal
*/
public final double ascension;
-
+
/**
* The declination, in radians.
* This is the position north or south of the equatorial plane,
}
/**
- * Represents the position of an object in the sky relative to
+ * Represents the position of an object in the sky relative to
* the local horizon.
* The <i>Altitude</i> represents the object's elevation above the horizon,
* with objects below the horizon having a negative altitude.
* angles measured in degrees.
* @internal
*/
+ @Override
public String toString() {
    // Both fields are scaled by RAD_DEG (radians -> degrees) and joined as "altitude,azimuth".
    final double altitudeDegrees = altitude * RAD_DEG;
    final double azimuthDegrees = azimuth * RAD_DEG;
    return altitudeDegrees + "," + azimuthDegrees;
}
-
- /**
- * The object's altitude above the horizon, in radians.
+
+ /**
+ * The object's altitude above the horizon, in radians.
* @internal
*/
public final double altitude;
-
- /**
- * The object's direction, in radians clockwise from north.
+
+ /**
+ * The object's direction, in radians clockwise from north.
* @internal
*/
public final double azimuth;
int hrs = (int) (angle*RAD_HOUR);
int min = (int)((angle*RAD_HOUR - hrs) * 60);
int sec = (int)((angle*RAD_HOUR - hrs - min/60.0) * 3600);
-
+
return Integer.toString(hrs) + "h" + min + "m" + sec + "s";
}
-
+
static private String radToDms(double angle) {
int deg = (int) (angle*RAD_DEG);
int min = (int)((angle*RAD_DEG - deg) * 60);
int sec = (int)((angle*RAD_DEG - deg - min/60.0) * 3600);
-
+
return Integer.toString(deg) + "\u00b0" + min + "'" + sec + "\"";
}
}
}
// public methods --------------------------------------------------
-
+
/**
* Gets the value associated with the codepoint.
* If no value is associated with the codepoint, a default value will be
// fastpath for U+0000..U+D7FF
if(0 <= ch && ch < UTF16.LEAD_SURROGATE_MIN_VALUE) {
// copy of getRawOffset()
- offset = (m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_)
+ offset = (m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_)
+ (ch & INDEX_STAGE_3_MASK_);
return m_data_[offset];
}
// handle U+D800..U+10FFFF
offset = getCodePointOffset(ch);
-
+
// return -1 if there is an error, in this case we return the default
// value: m_initialValue_
return (offset >= 0) ? m_data_[offset] : m_initialValue_;
/**
* <p>Get a value from a folding offset (from the value of a lead surrogate)
* and a trail surrogate.</p>
- * <p>If the
+ * <p>If the
* @param leadvalue value associated with the lead surrogate which contains
* the folding offset
* @param trail surrogate
}
int offset = m_dataManipulate_.getFoldingOffset(leadvalue);
if (offset > 0) {
- return m_data_[getRawOffset(offset,
+ return m_data_[getRawOffset(offset,
(char)(trail & SURROGATE_MASK_))];
}
return m_initialValue_;
}
-
+
/**
* <p>Gets the latin 1 fast path value.</p>
- * <p>Note this only works if latin 1 characters have their own linear
+ * <p>Note this only works if latin 1 characters have their own linear
* array.</p>
* @param ch latin 1 characters
* @return value associated with latin character
*/
- public final char getLatin1LinearValue(char ch)
+ public final char getLatin1LinearValue(char ch)
{
return m_data_[INDEX_STAGE_3_MASK_ + 1 + m_dataOffset_ + ch];
}
-
+
/**
* Checks if the argument Trie has the same data as this Trie
* @param other Trie to check
* otherwise
*/
///CLOVER:OFF
- public boolean equals(Object other)
+ @Override
+ public boolean equals(Object other)
{
boolean result = super.equals(other);
if (result && other instanceof CharTrie) {
}
return false;
}
-
+
+ @Override
public int hashCode() {
assert false : "hashCode not designed";
return 42;
* data array</p>
* @param bytes buffer containing trie data
*/
+ @Override
protected final void unserialize(ByteBuffer bytes)
{
int indexDataLength = m_dataOffset_ + m_dataLength_;
* @param trail trailing surrogate
* @return offset to data
*/
+ @Override
protected final int getSurrogateOffset(char lead, char trail)
{
if (m_dataManipulate_ == null) {
throw new NullPointerException(
"The field DataManipulate in this Trie is null");
}
-
+
// get fold position for the next trail surrogate
int offset = m_dataManipulate_.getFoldingOffset(getLeadValue(lead));
// value: m_initialValue_
return -1;
}
-
+
/**
* Gets the value at the argument index.
* For use internally in TrieIterator.
* @return 32 bit value
* @see com.ibm.icu.impl.TrieIterator
*/
+ @Override
protected final int getValue(int index)
{
return m_data_[index];
/**
* Gets the default initial value
- * @return 32 bit value
+ * @return 32 bit value
*/
+ @Override
protected final int getInitialValue()
{
// m_initialValue_ is also what the lookup methods return when no data offset is found.
return m_initialValue_;
}
-
+
// private data members --------------------------------------------
/**
import com.ibm.icu.text.UCharacterIterator;
/**
- * This class is a wrapper around CharacterIterator and implements the
+ * This class is a wrapper around CharacterIterator and implements the
* UCharacterIterator protocol
* @author ram
*/
public class CharacterIteratorWrapper extends UCharacterIterator {
-
+
private CharacterIterator iterator;
-
-
+
+
public CharacterIteratorWrapper(CharacterIterator iter){
if(iter==null){
throw new IllegalArgumentException();
}
- iterator = iter;
+ iterator = iter;
}
/**
* @see UCharacterIterator#current()
*/
+ @Override
public int current() {
int c = iterator.current();
if(c==CharacterIterator.DONE){
/**
* @see UCharacterIterator#getLength()
*/
+ @Override
public int getLength() {
    // Length is the span between the wrapped iterator's end and begin indices.
    final int endIndex = iterator.getEndIndex();
    final int beginIndex = iterator.getBeginIndex();
    return endIndex - beginIndex;
}
/**
* @see UCharacterIterator#getIndex()
*/
+ @Override
public int getIndex() {
// Reports the wrapped iterator's index as-is, without subtracting its begin index.
return iterator.getIndex();
}
/**
* @see UCharacterIterator#next()
*/
+ @Override
public int next() {
int i = iterator.current();
iterator.next();
- if(i==CharacterIterator.DONE){
+ if(i==CharacterIterator.DONE){
return DONE;
}
return i;
/**
* @see UCharacterIterator#previous()
*/
+ @Override
public int previous() {
int i = iterator.previous();
if(i==CharacterIterator.DONE){
/**
* @see UCharacterIterator#setIndex(int)
*/
+ @Override
public void setIndex(int index) {
try{
iterator.setIndex(index);
/**
* Moves the wrapped iterator to its end index.
* @see UCharacterIterator#setToLimit()
*/
+ @Override
public void setToLimit() {
// The limit is whatever the wrapped iterator reports as getEndIndex().
iterator.setIndex(iterator.getEndIndex());
}
/**
* @see UCharacterIterator#getText(char[])
*/
+ @Override
public int getText(char[] fillIn, int offset){
- int length =iterator.getEndIndex() - iterator.getBeginIndex();
+ int length =iterator.getEndIndex() - iterator.getBeginIndex();
int currentIndex = iterator.getIndex();
if(offset < 0 || offset + length > fillIn.length){
throw new IndexOutOfBoundsException(Integer.toString(length));
}
-
+
for (char ch = iterator.first(); ch != CharacterIterator.DONE; ch = iterator.next()) {
fillIn[offset++] = ch;
}
* Creates a clone of this iterator. Clones the underlying character iterator.
* @see UCharacterIterator#clone()
*/
+ @Override
public Object clone(){
try {
CharacterIteratorWrapper result = (CharacterIteratorWrapper) super.clone();
result.iterator = (CharacterIterator)this.iterator.clone();
return result;
- } catch (CloneNotSupportedException e) {
+ } catch (CloneNotSupportedException e) {
return null; // only invoked if bad underlying character iterator
}
}
-
+
+ @Override
public int moveIndex(int delta){
- int length = iterator.getEndIndex() - iterator.getBeginIndex();
+ int length = iterator.getEndIndex() - iterator.getBeginIndex();
int idx = iterator.getIndex()+delta;
-
+
if(idx < 0) {
idx = 0;
} else if(idx > length) {
}
return iterator.setIndex(idx);
}
-
+
/**
* @see UCharacterIterator#getCharacterIterator()
*/
+ @Override
public CharacterIterator getCharacterIterator(){
return (CharacterIterator)iterator.clone();
- }
+ }
}
// this constructor on Android, because ClassLoaderUtil.getClassLoader()
// should get non-null ClassLoader before calling
// ClassLoaderUtil.getBootstrapClassLoader().
- //
+ //
// On other common JREs (such as Oracle, OpenJDK),
// Object.class.getClassLoader() returns null, but
// super(null) is commonly used for accessing the bootstrap
* Lazily create a singleton BootstrapClassLoader.
* This class loader might be necessary when ICU4J classes are
* initialized by bootstrap class loader.
- *
+ *
* @return The BootStrapClassLoader singleton instance
*/
private static ClassLoader getBootstrapClassLoader() {
ClassLoader cl = null;
if (System.getSecurityManager() != null) {
cl = AccessController.doPrivileged(new PrivilegedAction<ClassLoader>() {
- public BootstrapClassLoader run() {
- return new BootstrapClassLoader();
- }
- });
+ @Override
+ public BootstrapClassLoader run() {
+ return new BootstrapClassLoader();
+ }
+ });
} else {
cl = new BootstrapClassLoader();
}
} catch (MissingResourceException ex) {
if ( !nsName.equals("latn") ) {
try {
- minusString = rb.getStringWithFallback("NumberElements/latn/symbols/minusSign");
+ minusString = rb.getStringWithFallback("NumberElements/latn/symbols/minusSign");
} catch (MissingResourceException ex1) {
minusString = "-";
}
minusSign = elems[10];
}
/**
* Stores newValue in the maxIntDigits field. The value is stored as given;
* this method performs no validation.
* @param newValue the maximum number of integer digits
*/
+ @Override
public void setMaximumIntegerDigits(int newValue) {
maxIntDigits = newValue;
}
/**
* Returns the maxIntDigits field.
* @return the stored maximum number of integer digits
*/
+ @Override
public int getMaximumIntegerDigits() {
return maxIntDigits;
}
/**
* Stores newValue in the minIntDigits field. The value is stored as given;
* this method performs no validation.
* @param newValue the minimum number of integer digits
*/
+ @Override
public void setMinimumIntegerDigits(int newValue) {
minIntDigits = newValue;
}
/**
* Returns the minIntDigits field.
* @return the stored minimum number of integer digits
*/
+ @Override
public int getMinimumIntegerDigits() {
return minIntDigits;
}
return digits.clone();
}
/**
* Not supported for double values: this overload always throws.
* @param number ignored
* @param toAppendTo ignored
* @param pos ignored
* @return never returns normally
* @throws UnsupportedOperationException always
*/
+ @Override
public StringBuffer format(double number, StringBuffer toAppendTo,
FieldPosition pos) {
throw new UnsupportedOperationException("StringBuffer format(double, StringBuffer, FieldPostion) is not implemented");
}
+ @Override
public StringBuffer format(long numberL, StringBuffer toAppendTo,
FieldPosition pos) {
}
return toAppendTo;
}
-
+
/**
* Not supported for BigInteger values: this overload always throws.
* @param number ignored
* @param toAppendTo ignored
* @param pos ignored
* @return never returns normally
* @throws UnsupportedOperationException always
*/
+ @Override
public StringBuffer format(BigInteger number, StringBuffer toAppendTo,
FieldPosition pos) {
throw new UnsupportedOperationException("StringBuffer format(BigInteger, StringBuffer, FieldPostion) is not implemented");
}
/**
* Not supported for java.math.BigDecimal values: this overload always throws.
* @param number ignored
* @param toAppendTo ignored
* @param pos ignored
* @return never returns normally
* @throws UnsupportedOperationException always
*/
+ @Override
public StringBuffer format(java.math.BigDecimal number, StringBuffer toAppendTo,
FieldPosition pos) {
throw new UnsupportedOperationException("StringBuffer format(BigDecimal, StringBuffer, FieldPostion) is not implemented");
}
+ @Override
public StringBuffer format(BigDecimal number,
StringBuffer toAppendTo, FieldPosition pos) {
throw new UnsupportedOperationException("StringBuffer format(BigDecimal, StringBuffer, FieldPostion) is not implemented");
*/
private static final long PARSE_THRESHOLD = 922337203685477579L; // (Long.MAX_VALUE / 10) - 1
+ @Override
public Number parse(String text, ParsePosition parsePosition) {
long num = 0;
boolean sawNumber = false;
return result;
}
+ @Override
public boolean equals(Object obj) {
if (obj == null || !super.equals(obj) || !(obj instanceof DateNumberFormat)) {
return false;
&& this.positiveOnly == other.positiveOnly
&& Arrays.equals(this.digits, other.digits));
}
-
+
/**
* Returns the superclass hash code unchanged; no fields declared in this
* class are mixed into the hash here.
* @return super.hashCode()
*/
+ @Override
public int hashCode() {
return super.hashCode();
}
private static final int DATA_FORMAT = 0x436d6e44;
private static final class IsAcceptable implements Authenticate {
- // @Override when we switch to Java 6
+ @Override
public boolean isDataVersionAcceptable(byte version[]) {
return version[0] == 1;
}
} else if (i == key.length()) {
return -1; // key < table key because key is shorter.
}
- int diff = (int)key.charAt(i) - c2;
+ int diff = key.charAt(i) - c2;
if (diff != 0) {
return diff;
}
} else if (i == key.length()) {
return -1; // key < table key because key is shorter.
}
- int diff = (int)key.charAt(i) - c2;
+ int diff = key.charAt(i) - c2;
if (diff != 0) {
return diff;
}
{
/**
* Method used in ICUBinary.readHeader() to provide data format
- * authentication.
+ * authentication.
* @param version version of the current data
* @return true if dataformat is an acceptable version, false otherwise
*/
public boolean isDataVersionAcceptable(byte version[]);
}
-
+
// public methods --------------------------------------------------------
/**
bytes.position(headerSize);
return // dataVersion
- ((int)bytes.get(20) << 24) |
+ (bytes.get(20) << 24) |
((bytes.get(21) & 0xff) << 16) |
((bytes.get(22) & 0xff) << 8) |
(bytes.get(23) & 0xff);
}
// private variables -------------------------------------------------
-
+
/**
* Magic numbers to authenticate the data file
*/
private static final byte MAGIC1 = (byte)0xda;
private static final byte MAGIC2 = (byte)0x27;
-
+
/**
* File format authentication values
*/
private static final byte CHAR_SET_ = 0;
private static final byte CHAR_SIZE_ = 2;
-
+
/**
* Error messages
*/
- private static final String MAGIC_NUMBER_AUTHENTICATION_FAILED_ =
+ private static final String MAGIC_NUMBER_AUTHENTICATION_FAILED_ =
"ICU data file error: Not an ICU data file";
private static final String HEADER_AUTHENTICATION_FAILED_ =
"ICU data file error: Header authentication failed, please check if you have a valid ICU data file";
if (System.getSecurityManager() != null) {
try {
val = AccessController.doPrivileged(new PrivilegedAction<String>() {
+ @Override
public String run() {
return System.getProperty(fname);
}
URL i = null;
if (System.getSecurityManager() != null) {
i = AccessController.doPrivileged(new PrivilegedAction<URL>() {
+ @Override
public URL run() {
return ICUData.class.getResource(resourceName);
}
InputStream i = null;
if (System.getSecurityManager() != null) {
i = AccessController.doPrivileged(new PrivilegedAction<InputStream>() {
+ @Override
public InputStream run() {
return root.getResourceAsStream(resourceName);
}
InputStream i = null;
if (System.getSecurityManager() != null) {
i = AccessController.doPrivileged(new PrivilegedAction<InputStream>() {
+ @Override
public InputStream run() {
return loader.getResourceAsStream(resourceName);
}
* Convenience override for callers using locales. This uses
* createKey(ULocale.toString(), kind) to create a key, calls getKey, and then
* if actualReturn is not null, returns the actualResult from
- * getKey (stripping any prefix) into a ULocale.
+ * getKey (stripping any prefix) into a ULocale.
*/
public Object get(ULocale locale, int kind, ULocale[] actualReturn) {
Key key = createKey(locale, kind);
}
return locales;
}
-
+
/**
* A subclass of Key that implements a locale fallback mechanism.
* The first locale to search for is the locale provided by the
*
* <p>Canonicalization adjusts the locale string so that the
* section before the first underscore is in lower case, and the rest
- * is in upper case, with no trailing underscores.</p>
+ * is in upper case, with no trailing underscores.</p>
*/
public static class LocaleKey extends ICUService.Key {
private int kind;
public static LocaleKey createWithCanonicalFallback(String primaryID, String canonicalFallbackID) {
return createWithCanonicalFallback(primaryID, canonicalFallbackID, KIND_ANY);
}
-
+
/**
* Create a LocaleKey with canonical primary and fallback IDs.
*/
String canonicalPrimaryID = ULocale.getName(primaryID);
return new LocaleKey(primaryID, canonicalPrimaryID, canonicalFallbackID, kind);
}
-
+
/**
* Create a LocaleKey with canonical primary and fallback IDs.
*/
String canonicalPrimaryID = locale.getName();
return new LocaleKey(canonicalPrimaryID, canonicalPrimaryID, canonicalFallbackID, kind);
}
-
+
/**
* PrimaryID is the user's requested locale string,
* canonicalPrimaryID is this string in canonical form,
/**
* Return the (canonical) original ID.
* @return the value of the primaryID field
*/
+ @Override
public String canonicalID() {
return primaryID;
}
/**
* Return the (canonical) current ID, or null if no current id.
* @return the value of the currentID field
*/
+ @Override
public String currentID() {
return currentID;
}
* Return the (canonical) current descriptor, or null if no current id.
* Includes the keywords, whereas the ID does not include keywords.
*/
+ @Override
public String currentDescriptor() {
String result = currentID();
if (result != null) {
* <p>First falls back through the primary ID, then through
* the fallbackID. The final fallback is "" (root)
* unless the primary id was "" (root), in which case
- * there is no fallback.
+ * there is no fallback.
*/
+ @Override
public boolean fallback() {
int x = currentID.lastIndexOf('_');
if (x != -1) {
}
/**
- * If a key created from id would eventually fallback to match the
+ * If a key created from id would eventually fallback to match the
* canonical ID of this key, return true.
*/
+ @Override
public boolean isFallbackOf(String id) {
// Delegates to LocaleUtility.isFallbackOf, passing this key's canonicalID()
// as the first argument and the candidate id as the second.
return LocaleUtility.isFallbackOf(canonicalID(), id);
}
* the key against the supported IDs, and passes the canonicalLocale and
* kind off to handleCreate (which subclasses must implement).
*/
+ @Override
public Object create(Key key, ICUService service) {
if (handlesKey(key)) {
LocaleKey lkey = (LocaleKey)key;
int kind = lkey.kind();
-
+
ULocale uloc = lkey.currentLocale();
return handleCreate(uloc, kind, service);
} else {
/**
* Override of superclass method.
*/
+ @Override
public void updateVisibleIDs(Map<String, Factory> result) {
Set<String> cache = getSupportedIDs();
for (String id : cache) {
/**
* Return a localized name for the locale represented by id.
*/
+ @Override
public String getDisplayName(String id, ULocale locale) {
// assume if the user called this on us, we must have handled some fallback of this id
// if (isSupportedID(id)) {
///CLOVER:ON
/**
- * Return true if this id is one the factory supports (visible or
+ * Return true if this id is one the factory supports (visible or
* otherwise).
*/
protected boolean isSupportedID(String id) {
return getSupportedIDs().contains(id);
}
-
+
/**
- * Return the set of ids that this factory supports (visible or
+ * Return the set of ids that this factory supports (visible or
* otherwise). This can be called often and might need to be
* cached if it is expensive to create.
*/
/**
* For debugging.
*/
+ @Override
public String toString() {
StringBuilder buf = new StringBuilder(super.toString());
if (name != null) {
public SimpleLocaleKeyFactory(Object obj, ULocale locale, int kind, boolean visible, String name) {
super(visible, name);
-
+
this.obj = obj;
this.id = locale.getBaseName();
this.kind = kind;
/**
* Returns the service object if kind/locale match. Service is not used.
*/
+ @Override
public Object create(Key key, ICUService service) {
if (!(key instanceof LocaleKey)) {
return null;
}
-
+
LocaleKey lkey = (LocaleKey)key;
if (kind != LocaleKey.KIND_ANY && kind != lkey.kind()) {
return null;
if (!id.equals(lkey.currentID())) {
return null;
}
-
+
return obj;
}
/**
* Returns true only when idToCheck equals the single id held by this factory.
* @param idToCheck the id to test
* @return whether this.id equals idToCheck
*/
+ @Override
protected boolean isSupportedID(String idToCheck) {
return this.id.equals(idToCheck);
}
/**
* If this factory is visible, maps its id to this factory in result;
* otherwise result is left untouched.
* @param result the map of visible ids to update
*/
+ @Override
public void updateVisibleIDs(Map<String, Factory> result) {
if (visible) {
result.put(id, this);
}
}
+ @Override
public String toString() {
StringBuilder buf = new StringBuilder(super.toString());
buf.append(", id: ");
/**
* Return the supported IDs. This is the set of all locale names for the bundleName.
*/
+ @Override
protected Set<String> getSupportedIDs() {
- return ICUResourceBundle.getFullLocaleNameSet(bundleName, loader());
+ return ICUResourceBundle.getFullLocaleNameSet(bundleName, loader());
}
/**
* Override of superclass method.
*/
+ @Override
public void updateVisibleIDs(Map<String, Factory> result) {
Set<String> visibleIDs = ICUResourceBundle.getAvailableLocaleNameSet(bundleName, loader()); // only visible ids
for (String id : visibleIDs) {
* Create the service. The default implementation returns the resource bundle
* for the locale, ignoring kind, and service.
*/
+ @Override
protected Object handleCreate(ULocale loc, int kind, ICUService service) {
return ICUResourceBundle.getBundleInstance(bundleName, loc, loader());
}
return ClassLoaderUtil.getClassLoader(getClass());
}
/**
* Returns super.toString() with ", bundle: " and the bundleName appended.
* @return the description string
*/
+ @Override
public String toString() {
return super.toString() + ", bundle: " + bundleName;
}
return fallbackLocaleName;
}
/**
* Creates a key for id through LocaleKey.createWithCanonicalFallback,
* using the result of validateFallbackLocale() as the fallback id.
* @param id the id to create a key for
* @return the LocaleKey produced by createWithCanonicalFallback
*/
+ @Override
public Key createKey(String id) {
return LocaleKey.createWithCanonicalFallback(id, validateFallbackLocale());
}
* eventually dequeues the list and calls notifyListener on each
* listener in the list.</p>
*
- * <p>Subclasses override acceptsListener and notifyListener
+ * <p>Subclasses override acceptsListener and notifyListener
* to add type-safe notification. AcceptsListener should return
* true if the listener is of the appropriate type; ICUNotifier
* itself will ensure the listener is non-null and that the
* identical listener is not already registered with the Notifier.
- * NotifyListener should cast the listener to the appropriate
+ * NotifyListener should cast the listener to the appropriate
* type and call the appropriate method on the listener.
*/
public abstract class ICUNotifier {
* The listener must not be null. AcceptsListener must return
* true for the listener. Attempts to concurrently
* register the identical listener more than once will be
- * silently ignored.
+ * silently ignored.
*/
public void addListener(EventListener l) {
if (l == null) {
* Wait for a notification to be queued, then notify all
* listeners listed in the notification.
*/
+ @Override
public void run() {
EventListener[] list;
while (true) {
/**
* Return a string listing all the stats.
*/
+ @Override
public String toString() {
return " rc: " + _rc +
- " mrc: " + _mrc +
+ " mrc: " + _mrc +
" wrc: " + _wrc +
" wc: " + _wc +
" wwc: " + _wwc;
stats = null;
return result;
}
-
+
/**
* Return a snapshot of the current stats. This does not reset the stats.
*/
* <p>If there's a writer, or a waiting writer, increment the
* waiting reader count and block on this. Otherwise
* increment the active reader count and return. Caller must call
- * releaseRead when done (for example, in a finally block).</p>
+ * releaseRead when done (for example, in a finally block).</p>
*/
public void acquireRead() {
if (stats != null) { // stats is null by default
* having an active writer and return. Otherwise, add a lock to the
* end of the waiting writer list, and block on it. Caller
* must call releaseWrite when done (for example, in a finally
- * block).<p>
+ * block).<p>
*/
public void acquireWrite() {
if (stats != null) { // stats is null by default
* <p>If there are waiting readers, make them all active and
* notify all of them. Otherwise, notify the oldest waiting
* writer, if any. Call when finished with work controlled by
- * acquireWrite.</p>
+ * acquireWrite.</p>
*/
public void releaseWrite() {
rwl.writeLock().unlock();
* Return the service instance if the factory's id is equal to
* the key's currentID. Service is ignored.
*/
+ @Override
public Object create(Key key, ICUService service) {
if (id.equals(key.currentID())) {
return instance;
* If visible, adds a mapping from id -> this to the result,
* otherwise removes id from result.
*/
+ @Override
public void updateVisibleIDs(Map<String, Factory> result) {
if (visible) {
result.put(id, this);
* otherwise returns null. (This default implementation has
* no localized id information.)
*/
+ @Override
public String getDisplayName(String identifier, ULocale locale) {
// identifier is echoed back only when this factory is visible and
// identifier equals its id; the locale argument is not consulted.
return (visible && id.equals(identifier)) ? identifier : null;
}
/**
* For debugging.
*/
+ @Override
public String toString() {
StringBuilder buf = new StringBuilder(super.toString());
buf.append(", id: ");
return f.getDisplayName(id, locale);
}
}
-
+
return null;
}
/**
- * Convenience override of getDisplayNames(ULocale, Comparator, String) that
+ * Convenience override of getDisplayNames(ULocale, Comparator, String) that
* uses the current default Locale as the locale, null as
* the comparator, and null for the matchID.
*/
synchronized (this) {
if (ref == dnref || dnref == null) {
dncache = new TreeMap<String, String>(com); // sorted
-
+
Map<String, Factory> m = getVisibleIDMap();
Iterator<Entry<String, Factory>> ei = m.entrySet().iterator();
while (ei.hasNext()) {
* requires a ServiceListener. Subclasses can override to accept
* different listeners.
*/
+ @Override
protected boolean acceptsListener(EventListener l) {
// instanceof is false for null, so a null listener is rejected here too.
return l instanceof ServiceListener;
}
* Notify the listener, which by default is a ServiceListener.
* Subclasses can override to use a different listener.
*/
+ @Override
protected void notifyListener(EventListener l) {
// Unguarded cast: a listener that is not a ServiceListener would raise
// ClassCastException here. This service is passed as the callback argument.
((ServiceListener)l).serviceChanged(this);
}
/**
* Returns the result of super.toString, appending the name in curly braces.
* @return super.toString() followed by "{", the name field, and "}"
*/
+ @Override
public String toString() {
return super.toString() + "{" + name + "}";
}
public IllegalIcuArgumentException(String errorMessage) {
super(errorMessage);
}
-
+
public IllegalIcuArgumentException(Throwable cause) {
super(cause);
}
-
+
public IllegalIcuArgumentException(String errorMessage, Throwable cause) {
super(errorMessage, cause);
}
-
+
+ @Override
public synchronized IllegalIcuArgumentException initCause(Throwable cause) {
return (IllegalIcuArgumentException) super.initCause(cause);
}
-
+
}
/**
* <p>Creates a new Trie with the settings for the trie data.</p>
- * <p>Unserialize the 32-bit-aligned input stream and use the data for the
+ * <p>Unserialize the 32-bit-aligned input stream and use the data for the
* trie.</p>
* @param bytes file buffer to an ICU data file, containing the trie
- * @param dataManipulate object which provides methods to parse the char
+ * @param dataManipulate object which provides methods to parse the char
* data
* @throws IOException thrown when data reading fails
*/
// fastpath for U+0000..U+D7FF
if(0 <= ch && ch < UTF16.LEAD_SURROGATE_MIN_VALUE) {
// copy of getRawOffset()
- offset = (m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_)
+ offset = (m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_)
+ (ch & INDEX_STAGE_3_MASK_);
return m_data_[offset];
}
}
return m_initialValue_;
}
-
+
/**
* <p>Gets the latin 1 fast path value.</p>
- * <p>Note this only works if latin 1 characters have their own linear
+ * <p>Note this only works if latin 1 characters have their own linear
* array.</p>
* @param ch latin 1 characters
* @return value associated with latin character
*/
- public final int getLatin1LinearValue(char ch)
+ public final int getLatin1LinearValue(char ch)
{
return m_data_[INDEX_STAGE_3_MASK_ + 1 + ch];
}
* otherwise
*/
///CLOVER:OFF
- public boolean equals(Object other)
+ @Override
+ public boolean equals(Object other)
{
boolean result = super.equals(other);
if (result && other instanceof IntTrie) {
}
return false;
}
-
+
/**
* Placeholder hash code. With assertions enabled this trips the
* "hashCode not designed" assertion; with assertions disabled it returns
* the same constant for every instance.
* @return the constant 42
*/
+ @Override
public int hashCode() {
assert false : "hashCode not designed";
return 42;
}
///CLOVER:ON
-
+
// protected methods -----------------------------------------------
/**
* data array</p>
* @param bytes data buffer containing trie data
*/
+ @Override
protected final void unserialize(ByteBuffer bytes)
{
super.unserialize(bytes);
* @param trail trailing surrogate
* @return offset to data
*/
+ @Override
protected final int getSurrogateOffset(char lead, char trail)
{
if (m_dataManipulate_ == null) {
// value: m_initialValue_
return -1;
}
-
+
/**
* Gets the value at the argument index.
* For use internally in TrieIterator
* The value is read directly from the m_data_ array; this method adds no
* range check of its own.
* @param index position in m_data_ to read
* @return 32 bit value
* @see com.ibm.icu.impl.TrieIterator
*/
+ @Override
protected final int getValue(int index)
{
return m_data_[index];
}
-
+
/**
* Gets the default initial value
- * @return 32 bit value
+ * @return 32 bit value
*/
+ @Override
protected final int getInitialValue()
{
return m_initialValue_;
}
// package private methods -----------------------------------------
-
+
/**
* Internal constructor for builder use
* @param index the index array to be slotted into this trie
m_dataLength_ = m_data_.length;
m_initialValue_ = initialvalue;
}
-
+
// private data members --------------------------------------------
/**
this.shorterFirst = shorterFirst ? 1 : -1;
}
+ @Override
public int compare(Iterable<T> a, Iterable<T> b) {
if (a == null) {
return b == null ? 0 : -shorterFirst;
path, locale.getBaseName());
}
/**
* Returns the locale reported by the underlying bundle.
* @return bundle.getULocale()
*/
+ @Override
public ULocale getLocale() {
return bundle.getULocale();
}
+ @Override
public String get(String tableName, String subTableName, String code) {
return ICUResourceTableAccess.getTableString(bundle, tableName, subTableName,
code, nullIfNotFound ? null : code);
return (DataTables) Class.forName(className).newInstance();
} catch (Throwable t) {
return new DataTables() {
+ @Override
public DataTable get(ULocale locale, boolean nullIfNotFound) {
return new DataTable(nullIfNotFound);
}
* Utility class to parse and normalize locale ids (including POSIX style)
*/
public final class LocaleIDParser {
-
+
/**
* Char array representing the locale ID.
*/
private char[] id;
-
+
/**
* Current position in {@link #id} (while parsing).
*/
private int index;
-
+
/**
* Temporary buffer for parsed sections of data.
*/
private StringBuilder buffer;
-
+
// um, don't handle POSIX ids unless we request it. why not? well... because.
private boolean canonicalize;
private boolean hadCountry;
}
// utilities for working on text in the buffer
-
+
/**
* Append c to the buffer.
* @param c the character to add at the end of the temporary buffer
*/
private void append(char c) {
buffer.append(c);
}
-
+
/**
* Appends the UNDERSCORE separator constant to the buffer.
*/
private void addSeparator() {
append(UNDERSCORE);
}
*/
private int parseLanguage() {
int startLength = buffer.length();
-
+
if (haveExperimentalLanguagePrefix()) {
append(AsciiUtil.toLower(id[0]));
append(HYPHEN);
if (!atTerminator()) {
int oldIndex = index;
++index;
-
+
char c;
while (!isTerminatorOrIDSeparator(c = next()) && AsciiUtil.isAlpha(c));
--index;
boolean skipping = false;
char c;
boolean firstPass = true;
-
+
while ((c = next()) != DONE) {
if (c == DOT) {
start = false;
private Comparator<String> getKeyComparator() {
final Comparator<String> comp = new Comparator<String>() {
+ @Override
public int compare(String lhs, String rhs) {
return lhs.compareTo(rhs);
}
}
private static CacheBase<String, Norm2AllModes, ByteBuffer> cache =
new SoftCache<String, Norm2AllModes, ByteBuffer>() {
+ @Override
protected Norm2AllModes createInstance(String key, ByteBuffer bytes) {
Normalizer2Impl impl;
if(bytes==null) {
// They assume that the cc or trailCC of their input is 0.
// Most of them implement Appendable interface methods.
// @Override when we switch to Java 6
+ @Override
public ReorderingBuffer append(char c) {
str.append(c);
lastCC=0;
reorderStart=str.length();
}
// @Override when we switch to Java 6
+ @Override
public ReorderingBuffer append(CharSequence s) {
if(s.length()!=0) {
str.append(s);
return this;
}
// @Override when we switch to Java 6
+ @Override
public ReorderingBuffer append(CharSequence s, int start, int limit) {
if(start!=limit) {
str.append(s, start, limit);
private static final class IsAcceptable implements ICUBinary.Authenticate {
// Accepts the data only when the first version byte is 2; the remaining
// version bytes are not examined.
+ @Override
public boolean isDataVersionAcceptable(byte version[]) {
return version[0]==2;
}
}
}
private static final Trie2.ValueMapper segmentStarterMapper=new Trie2.ValueMapper() {
+ @Override
public int map(int in) {
return in&CANON_NOT_SEGMENT_STARTER;
}
}
if(key1==(firstUnit&COMP_1_TRAIL_MASK)) {
if((firstUnit&COMP_1_TRIPLE)!=0) {
- return ((int)compositions.charAt(list+1)<<16)|compositions.charAt(list+2);
+ return (compositions.charAt(list+1)<<16)|compositions.charAt(list+2);
} else {
return compositions.charAt(list+1);
}
compositeAndFwd=maybeYesCompositions.charAt(list+1);
list+=2;
} else {
- compositeAndFwd=(((int)maybeYesCompositions.charAt(list+1)&~COMP_2_TRAIL_MASK)<<16)|
+ compositeAndFwd=((maybeYesCompositions.charAt(list+1)&~COMP_2_TRAIL_MASK)<<16)|
maybeYesCompositions.charAt(list+2);
list+=3;
}
*
* a. Zone (table). A zone is a table resource that contains several
* types of resources, listed below:
- *
+ *
* - typeOffsets:intvector (Required)
- *
+ *
* Sets of UTC raw/dst offset pairs in seconds. The entry at
* 2n represents the raw offset and the entry at 2n+1 represents the dst offset
* paired with the raw offset at 2n. The very first pair represents
* the initial zone offset (before the first transition) always.
*
- * - trans:intvector (Optional)
- *
+ * - trans:intvector (Optional)
+ *
* List of transition times represented by 32bit seconds from the
* epoch (1970-01-01T00:00Z) in ascending order.
- *
+ *
* - transPre32/transPost32:intvector (Optional)
- *
+ *
* List of transition times before/after 32bit minimum seconds.
* Each time is represented by a pair of 32bit integer.
- *
+ *
* - typeMap:bin (Optional)
- *
+ *
* Array of bytes representing the mapping between each transition
* time (transPre32/trans/transPost32) and its corresponding offset
* data (typeOffsets).
- *
+ *
* - finalRule:string (Optional)
- *
+ *
* If a recurrent transition rule is applicable to a zone forever
* after the final transition time, finalRule represents the rule
* in Rules data.
- *
+ *
* - finalRaw:int (Optional)
- *
+ *
* When finalRule is available, finalRaw is required and specifies
* the raw (base) offset of the rule.
- *
+ *
* - finalYear:int (Optional)
- *
+ *
* When finalRule is available, finalYear is required and specifies
* the start year of the rule.
- *
+ *
* - links:intvector (Optional)
- *
+ *
* When this zone data is shared with other zones, links specifies
* all zones including the zone itself. Each zone is referenced by
* integer index.
- *
+ *
* b. Link (int, length 1). A link zone is an int resource. The
* integer is the zone number of the target zone. The key of this
* resource is an alternate name for the target zone. This data
int[] fields = Grego.timeToFields(current, null);
// Find start of this year, and start of next year
- long start = Grego.fieldsToDay(fields[0], 0, 1) * SECONDS_PER_DAY;
- long limit = Grego.fieldsToDay(fields[0] + 1, 0, 1) * SECONDS_PER_DAY;
+ long start = Grego.fieldsToDay(fields[0], 0, 1) * SECONDS_PER_DAY;
+ long limit = Grego.fieldsToDay(fields[0] + 1, 0, 1) * SECONDS_PER_DAY;
// Return TRUE if DST is observed at any time during the current
// year.
}
private void construct(UResourceBundle top, UResourceBundle res){
-
+
if ((top == null || res == null)) {
throw new IllegalArgumentException();
}
int idx = 0;
if (transPre32 != null) {
for (int i = 0; i < transPre32.length / 2; i++, idx++) {
- transitionTimes64[idx] =
- (((long)transPre32[i * 2]) & 0x00000000FFFFFFFFL) << 32
- | (((long)transPre32[i * 2 + 1]) & 0x00000000FFFFFFFFL);
+ transitionTimes64[idx] =
+ ((transPre32[i * 2]) & 0x00000000FFFFFFFFL) << 32
+ | ((transPre32[i * 2 + 1]) & 0x00000000FFFFFFFFL);
}
}
if (trans32 != null) {
for (int i = 0; i < trans32.length; i++, idx++) {
- transitionTimes64[idx] = (long)trans32[i];
+ transitionTimes64[idx] = trans32[i];
}
}
if (transPost32 != null) {
for (int i = 0; i < transPost32.length / 2; i++, idx++) {
- transitionTimes64[idx] =
- (((long)transPost32[i * 2]) & 0x00000000FFFFFFFFL) << 32
- | (((long)transPost32[i * 2 + 1]) & 0x00000000FFFFFFFFL);
+ transitionTimes64[idx] =
+ ((transPost32[i * 2]) & 0x00000000FFFFFFFFL) << 32
+ | ((transPost32[i * 2 + 1]) & 0x00000000FFFFFFFFL);
}
}
} else {
finalStartYear = r.getInt();
// Note: Setting finalStartYear to the finalZone is problematic. When a date is around
- // year boundary, SimpleTimeZone may return false result when DST is observed at the
+ // year boundary, SimpleTimeZone may return false result when DST is observed at the
// beginning of year. We could apply safe margin (day or two), but when one of recurrent
// rules falls around year boundary, it could return false result. Without setting the
// start year, finalZone works fine around the year boundary of the start year.
}
private int getInt(byte val){
- return val & 0xFF;
+ return val & 0xFF;
}
/*
buf.append(",finalStartMillis=" + finalStartMillis);
buf.append(",finalZone=" + finalZone);
buf.append(']');
-
+
return buf.toString();
}
* If and only if finalYear == INT32_MAX then finalZone == 0.
*/
private SimpleTimeZone finalZone = null; // owned, may be NULL
-
+
/**
* The canonical ID of this zone. Initialized when {@link #getCanonicalID()}
* is invoked first time, or {@link #setID(String)} is called.
private static final boolean DEBUG = ICUDebug.enabled("olson");
private static final int SECONDS_PER_DAY = 24*60*60;
-
+
private static UResourceBundle loadRule(UResourceBundle top, String ruleid) {
UResourceBundle r = top.get("Rules");
r = r.get(ruleid);
public int hashCode(){
int ret = (int) (finalStartYear ^ (finalStartYear>>>4) +
transitionCount ^ (transitionCount>>>6) +
- typeCount ^ (typeCount>>>8) +
+ typeCount ^ (typeCount>>>8) +
Double.doubleToLongBits(finalStartMillis)+
- (finalZone == null ? 0 : finalZone.hashCode()) +
+ (finalZone == null ? 0 : finalZone.hashCode()) +
super.hashCode());
if (transitionTimes64 != null) {
for(int i=0; i<transitionTimes64.length; i++){
if (typeMapData != null) {
for(int i=0; i<typeMapData.length; i++){
ret+=typeMapData[i] & 0xFF;
- }
+ }
}
return ret;
}
return finalZoneWithStartYear.getPreviousTransition(base, inclusive);
} else {
return firstFinalTZTransition;
- }
+ }
}
}
} else {
// Create a TimeArrayTimeZoneRule at finalMillis
rules[idx++] = new TimeArrayTimeZoneRule(getID() + "(STD)", finalZone.getRawOffset(), 0,
- new long[] {(long)finalStartMillis}, DateTimeRule.UTC_TIME);
+ new long[] {(long)finalStartMillis}, DateTimeRule.UTC_TIME);
}
}
return rules;
typeIdx = getInt(typeMapData[firstTZTransitionIdx]);
firstTZTransition = new TimeZoneTransition(transitionTimes64[firstTZTransitionIdx] * Grego.MILLIS_PER_SECOND,
initialRule, historicRules[typeIdx]);
-
+
}
}
/* (non-Javadoc)
* Returns the current value of the isFrozen field.
* @see com.ibm.icu.util.TimeZone#isFrozen()
*/
+ @Override
public boolean isFrozen() {
return isFrozen;
}
/* (non-Javadoc)
* @see com.ibm.icu.util.TimeZone#freeze()
*/
+ @Override
public TimeZone freeze() {
isFrozen = true;
return this;
/* (non-Javadoc)
* @see com.ibm.icu.util.TimeZone#cloneAsThawed()
*/
+ @Override
public TimeZone cloneAsThawed() {
OlsonTimeZone tz = (OlsonTimeZone)super.cloneAsThawed();
if (finalZone != null) {
public IntTrieBuilder builder;
public int initialValue;
// No-op: the row index for the error value is not recorded by this handler.
+ @Override
public void setRowIndexForErrorValue(int rowIndex) {
}
// Remembers rowIndex in the public initialValue field.
+ @Override
public void setRowIndexForInitialValue(int rowIndex) {
initialValue = rowIndex;
}
// Forwards the range to the trie builder with rowIndex as the value.
// NOTE(review): end + 1 presumably turns an inclusive end into the exclusive
// limit expected by IntTrieBuilder.setRange, and the trailing true presumably
// requests overwrite - confirm against IntTrieBuilder.
+ @Override
public void setRowIndexForRange(int start, int end, int rowIndex) {
builder.setRange(start, end + 1, rowIndex, true);
}
+ @Override
public void startRealValues(int rowIndex) {
if (rowIndex > 0xffff) {
// too many rows for a 16-bit trie
} else {
builder = new IntTrieBuilder(null, 100000, initialValue,
initialValue, false);
- }
+ }
}
}
/**
* Store bits (Unicode character properties) in bit set vectors.
- *
+ *
* This is a port of the C++ class UPropsVectors from ICU4C
- *
+ *
* @author Shaopeng Jia
* @internal
*/
/**
* Unicode Properties Vectors associated with code point ranges.
- *
+ *
* Rows of primitive integers in a contiguous array store the range limits and
* the properties vectors.
- *
+ *
* In each row, row[0] contains the start code point and row[1] contains the
* limit code point, which is the start of the next range.
- *
+ *
* Initially, there is only one range [0..0x110000] with values 0.
- *
+ *
* It would be possible to store only one range boundary per row, but
* self-contained rows allow to later sort them by contents.
*/
private boolean isCompacted;
// internal function to compare elements in v and target. Return true iff
- // elements in v starting from index1 to index1 + length - 1
+ // elements in v starting from index1 to index1 + length - 1
// are exactly the same as elements in target
// starting from index2 to index2 + length - 1
- private boolean areElementsSame(int index1, int[] target, int index2,
+ private boolean areElementsSame(int index1, int[] target, int index2,
int length) {
for (int i = 0; i < length; ++i) {
if (v[index1 + i] != target[index2 + i]) {
}
return true;
}
-
+
// internal function which given rangeStart, returns
// index where v[index]<=rangeStart<v[index+1].
// The returned index is a multiple of columns, and therefore
/*
* In rows for code points [start..end], select the column, reset the mask
* bits and set the value bits (ANDed with the mask).
- *
+ *
* @throws IllegalArgumentException
- *
+ *
* @throws IllegalStateException
- *
+ *
* @throws IndexOutOfBoundsException
*/
public void setValue(int start, int end, int column, int value, int mask) {
/*
* Returns an array which contains value elements
- * in row rowIndex.
+ * in row rowIndex.
*
* @throws IllegalStateException
* @throws IllegalArgumentException
/*
* Returns an int which is the start codepoint
* in row rowIndex.
- *
+ *
* @throws IllegalStateException
- *
+ *
* @throws IllegalArgumentException
*/
public int getRowStart(int rowIndex) {
}
/*
- * Returns an int which is the limit codepoint
+ * Returns an int which is the limit codepoint
* minus 1 in row rowIndex.
- *
+ *
* @throws IllegalStateException
- *
+ *
* @throws IllegalArgumentException
*/
public int getRowEnd(int rowIndex) {
}
return v[rowIndex * columns + 1] - 1;
}
-
+
/*
* Compact the vectors:
* - modify the memory
* - keep only unique vectors
* - store them contiguously from the beginning of the memory
- * - for each (non-unique) row, call the respective function in
+ * - for each (non-unique) row, call the respective function in
* CompactHandler
*
* The handler's rowIndex is the index of the row in the compacted
- * memory block. Therefore, it starts at 0 increases in increments of the
+ * memory block. Therefore, it starts at 0 and increases in increments of the
* columns value.
*
* In a first phase, only special values are delivered (each exactly once).
* Then CompactHandler::startRealValues() is called
* where rowIndex is the length of the compacted array.
- * Then, in the second phase, the CompactHandler::setRowIndexForRange() is
+ * Then, in the second phase, the CompactHandler::setRowIndexForRange() is
* called for each row of real values.
*/
public void compact(CompactHandler compactor) {
}
Arrays.sort(indexArray, new Comparator<Integer>() {
+ @Override
public int compare(Integer o1, Integer o2) {
int indexOfRow1 = o1.intValue();
int indexOfRow2 = o2.intValue();
compactor.startRealValues(count);
/*
- * Move vector contents up to a contiguous array with only unique
+ * Move vector contents up to a contiguous array with only unique
* vector values, and call the handler function for each vector.
- *
- * This destroys the Properties Vector structure and replaces it
+ *
+ * This destroys the Properties Vector structure and replaces it
* with an array of just vector values.
*/
int[] temp = new int[count];
// count a new values vector if it is different
// from the current one
- if (count < 0 || !areElementsSame(indexArray[i].intValue() + 2,
+ if (count < 0 || !areElementsSame(indexArray[i].intValue() + 2,
temp, count, valueColumns)) {
count += valueColumns;
System.arraycopy(v, indexArray[i].intValue() + 2, temp, count,
}
}
v = temp;
-
+
// count is at the beginning of the last vector,
// add one to include that last vector
rows = count / valueColumns + 1;
/*
* Get the vectors array after calling compact().
- *
+ *
* @throws IllegalStateException
*/
public int[] getCompactedArray() {
/*
* Get the number of rows for the compacted array.
- *
+ *
* @throws IllegalStateException
*/
public int getCompactedRows() {
/*
* Get the number of columns for the compacted array.
- *
+ *
* @throws IllegalStateException
*/
public int getCompactedColumns() {
// inner class implementation of Trie.DataManipulate
private static class DefaultGetFoldingOffset implements Trie.DataManipulate {
+ @Override
public int getFoldingOffset(int value) {
return value;
}
builder = inBuilder;
}
+ @Override
public int getFoldedValue(int start, int offset) {
- int initialValue = builder.m_initialValue_;
+ int initialValue = builder.m_initialValue_;
int limit = start + 0x400;
while (start < limit) {
boolean[] inBlockZero = new boolean[1];
return 0;
}
}
-
+
public static interface CompactHandler {
public void setRowIndexForRange(int start, int end, int rowIndex);
public void setRowIndexForInitialValue(int rowIndex);
this.setCreator.newInstance(setComparatorParam); // check to make sure compiles
} else {
this.setCreator = ((Class<? extends Set<V>>)setCreator).getConstructor(Comparator.class);
- this.setCreator.newInstance(setComparatorParam); // check to make sure compiles
+ this.setCreator.newInstance(setComparatorParam); // check to make sure compiles
}
data = map == null ? new HashMap<K, Set<V>>() : map;
} catch (Exception e) {
public final Set<Entry<K, V>> entrySet() {
return keyValueSet();
}
-
+
public Set<Entry<K, Set<V>>> keyValuesSet() {
return data.entrySet();
}
-
+
public Set<Entry<K, V>> keyValueSet() {
Set<Entry<K, V>> result = new LinkedHashSet<Entry<K, V>>();
for (K key : data.keySet()) {
return result;
}
+ @Override
public boolean equals(Object o) {
if (o == null)
return false;
return data.get(key);
}
+ @Override
public int hashCode() {
return data.hashCode();
}
private Set<V> newSet() {
try {
- return (Set<V>) setCreator.newInstance(setComparatorParam);
+ return setCreator.newInstance(setComparatorParam);
} catch (Exception e) {
throw (RuntimeException) new IllegalArgumentException("Can't create new set").initCause(e);
}
return result;
}
+ @Override
public String toString() {
return data.toString();
}
this.value = e.getValue();
}
+ @Override
public K getKey() {
return key;
}
+ @Override
public V getValue() {
return value;
}
+ @Override
public V setValue(V value) {
V oldValue = this.value;
this.value = value;
volatile boolean frozen = false;
+ @Override
public boolean isFrozen() {
return frozen;
}
+ @Override
public Relation<K, V> freeze() {
if (!frozen) {
// does not handle one level down, so we do that on a case-by-case basis
return this;
}
+ @Override
public Relation<K, V> cloneAsThawed() {
// TODO do later
throw new UnsupportedOperationException();
public class ReplaceableUCharacterIterator extends UCharacterIterator {
// public constructor ------------------------------------------------------
-
+
/**
* Public constructor
* @param replaceable text which the iterator will be based on
this.replaceable = replaceable;
this.currentIndex = 0;
}
-
+
/**
* Public constructor
* @param str text which the iterator will be based on
this.replaceable = new ReplaceableString(str);
this.currentIndex = 0;
}
-
+
/**
* Public constructor
* @param buf buffer of text on which the iterator will be based
this.replaceable = new ReplaceableString(buf);
this.currentIndex = 0;
}
-
+
// public methods ----------------------------------------------------------
-
+
/**
- * Creates a copy of this iterator, does not clone the underlying
+ * Creates a copy of this iterator, does not clone the underlying
* <code>Replaceable</code>object
* @return copy of this iterator
*/
+ @Override
public Object clone(){
try {
return super.clone();
return null; // never invoked
}
}
-
+
/**
* Returns the current UTF16 character.
* @return current UTF16 character
*/
+ @Override
public int current(){
if (currentIndex < replaceable.length()) {
return replaceable.charAt(currentIndex);
}
return DONE;
}
-
+
/**
* Returns the current codepoint
* @return current codepoint
*/
+ @Override
public int currentCodePoint(){
- // cannot use charAt due to it different
+ // cannot use charAt due to its different
// behaviour when index is pointing at a
// trail surrogate, check for surrogates
-
+
int ch = current();
if(UTF16.isLeadSurrogate((char)ch)){
// advance the index to get the next code point
next();
- // due to post increment semantics current() after next()
+ // due to post increment semantics current() after next()
// actually returns the next char which is what we want
int ch2 = current();
// current should never change the current index so back off
previous();
-
+
if(UTF16.isTrailSurrogate((char)ch2)){
// we found a surrogate pair
return Character.toCodePoint((char)ch, (char)ch2);
}
return ch;
}
-
+
/**
* Returns the length of the text
* @return length of the text
*/
+ @Override
public int getLength(){
return replaceable.length();
}
-
+
/**
* Gets the current currentIndex in text.
* @return current currentIndex in text.
*/
+ @Override
public int getIndex(){
return currentIndex;
}
-
+
/**
- * Returns next UTF16 character and increments the iterator's currentIndex by 1.
- * If the resulting currentIndex is greater or equal to the text length, the
- * currentIndex is reset to the text length and a value of DONECODEPOINT is
- * returned.
- * @return next UTF16 character in text or DONE if the new currentIndex is off the
+ * Returns next UTF16 character and increments the iterator's currentIndex by 1.
+ * If the resulting currentIndex is greater or equal to the text length, the
+ * currentIndex is reset to the text length and a value of DONE is
+ * returned.
+ * @return next UTF16 character in text or DONE if the new currentIndex is off the
* end of the text range.
*/
+ @Override
public int next(){
if (currentIndex < replaceable.length()) {
return replaceable.charAt(currentIndex++);
}
return DONE;
}
-
-
+
+
/**
- * Returns previous UTF16 character and decrements the iterator's currentIndex by
- * 1.
- * If the resulting currentIndex is less than 0, the currentIndex is reset to 0 and a
- * value of DONECODEPOINT is returned.
- * @return next UTF16 character in text or DONE if the new currentIndex is off the
+ * Returns previous UTF16 character and decrements the iterator's currentIndex by
+ * 1.
+ * If the resulting currentIndex is less than 0, the currentIndex is reset to 0 and a
+ * value of DONE is returned.
+ * @return previous UTF16 character in text or DONE if the new currentIndex is off the
* start of the text range.
*/
+ @Override
public int previous(){
if (currentIndex > 0) {
return replaceable.charAt(--currentIndex);
}
/**
- * <p>Sets the currentIndex to the specified currentIndex in the text and returns that
- * single UTF16 character at currentIndex.
+ * <p>Sets the currentIndex to the specified currentIndex in the text and returns that
+ * single UTF16 character at currentIndex.
* This assumes the text is stored as 16-bit code units.</p>
- * @param currentIndex the currentIndex within the text.
- * @exception IllegalArgumentException is thrown if an invalid currentIndex is
+ * @param currentIndex the currentIndex within the text.
+ * @exception IndexOutOfBoundsException is thrown if an invalid currentIndex is
* supplied. i.e. currentIndex is out of bounds.
- * @returns the character at the specified currentIndex or DONE if the specified
+ * @returns the character at the specified currentIndex or DONE if the specified
* currentIndex is equal to the end of the text.
*/
+ @Override
public void setIndex(int currentIndex) throws IndexOutOfBoundsException{
if (currentIndex < 0 || currentIndex > replaceable.length()) {
throw new IndexOutOfBoundsException();
}
this.currentIndex = currentIndex;
}
-
+
+ @Override
public int getText(char[] fillIn, int offset){
int length = replaceable.length();
if(offset < 0 || offset + length > fillIn.length){
}
replaceable.getChars(0,length,fillIn,offset);
return length;
- }
-
+ }
+
// private data members ----------------------------------------------------
-
+
/**
* Replacable object
*/
import com.ibm.icu.util.UResourceBundle;
/**
- * just a wrapper for Java ListResourceBundles and
+ * just a wrapper for Java ListResourceBundles and
* @author ram
*
*/
this.bundle=bundle;
}
+ @Override
protected Object handleGetObject(String aKey){
ResourceBundleWrapper current = this;
Object obj = null;
}
return obj;
}
-
+
+ @Override
public Enumeration<String> getKeys(){
return Collections.enumeration(keys);
}
-
+
private void initKeysVector(){
ResourceBundleWrapper current = this;
keys = new ArrayList<String>();
current = (ResourceBundleWrapper)current.getParent();
}
}
+ @Override
protected String getLocaleID(){
- return localeID;
+ return localeID;
}
-
+
+ @Override
protected String getBaseName(){
- return bundle.getClass().getName().replace('.','/');
+ return bundle.getClass().getName().replace('.','/');
}
-
+
+ @Override
public ULocale getULocale(){
- return new ULocale(localeID);
+ return new ULocale(localeID);
}
-
+
+ @Override
public UResourceBundle getParent(){
- return (UResourceBundle)parent;
+ return (UResourceBundle)parent;
}
// Flag for enabling/disabling debugging code
private static final boolean DEBUG = ICUDebug.enabled("resourceBundleWrapper");
-
+
// This method is for super class's instantiateBundle method
public static ResourceBundleWrapper getBundleInstance(String baseName, String localeID,
ClassLoader root, boolean disableFallback) {
final String resName = name.replace('.', '/') + ".properties";
InputStream stream = java.security.AccessController.doPrivileged(
new java.security.PrivilegedAction<InputStream>() {
+ @Override
public InputStream run() {
return root.getResourceAsStream(resName);
}
return this;
}
+ @Override
public int hashCode() {
int sum = items.length;
for (Object item : items) {
return sum;
}
+ @Override
public boolean equals(Object other) {
if (other == null) {
return false;
}
}
+ @Override
public int compareTo(Object other) {
int result;
Row<C0, C1, C2, C3, C4> that = (Row<C0, C1, C2, C3, C4>)other;
return 0;
}
+ @Override
public String toString() {
StringBuilder result = new StringBuilder("[");
boolean first = true;
return result.append("]").toString();
}
+ @Override
public boolean isFrozen() {
return frozen;
}
+ @Override
public Row<C0, C1, C2, C3, C4> freeze() {
frozen = true;
return this;
}
+ @Override
public Object clone() {
if (frozen) return this;
try {
}
}
+ @Override
public Row<C0, C1, C2, C3, C4> cloneAsThawed() {
try {
Row<C0, C1, C2, C3, C4> result = (Row<C0, C1, C2, C3, C4>) super.clone();
/**
* Text being iterated.
- */
+ */
private String text;
/**
* PARSE_ESCAPES) != 0, then an embedded escape sequence will be expanded
* to its value. Escapes are parsed using Utility.unescapeAt().
*/
- public static final int PARSE_ESCAPES = 2;
+ public static final int PARSE_ESCAPES = 2;
/**
* Bitmask option to enable skipping of whitespace. If (options &
this.pos = pos;
buf = null;
}
-
+
/**
* Returns true if this iterator has no more characters to return.
*/
* Position within an expanded variable is <em>not</em> indicated.
* @return a string representation of this object
*/
+ @Override
public String toString() {
int b = pos.getIndex();
return text.substring(0, b) + '|' + text.substring(b);
return (i < text.length()) ? UTF16.charAt(text, i) : DONE;
}
}
-
+
/**
* Advances the position by the given amount.
* @param count the number of 16-bit code units to advance past
}
}
+ @Override
public V get(Object key) {
Reference<Map<K, V>> ref = cacheRef;
if (ref != null) {
return null;
}
+ @Override
public void put(K key, V value) {
Reference<Map<K, V>> ref = cacheRef;
Map<K, V> map = null;
map.put(key, value);
}
+ @Override
public void clear() {
cacheRef = null;
}
return ICUBinary.getChars(byteBuffer, length, 0);
}
+ @Override
public boolean isDataVersionAcceptable(byte version[]){
- return version[0] == DATA_FORMAT_VERSION[0]
- && version[2] == DATA_FORMAT_VERSION[2]
+ return version[0] == DATA_FORMAT_VERSION[0]
+ && version[2] == DATA_FORMAT_VERSION[2]
&& version[3] == DATA_FORMAT_VERSION[3];
}
public int[] readIndexes(int length)throws IOException{
indexes[i] = byteBuffer.getInt();
}
return indexes;
- }
+ }
public byte[] getUnicodeVersion(){
return ICUBinary.getVersionByteArrayFromCompactInt(unicodeVersion);
}
public static final Comparator<int[]> COMPARE_INT_ARRAYS = new Comparator<int[]>() {
+ @Override
public int compare(int[] o1, int[] o2) {
int minIndex = Math.min(o1.length, o2.length);
for (int i = 0; i < minIndex; ++i) {
}
}
// We failed to find continuation. Add what we have and restart
- adder.add(start, end == null ? null
- : !shorterPairs ? end
+ adder.add(start, end == null ? null
+ : !shorterPairs ? end
: end.substring(prefixLen, end.length()));
}
// new possible range
lastCp = s.codePointBefore(s.length());
prefixLen = s.length() - Character.charCount(lastCp);
}
- adder.add(start, end == null ? null
- : !shorterPairs ? end
+ adder.add(start, end == null ? null
+ : !shorterPairs ? end
: end.substring(prefixLen, end.length()));
} else {
// not a fast algorithm, but ok for now
// first sort by lengths
Relation<Integer,Ranges> lengthToArrays = Relation.of(new TreeMap<Integer,Set<Ranges>>(), TreeSet.class);
for (String s : source) {
- Ranges item = new Ranges(s);
+ Ranges item = new Ranges(s);
lengthToArrays.put(item.size(), item);
}
// then compact items of each length and emit compacted sets
for (Entry<Integer, Set<Ranges>> entry : lengthToArrays.keyValuesSet()) {
LinkedList<Ranges> compacted = compact(entry.getKey(), entry.getValue());
- for (Ranges ranges : compacted) {
+ for (Ranges ranges : compacted) {
adder.add(ranges.start(), ranges.end(shorterPairs));
}
}
}
}
-
+
/**
* Faster but not as good compaction. Only looks at final codepoint.
* @param source set of strings
public boolean equals(Object obj) {
return this == obj || (obj != null && obj instanceof Range && compareTo((Range)obj) == 0);
}
+ @Override
public int compareTo(Range that) {
int diff = min - that.min;
if (diff != 0) {
if (DEBUG) System.out.println(" => " + this);
return true;
}
-
+
public String start() {
StringBuilder result = new StringBuilder();
for (int i = 0; i < ranges.length; ++i) {
public Integer size() {
return ranges.length;
}
+ @Override
public int compareTo(Ranges other) {
int diff = ranges.length - other.ranges.length;
if (diff != 0) {
add(0, startOffset, startCps, endCps, builder, output);
return output;
}
-
+
private static void add(int endIndex, int startOffset, int[] starts, int[] ends, StringBuilder builder, Collection<String> output) {
int start = starts[endIndex+startOffset];
int end = ends[endIndex];
* Yet another TimeZoneNames implementation based on the tz database.
* This implementation contains only tz abbreviations (short standard
* and daylight names) for each metazone.
- *
+ *
* The data file $ICU4C_ROOT/source/data/zone/tzdbNames.txt contains
* the metazone - abbreviations mapping data (manually edited).
- *
+ *
* Note: The abbreviations in the tz database are not necessarily
* unique. For example, parsing abbreviation "IST" is ambiguous
* (can be parsed as India Standard Time or Israel Standard Time).
public class TZDBTimeZoneNames extends TimeZoneNames {
private static final long serialVersionUID = 1L;
- private static final ConcurrentHashMap<String, TZDBNames> TZDB_NAMES_MAP =
+ private static final ConcurrentHashMap<String, TZDBNames> TZDB_NAMES_MAP =
new ConcurrentHashMap<String, TZDBNames>();
private static volatile TextTrieMap<TZDBNameInfo> TZDB_NAMES_TRIE = null;
*/
@Override
public String getMetaZoneDisplayName(String mzID, NameType type) {
- if (mzID == null || mzID.length() == 0 ||
+ if (mzID == null || mzID.length() == 0 ||
(type != NameType.SHORT_STANDARD && type != NameType.SHORT_DAYLIGHT)) {
return null;
}
* @see com.ibm.icu.impl.TextTrieMap.ResultHandler#handlePrefixMatch(int,
* java.util.Iterator)
*/
+ @Override
public boolean handlePrefixMatch(int matchLength, Iterator<TZDBNameInfo> values) {
TZDBNameInfo match = null;
TZDBNameInfo defaultRegionMatch = null;
/**
* Constructs a TextTrieMap object.
- *
+ *
* @param ignoreCase true to use simple case insensitive match
*/
public TextTrieMap(boolean ignoreCase) {
/**
* Adds the text key and its associated object in this object.
- *
+ *
* @param text The text.
* @param val The value object associated with the text.
*/
/**
* Gets an iterator of the objects associated with the
* longest prefix matching string key.
- *
+ *
* @param text The text to be matched with prefixes.
* @return An iterator of the objects associated with
* the longest prefix matching matching key, or null
/**
* Gets an iterator of the objects associated with the
- * longest prefix matching string key starting at the
+ * longest prefix matching string key starting at the
* specified position.
- *
+ *
* @param text The text to be matched with prefixes.
* @param start The start index of of the text
* @return An iterator of the objects associated with the
- * longest prefix matching matching key, or null if no
+ * longest prefix matching key, or null if no
* matching entry is found.
*/
public Iterator<V> get(CharSequence text, int start) {
/* (non-Javadoc)
* @see java.util.Iterator#hasNext()
*/
+ @Override
public boolean hasNext() {
if (_nextIdx == _text.length() && _remainingChar == null) {
return false;
/* (non-Javadoc)
* @see java.util.Iterator#next()
*/
+ @Override
public Character next() {
if (_nextIdx == _text.length() && _remainingChar == null) {
return null;
/* (non-Javadoc)
* @see java.util.Iterator#remove()
*/
+ @Override
public void remove() {
throw new UnsupportedOperationException("remove() not supproted");
}
public interface ResultHandler<V> {
/**
* Handles a prefix key match
- *
+ *
* @param matchLength Matched key's length
* @param values An iterator of the objects associated with the matched key
* @return Return true to continue the search in the trie, false to quit.
private Iterator<V> matches = null;
private int length = 0;
+ @Override
public boolean handlePrefixMatch(int matchLength, Iterator<V> values) {
if (matchLength > length) {
length = matchLength;
* @since ICU 2.8
*/
public class TimeZoneAdapter extends java.util.TimeZone {
-
+
// Generated by serialver from JDK 1.4.1_01
static final long serialVersionUID = -2040072218820018557L;
-
+
/**
* The contained com.ibm.icu.util.TimeZone object. Must not be null.
* We delegate all methods to this object.
*/
private TimeZone zone;
-
+
/**
* Given a java.util.TimeZone, wrap it in the appropriate adapter
* subclass of com.ibm.icu.util.TimeZone and return the adapter.
/**
* TimeZone API; calls through to wrapped time zone.
*/
+ @Override
public void setID(String ID) {
super.setID(ID);
zone.setID(ID);
- }
+ }
/**
* TimeZone API; calls through to wrapped time zone.
*/
+ @Override
public boolean hasSameRules(java.util.TimeZone other) {
return other instanceof TimeZoneAdapter &&
zone.hasSameRules(((TimeZoneAdapter)other).zone);
/**
* TimeZone API; calls through to wrapped time zone.
*/
+ @Override
public int getOffset(int era, int year, int month, int day, int dayOfWeek,
int millis) {
return zone.getOffset(era, year, month, day, dayOfWeek, millis);
/**
* TimeZone API; calls through to wrapped time zone.
*/
+ @Override
public int getRawOffset() {
return zone.getRawOffset();
}
/**
* TimeZone API; calls through to wrapped time zone.
*/
+ @Override
public void setRawOffset(int offsetMillis) {
zone.setRawOffset(offsetMillis);
}
/**
* TimeZone API; calls through to wrapped time zone.
*/
+ @Override
public boolean useDaylightTime() {
return zone.useDaylightTime();
}
/**
* TimeZone API; calls through to wrapped time zone.
*/
+ @Override
public boolean inDaylightTime(Date date) {
return zone.inDaylightTime(date);
}
/**
* Boilerplate API; calls through to wrapped object.
*/
+ @Override
public Object clone() {
return new TimeZoneAdapter((TimeZone)zone.clone());
}
/**
* Boilerplate API; calls through to wrapped object.
*/
+ @Override
public synchronized int hashCode() {
return zone.hashCode();
}
/**
* Boilerplate API; calls through to wrapped object.
*/
+ @Override
public boolean equals(Object obj) {
if (obj instanceof TimeZoneAdapter) {
obj = ((TimeZoneAdapter) obj).zone;
* Returns a string representation of this object.
* @return a string representation of this object.
*/
+ @Override
public String toString() {
return "TimeZoneAdapter: " + zone.toString();
}
* Format pattern enum used for composing location and partial location names
*/
public enum Pattern {
- // The format pattern such as "{0} Time", where {0} is the country or city.
+ // The format pattern such as "{0} Time", where {0} is the country or city.
REGION_FORMAT("regionFormat", "({0})"),
// Note: FALLBACK_REGION_FORMAT is no longer used since ICU 50/CLDR 22.1
/**
* Returns the display name of the time zone for the given name type
* at the given date, or null if the display name is not available.
- *
+ *
* @param tz the time zone
* @param type the generic name type - see {@link GenericNameType}
* @param date the date
/**
* Returns the generic location name for the given canonical time zone ID.
- *
+ *
* @param canonicalTzID the canonical time zone ID
* @return the generic location name for the given canonical time zone ID.
*/
/**
* Private method to get a generic string, with fallback logics involved,
* that is,
- *
+ *
* 1. If a generic non-location string is available for the zone, return it.
- * 2. If a generic non-location string is associated with a meta zone and
+ * 2. If a generic non-location string is associated with a meta zone and
* the zone never use daylight time around the given date, use the standard
* string (if available).
* 3. If a generic non-location string is associated with a meta zone and
* current locale, then return the generic partial location string (if available)
* 4. If a generic non-location string is not available, use generic location
* string.
- *
+ *
* @param tz the requested time zone
* @param date the date
* @param type the generic name type, either LONG or SHORT
* Private simple pattern formatter used for formatting generic location names
* and partial location names. We intentionally use JDK MessageFormat
* for performance reason.
- *
+ *
* @param pat the message pattern enum
* @param args the format argument(s)
* @return the formatted string
* instance. Because LocaleDisplayNames is only used for generic
* location formant and partial location format, the LocaleDisplayNames
* is instantiated lazily.
- *
+ *
* @return the instance of LocaleDisplayNames for the locale of this object.
*/
private synchronized LocaleDisplayNames getLocaleDisplayNames() {
return;
}
// getGenericLocationName() formats a name and put it into the trie
- getGenericLocationName(tzCanonicalID);
+ getGenericLocationName(tzCanonicalID);
// Generic partial location format
Set<String> mzIDs = _tznames.getAvailableMetaZoneIDs(tzCanonicalID);
* the locale of this instance. When a generic name is coming from
* a meta zone, this region is used for checking if the time zone
* is a reference zone of the meta zone.
- *
+ *
* @return the target region
*/
private synchronized String getTargetRegion() {
* Private method for formatting partial location names. This format
* is used when a generic name of a meta zone is available, but the given
* time zone is not a reference zone (golden zone) of the meta zone.
- *
+ *
* @param tzID the canonical time zone ID
* @param mzID the meta zone ID
* @param isLong true when long generic name
/* (non-Javadoc)
* @see com.ibm.icu.impl.TextTrieMap.ResultHandler#handlePrefixMatch(int, java.util.Iterator)
*/
+ @Override
public boolean handlePrefixMatch(int matchLength, Iterator<NameInfo> values) {
while (values.hasNext()) {
NameInfo info = values.next();
nameTypes.add(NameType.SHORT_GENERIC);
nameTypes.add(NameType.SHORT_STANDARD);
}
-
+
if (!nameTypes.isEmpty()) {
// Find matches in the TimeZoneNames
tznamesMatches = _tznames.find(text, start, nameTypes);
protected TimeZoneGenericNames createInstance(String key, ULocale data) {
return new TimeZoneGenericNames(data).freeze();
}
-
+
}
/*
/**
* {@inheritDoc}
*/
+ @Override
public boolean isFrozen() {
return _frozen;
}
/**
* {@inheritDoc}
*/
+ @Override
public TimeZoneGenericNames freeze() {
_frozen = true;
return this;
/**
* {@inheritDoc}
*/
+ @Override
public TimeZoneGenericNames cloneAsThawed() {
TimeZoneGenericNames copy = null;
try {
import com.ibm.icu.text.UTF16;
/**
- * <p>A trie is a kind of compressed, serializable table of values
+ * <p>A trie is a kind of compressed, serializable table of values
* associated with Unicode code points (0..0x10ffff).</p>
- * <p>This class defines the basic structure of a trie and provides methods
+ * <p>This class defines the basic structure of a trie and provides methods
* to <b>retrieve the offsets to the actual data</b>.</p>
* <p>Data will be the form of an array of basic types, char or int.</p>
* <p>The actual data format will have to be specified by the user in the
* to the fromOffsetTrail() methods.
* To handle such supplementary codepoints, some offset information are kept
* in the data.</p>
- * <p>Methods in com.ibm.icu.impl.Trie.DataManipulate are called to retrieve
+ * <p>Methods in com.ibm.icu.impl.Trie.DataManipulate are called to retrieve
* that offset from the folded value for the lead surrogate unit.</p>
- * <p>For examples of use, see com.ibm.icu.impl.CharTrie or
+ * <p>For examples of use, see com.ibm.icu.impl.CharTrie or
* com.ibm.icu.impl.IntTrie.</p>
* @author synwee
* @see com.ibm.icu.impl.CharTrie
public abstract class Trie
{
// public class declaration ----------------------------------------
-
+
/**
* Character data in com.ibm.impl.Trie have different user-specified format
* for different purposes.
* This interface specifies methods to be implemented in order for
- * com.ibm.impl.Trie, to surrogate offset information encapsulated within
+ * com.ibm.icu.impl.Trie to retrieve the surrogate offset information encapsulated within
* the data.
*/
public static interface DataManipulate
{
/**
- * Called by com.ibm.icu.impl.Trie to extract from a lead surrogate's
+ * Called by com.ibm.icu.impl.Trie to extract from a lead surrogate's
* data
* the index array offset of the indexes for that lead surrogate.
* @param value data value for a surrogate from the trie, including the
* folding offset
* @return data offset or 0 if there is no data for the lead surrogate
*/
- public int getFoldingOffset(int value);
+ public int getFoldingOffset(int value);
}
// default implementation
private static class DefaultGetFoldingOffset implements DataManipulate {
+ @Override
public int getFoldingOffset(int value) {
- return value;
+ return value;
}
}
// public methods --------------------------------------------------
-
+
/**
* Determines if this trie has a linear latin 1 array
* @return true if this trie has a linear latin 1 array, false otherwise
{
return m_isLatin1Linear_;
}
-
+
/**
* Checks if the argument Trie has the same data as this Trie.
* Attributes are checked but not the index data.
* otherwise
*/
///CLOVER:OFF
- public boolean equals(Object other)
+ @Override
+ public boolean equals(Object other)
{
if (other == this) {
return true;
&& m_dataLength_ == othertrie.m_dataLength_
&& Arrays.equals(m_index_, othertrie.m_index_);
}
-
+
+ @Override
public int hashCode() {
    // This type is not designed to be hashed: with assertions enabled (-ea)
    // any call fails immediately so the misuse is noticed.
    assert false : "hashCode not designed";

    // With assertions disabled every instance reports the same constant,
    // which is trivially consistent with any equals() implementation.
    final int placeholderHash = 42;
    return placeholderHash;
}
///CLOVER:ON
-
+
/**
- * Gets the serialized data file size of the Trie. This is used during
- * trie data reading for size checking purposes.
+ * Gets the serialized data file size of the Trie. This is used during
+ * trie data reading for size checking purposes.
* @return size size of serialized trie data file in terms of the number
* of bytes
*/
* Trie constructor
* @param index array to be used for index
* @param options used by the trie
- * @param dataManipulate object containing the information to parse the
+ * @param dataManipulate object containing the information to parse the
* trie data
*/
protected Trie(char index[], int options, DataManipulate dataManipulate)
* Surrogate mask to use when shifting offset to retrieve supplementary
* values
*/
- protected static final int SURROGATE_MASK_ = 0x3FF;
+ protected static final int SURROGATE_MASK_ = 0x3FF;
/**
* Index or UTF16 characters
*/
*/
protected DataManipulate m_dataManipulate_;
/**
- * Start index of the data portion of the trie. CharTrie combines
- * index and data into a char array, so this is used to indicate the
+ * Start index of the data portion of the trie. CharTrie combines
+ * index and data into a char array, so this is used to indicate the
* initial offset to the data portion.
* Note this index always points to the initial value.
*/
protected int m_dataOffset_;
/**
- * Length of the data array
+ * Length of the data array
*/
protected int m_dataLength_;
-
+
// protected methods -----------------------------------------------
/**
* @return offset to data
*/
protected abstract int getSurrogateOffset(char lead, char trail);
-
+
/**
* Gets the value at the argument index
* @param index value at index will be retrieved
- * @return 32 bit value
+ * @return 32 bit value
*/
protected abstract int getValue(int index);
/**
* Gets the default initial value
- * @return 32 bit value
+ * @return 32 bit value
*/
protected abstract int getInitialValue();
-
+
/**
* Gets the offset to the data which the index ch after variable offset
* points to.
*/
protected final int getRawOffset(int offset, char ch)
{
- return (m_index_[offset + (ch >> INDEX_STAGE_1_SHIFT_)]
- << INDEX_STAGE_2_SHIFT_)
+ return (m_index_[offset + (ch >> INDEX_STAGE_1_SHIFT_)]
+ << INDEX_STAGE_2_SHIFT_)
+ (ch & INDEX_STAGE_3_MASK_);
}
-
+
/**
* Gets the offset to data which the BMP character points to
* Treats a lead surrogate as a normal code point.
*/
protected final int getBMPOffset(char ch)
{
- return (ch >= UTF16.LEAD_SURROGATE_MIN_VALUE
- && ch <= UTF16.LEAD_SURROGATE_MAX_VALUE)
+ return (ch >= UTF16.LEAD_SURROGATE_MIN_VALUE
+ && ch <= UTF16.LEAD_SURROGATE_MAX_VALUE)
? getRawOffset(LEAD_INDEX_OFFSET_, ch)
- : getRawOffset(0, ch);
+ : getRawOffset(0, ch);
// using a getRawOffset(ch) makes no diff
}
return getRawOffset(0, (char)ch);
} else if (ch < UTF16.SUPPLEMENTARY_MIN_VALUE) {
// BMP codepoint
- return getBMPOffset((char)ch);
+ return getBMPOffset((char)ch);
} else if (ch <= UCharacter.MAX_VALUE) {
// look at the construction of supplementary characters
// trail forms the ends of it.
- return getSurrogateOffset(UTF16.getLeadSurrogate(ch),
+ return getSurrogateOffset(UTF16.getLeadSurrogate(ch),
(char)(ch & SURROGATE_MASK_));
} else {
- // return -1 if there is an error, in this case we return
+ // return -1 if there is an error, in this case we return
return -1;
}
}
private static final int HEADER_OPTIONS_SHIFT_MASK_ = 0xF;
protected static final int HEADER_OPTIONS_INDEX_SHIFT_ = 4;
protected static final int HEADER_OPTIONS_DATA_IS_32_BIT_ = 0x100;
-
+
/**
* Flag indicator for Latin quick access data block
*/
private boolean m_isLatin1Linear_;
-
+
/**
* <p>Trie options field.</p>
* <p>options bit field:<br>
* 3..0 INDEX_STAGE_2_SHIFT // 1..9<br>
*/
private int m_options_;
-
+
// private methods ---------------------------------------------------
-
+
/**
* Authenticates raw data header.
* Checking the header information, signature and options.
return false;
}
- if ((m_options_ & HEADER_OPTIONS_SHIFT_MASK_) !=
+ if ((m_options_ & HEADER_OPTIONS_SHIFT_MASK_) !=
INDEX_STAGE_1_SHIFT_ ||
((m_options_ >> HEADER_OPTIONS_INDEX_SHIFT_) &
HEADER_OPTIONS_SHIFT_MASK_)
* character properties.
*
* This is the second common version of a Unicode trie (hence the name Trie2).
- *
+ *
*/
public abstract class Trie2 implements Iterable<Trie2.Range> {
byte sig[] = new byte[4];
int read = is.read(sig);
is.reset();
-
+
if (read != sig.length) {
return 0;
}
-
+
if (sig[0]=='T' && sig[1]=='r' && sig[2]=='i' && sig[3]=='e') {
return 1;
}
*/
abstract public int get(int codePoint);
-
+
/**
* Get the trie value for a UTF-16 code unit.
*
* range, one for lead surrogates, which is the value that will be
* returned by this function, and a second value that is returned
* by Trie2.get().
- *
+ *
* For code units outside of the lead surrogate range, this function
* returns the same result as Trie2.get().
- *
+ *
* This function, together with the alternate value for lead surrogates,
* makes possible very efficient processing of UTF-16 strings without
* first converting surrogate pairs to their corresponding 32 bit code point
* values.
- *
+ *
* At build-time, enumerate the contents of the Trie2 to see if there
* is non-trivial (non-initialValue) data for any of the supplementary
* code points associated with a lead surrogate.
*
* At runtime, use Trie2.getFromU16SingleLead(). If there is non-trivial
* data and the code unit is a lead surrogate, then check if a trail surrogate
- * follows. If so, assemble the supplementary code point and look up its value
+ * follows. If so, assemble the supplementary code point and look up its value
* with Trie2.get(); otherwise reset the lead
* surrogate's value or do a code point lookup for it.
*
* If there is only trivial data for lead and trail surrogates, then processing
* can often skip them. For example, in normalization or case mapping
* all characters that do not have any mappings are simply copied as is.
- *
+ *
* @param c the code point or lead surrogate value.
* @return the value
*/
abstract public int getFromU16SingleLead(char c);
-
+
/**
* Equals function. Two Tries are equal if their contents are equal.
- * The type need not be the same, so a Trie2Writable will be equal to
+ * The type need not be the same, so a Trie2Writable will be equal to
* (read-only) Trie2_16 or Trie2_32 so long as they are storing the same values.
- *
+ *
*/
+ @Override
public final boolean equals(Object other) {
if(!(other instanceof Trie2)) {
return false;
}
Trie2 OtherTrie = (Trie2)other;
Range rangeFromOther;
-
+
Iterator<Trie2.Range> otherIter = OtherTrie.iterator();
for (Trie2.Range rangeFromThis: this) {
if (otherIter.hasNext() == false) {
if (otherIter.hasNext()) {
return false;
}
-
+
if (errorValue != OtherTrie.errorValue ||
initialValue != OtherTrie.initialValue) {
return false;
}
-
+
return true;
}
-
-
+
+
+ @Override
public int hashCode() {
if (fHash == 0) {
int hash = initHash();
}
return fHash;
}
-
+
/**
* When iterating over the contents of a Trie2, Elements of this type are produced.
- * The iterator will return one item for each contiguous range of codepoints having the same value.
- *
+ * The iterator will return one item for each contiguous range of codepoints having the same value.
+ *
* When iterating, the same Trie2EnumRange object will be reused and returned for each range.
* If you need to retain complete iteration results, clone each returned Trie2EnumRange,
* or save the range in some other way, before advancing to the next iteration step.
public int endCodePoint; // Inclusive.
public int value;
public boolean leadSurrogate;
-
+
+ @Override
public boolean equals(Object other) {
if (other == null || !(other.getClass().equals(getClass()))) {
return false;
Range tother = (Range)other;
return this.startCodePoint == tother.startCodePoint &&
this.endCodePoint == tother.endCodePoint &&
- this.value == tother.value &&
+ this.value == tother.value &&
this.leadSurrogate == tother.leadSurrogate;
}
-
-
+
+
+ @Override
public int hashCode() {
int h = initHash();
h = hashUChar32(h, startCodePoint);
return h;
}
}
-
-
+
+
/**
* Create an iterator over the value ranges in this Trie2.
* Values from the Trie2 are not remapped or filtered, but are returned as they
* are stored in the Trie2.
- *
+ *
* @return an Iterator
*/
+ @Override
public Iterator<Range> iterator() {
    // Delegate to the mapper-taking overload, using the default mapper
    // so that values are reported without remapping.
    final ValueMapper unmapped = defaultValueMapper;
    return iterator(unmapped);
}
-
+
private static ValueMapper defaultValueMapper = new ValueMapper() {
- public int map(int in) {
+ @Override
+ public int map(int in) {
return in;
}
};
-
+
/**
* Create an iterator over the value ranges from this Trie2.
* Values from the Trie2 are passed through a caller-supplied remapping function,
* and it is the remapped values that determine the ranges that
* will be produced by the iterator.
- *
- *
+ *
+ *
* @param mapper provides a function to remap values obtained from the Trie2.
* @return an Iterator
*/
return new Trie2Iterator(mapper);
}
-
+
/**
* Create an iterator over the Trie2 values for the 1024=0x400 code points
* corresponding to a given lead surrogate.
* be used to remap the values from the Trie2. The remapped values will be used
* both in determining the ranges of codepoints and as the value to be returned
* for each range.
- *
+ *
* Example of use, with an anonymous subclass of TrieValueMapper:
- *
- *
+ *
+ *
* ValueMapper m = new ValueMapper() {
* int map(int in) {return in & 0x1f;};
* }
* Trie2EnumRange r = i.next();
* ... // Do something with the range r.
* }
- *
+ *
*/
public interface ValueMapper {
    /**
     * Remaps a single value.
     *
     * @param originalVal the value to remap
     * @return the remapped value
     */
    int map(int originalVal);
}
-
+
/**
* Serialize a trie2 Header and Index onto an OutputStream. This is
* @param dos the stream to which the serialized Trie2 data will be written.
* @return the number of bytes written.
*/
- protected int serializeHeader(DataOutputStream dos) throws IOException {
+ protected int serializeHeader(DataOutputStream dos) throws IOException {
// Write the header. It is already set and ready to use, having been
// created when the Trie2 was unserialized or when it was frozen.
int bytesWritten = 0;
-
- dos.writeInt(header.signature);
+
+ dos.writeInt(header.signature);
dos.writeShort(header.options);
dos.writeShort(header.indexLength);
dos.writeShort(header.shiftedDataLength);
dos.writeShort(header.dataNullOffset);
dos.writeShort(header.shiftedHighStart);
bytesWritten += 16;
-
+
// Write the index
int i;
for (i=0; i< header.indexLength; i++) {
dos.writeChar(index[i]);
}
- bytesWritten += header.indexLength;
- return bytesWritten;
+ bytesWritten += header.indexLength;
+ return bytesWritten;
}
-
-
+
+
/**
* Struct-like class for holding the results returned by a UTrie2 CharSequence iterator.
* The iteration walks over a CharSequence, and for each Unicode code point therein
* returns the character and its associated Trie2 value.
*/
- public static class CharSequenceValues {
+ public static class CharSequenceValues {
/** string index of the current code point. */
- public int index;
+ public int index;
/** The code point at index. */
- public int codePoint;
+ public int codePoint;
/** The Trie2 value for the current code point */
- public int value;
+ public int value;
}
-
+
/**
* Create an iterator that will produce the values from the Trie2 for
* the sequence of code points in an input text.
- *
+ *
* @param text A text string to be iterated over.
* @param index The starting iteration position within the input text.
* @return the CharSequenceIterator
public CharSequenceIterator charSequenceIterator(CharSequence text, int index) {
    // A fresh iterator is created on every call; the constructor positions
    // it at the requested index via set(index).
    final CharSequenceIterator positioned = new CharSequenceIterator(text, index);
    return positioned;
}
-
+
// TODO: Survey usage of the equivalent of CharSequenceIterator in ICU4C
// and if there is none, remove it from here.
// Don't waste time testing and maintaining unused code.
-
+
/**
* An iterator that operates over an input CharSequence, and for each Unicode code point
* in the input returns the associated value from the Trie2.
- *
+ *
* The iterator can move forwards or backwards, and can be reset to an arbitrary index.
- *
+ *
* Note that Trie2_16 and Trie2_32 subclass Trie2.CharSequenceIterator. This is done
* only for performance reasons. It does require that any changes made here be propagated
* into the corresponding code in the subclasses.
/**
* Internal constructor.
*/
- CharSequenceIterator(CharSequence t, int index) {
+ CharSequenceIterator(CharSequence t, int index) {
text = t;
textLength = text.length();
set(index);
}
-
+
private CharSequence text;
private int textLength;
private int index;
private Trie2.CharSequenceValues fResults = new Trie2.CharSequenceValues();
-
-
+
+
/**
 * Moves the iteration position to the given index in the text.
 *
 * @param i the new position; may equal the text length (one past the last
 *          char), but may not be negative or beyond it
 * @throws IndexOutOfBoundsException if {@code i} lies outside
 *         {@code 0..textLength}
 */
public void set(int i) {
    final boolean withinText = 0 <= i && i <= textLength;
    if (!withinText) {
        throw new IndexOutOfBoundsException();
    }
    index = i;
}
-
-
+
+
+ @Override
public final boolean hasNext() {
    // There is something left to read while the position is before the end of the text.
    return textLength > index;
}
-
-
+
+
/**
 * Reports whether the position is past the start of the text.
 *
 * @return true if the current index is greater than zero
 */
public final boolean hasPrevious() {
    return 0 < index;
}
-
+
+ @Override
public Trie2.CharSequenceValues next() {
int c = Character.codePointAt(text, index);
int val = get(c);
index++;
if (c >= 0x10000) {
index++;
- }
+ }
return fResults;
}
-
+
public Trie2.CharSequenceValues previous() {
int c = Character.codePointBefore(text, index);
int val = get(c);
fResults.value = val;
return fResults;
}
-
- /**
+
+ /**
* Iterator.remove() is not supported by Trie2.CharSequenceIterator.
* @throws UnsupportedOperationException Always thrown because this operation is not supported
* @see java.util.Iterator#remove()
*/
+ @Override
public void remove() {
- throw new UnsupportedOperationException("Trie2.CharSequenceIterator does not support remove().");
+ throw new UnsupportedOperationException("Trie2.CharSequenceIterator does not support remove().");
}
}
-
-
+
+
//--------------------------------------------------------------------------------
//
// Below this point are internal implementation items. No further public API.
//
//--------------------------------------------------------------------------------
-
-
+
+
/**
* Selectors for the width of a UTrie2 data value.
- */
+ */
enum ValueWidth {
    /** Data values are 16 bits wide. */
    BITS_16,

    /** Data values are 32 bits wide. */
    BITS_32
}
-
+
/**
* Trie2 data structure in serialized form:
*
* UTrie2Header header;
* uint16_t index[header.index2Length];
* uint16_t data[header.shiftedDataLength<<2]; -- or uint32_t data[...]
- *
+ *
* For Java, this is read from the stream into an instance of UTrie2Header.
* (The C version just places a struct over the raw serialized data.)
- *
+ *
* @internal
*/
static class UTrie2Header {
/** "Tri2" in big-endian US-ASCII (0x54726932) */
int signature;
-
+
/**
* options bit field (uint16_t):
* 15.. 4 reserved (0)
/** UTRIE2_INDEX_1_OFFSET..UTRIE2_MAX_INDEX_LENGTH (uint16_t) */
int indexLength;
-
+
/** (UTRIE2_DATA_START_OFFSET..UTRIE2_MAX_DATA_LENGTH)>>UTRIE2_INDEX_SHIFT (uint16_t) */
int shiftedDataLength;
*/
int shiftedHighStart;
}
-
+
//
// Data members of UTrie2.
//
char index[]; // Index array. Includes data for 16 bit Tries.
int data16; // Offset to data portion of the index array, if 16 bit data.
// zero if 32 bit data.
- int data32[]; // NULL if 16b data is used via index
+ int data32[]; // NULL if 16b data is used via index
int indexLength;
int dataLength;
/* Start of the last range which ends at U+10ffff, and its value. */
int highStart;
int highValueIndex;
-
+
int dataNullOffset;
-
+
int fHash; // Zero if not yet computed.
// Shared by Trie2Writable, Trie2_16, Trie2_32.
// Thread safety: if two racing threads compute
// the same hash on a frozen Trie2, no damage is done.
-
+
/**
* Trie2 constants, defining shift widths, index array lengths, etc.
*
* These are needed for the runtime macros but users can treat these as
* implementation details and skip to the actual public API further below.
*/
-
+
static final int UTRIE2_OPTIONS_VALUE_BITS_MASK=0x000f;
-
-
+
+
/** Shift size for getting the index-1 table offset. */
static final int UTRIE2_SHIFT_1=6+5;
/** Number of code points per index-1 table entry. 2048=0x800 */
static final int UTRIE2_CP_PER_INDEX_1_ENTRY=1<<UTRIE2_SHIFT_1;
-
+
/** Number of entries in an index-2 block. 64=0x40 */
static final int UTRIE2_INDEX_2_BLOCK_LENGTH=1<<UTRIE2_SHIFT_1_2;
-
+
/** Mask for getting the lower bits for the in-index-2-block offset. */
static final int UTRIE2_INDEX_2_MASK=UTRIE2_INDEX_2_BLOCK_LENGTH-1;
-
+
/** Number of entries in a data block. 32=0x20 */
static final int UTRIE2_DATA_BLOCK_LENGTH=1<<UTRIE2_SHIFT_2;
-
+
/** Mask for getting the lower bits for the in-data-block offset. */
static final int UTRIE2_DATA_MASK=UTRIE2_DATA_BLOCK_LENGTH-1;
-
+
/**
* Shift size for shifting left the index array values.
* Increases possible data size with 16-bit index values at the cost
* This requires data blocks to be aligned by UTRIE2_DATA_GRANULARITY.
*/
static final int UTRIE2_INDEX_SHIFT=2;
-
+
/** The alignment size of a data block. Also the granularity for compaction. */
static final int UTRIE2_DATA_GRANULARITY=1<<UTRIE2_INDEX_SHIFT;
-
+
/* Fixed layout of the first part of the index array. ------------------- */
-
+
/**
* The BMP part of the index-2 table is fixed and linear and starts at offset 0.
* Length=2048=0x800=0x10000>>UTRIE2_SHIFT_2.
*/
static final int UTRIE2_INDEX_2_OFFSET=0;
-
+
/**
* The part of the index-2 table for U+D800..U+DBFF stores values for
* lead surrogate code _units_ not code _points_.
*/
static final int UTRIE2_LSCP_INDEX_2_OFFSET=0x10000>>UTRIE2_SHIFT_2;
static final int UTRIE2_LSCP_INDEX_2_LENGTH=0x400>>UTRIE2_SHIFT_2;
-
+
/** Count the lengths of both BMP pieces. 2080=0x820 */
static final int UTRIE2_INDEX_2_BMP_LENGTH=UTRIE2_LSCP_INDEX_2_OFFSET+UTRIE2_LSCP_INDEX_2_LENGTH;
-
+
/**
* The 2-byte UTF-8 version of the index-2 table follows at offset 2080=0x820.
* Length 32=0x20 for lead bytes C0..DF, regardless of UTRIE2_SHIFT_2.
*/
static final int UTRIE2_UTF8_2B_INDEX_2_OFFSET=UTRIE2_INDEX_2_BMP_LENGTH;
static final int UTRIE2_UTF8_2B_INDEX_2_LENGTH=0x800>>6; /* U+0800 is the first code point after 2-byte UTF-8 */
-
+
/**
* The index-1 table, only used for supplementary code points, at offset 2112=0x840.
* Variable length, for code points up to highStart, where the last single-value range starts.
*/
static final int UTRIE2_INDEX_1_OFFSET=UTRIE2_UTF8_2B_INDEX_2_OFFSET+UTRIE2_UTF8_2B_INDEX_2_LENGTH;
static final int UTRIE2_MAX_INDEX_1_LENGTH=0x100000>>UTRIE2_SHIFT_1;
-
+
/*
* Fixed layout of the first part of the data array. -----------------------
* Starts with 4 blocks (128=0x80 entries) for ASCII.
*/
-
+
/**
* The illegal-UTF-8 data block follows the ASCII block, at offset 128=0x80.
* Used with linear access for single bytes 0..0xbf for simple error handling.
* Length 64=0x40, not UTRIE2_DATA_BLOCK_LENGTH.
*/
static final int UTRIE2_BAD_UTF8_DATA_OFFSET=0x80;
-
+
/** The start of non-linear-ASCII data blocks, at offset 192=0xc0. */
static final int UTRIE2_DATA_START_OFFSET=0xc0;
-
+
/* Building a Trie2 ---------------------------------------------------------- */
/*
*/
static final int UNEWTRIE2_MAX_DATA_LENGTH = (0x110000+0x40+0x40+0x400);
-
-
- /**
+
+
+ /**
* Implementation class for an iterator over a Trie2.
- *
+ *
* Iteration over a Trie2 first returns all of the ranges that are indexed by code points,
* then returns the special alternate values for the lead surrogates
- *
+ *
* @internal
*/
class Trie2Iterator implements Iterator<Range> {
limitCP = 0x110000;
doLeadSurrogates = true;
}
-
+
// An alternate constructor that configures the iterator to cover only the
// code points corresponding to a particular Lead Surrogate value.
Trie2Iterator(char leadSurrogate, ValueMapper vm) {
doLeadSurrogates = false; // Do not iterate over lead the special lead surrogate
// values after completing iteration over code points.
}
-
+
/**
* The main next() function for Trie2 iterators
- *
+ *
*/
+ @Override
public Range next() {
if (!hasNext()) {
throw new NoSuchElementException();
int endOfRange = 0;
int val = 0;
int mappedVal = 0;
-
+
if (doingCodePoints) {
// Iteration over code point values.
val = get(nextStart);
}
} else {
// Iteration over the alternate lead surrogate values.
- val = getFromU16SingleLead((char)nextStart);
+ val = getFromU16SingleLead((char)nextStart);
mappedVal = mapper.map(val);
endOfRange = rangeEndLS((char)nextStart);
// Loop once for each range in the Trie2 with the same raw (unmapped) value.
returnValue.endCodePoint = endOfRange;
returnValue.value = mappedVal;
returnValue.leadSurrogate = !doingCodePoints;
- nextStart = endOfRange+1;
+ nextStart = endOfRange+1;
return returnValue;
}
-
+
/**
- *
+ *
*/
+ @Override
public boolean hasNext() {
    // Anything still below 0xdc00 always counts as "more to come",
    // regardless of which iteration phase is active.
    if (nextStart < 0xdc00) {
        return true;
    }
    // At or above 0xdc00 only the code point phase can continue: either the
    // lead surrogate pass is still pending, or the code point limit has not
    // been reached yet.
    return doingCodePoints && (doLeadSurrogates || nextStart < limitCP);
}
-
+
+ @Override
public void remove() {
    // Removal is not offered by this iterator; always rejects the call.
    throw new UnsupportedOperationException();
}
-
-
+
+
/**
* Find the last lead surrogate in a contiguous range with the
* same Trie2 value as the input character.
- *
+ *
* Use the alternate Lead Surrogate values from the Trie2,
* not the code-point values.
- *
+ *
* Note: Trie2_16 and Trie2_32 override this implementation with optimized versions,
* meaning that the implementation here is only being used with
* Trie2Writable. The code here is logically correct with any type
* of Trie2, however.
- *
+ *
* @param c The character to begin with.
* @return The last contiguous character with the same value.
*/
if (startingLS >= 0xdbff) {
return 0xdbff;
}
-
+
int c;
int val = getFromU16SingleLead(startingLS);
for (c = startingLS+1; c <= 0x0dbff; c++) {
}
return c-1;
}
-
+
//
// Iteration State Variables
//
// The upper limit for the last normal range to be returned. Normally 0x110000, but
// may be lower when iterating over the code points for a single lead surrogate.
private int limitCP;
-
+
// True while iterating over the Trie2 values for code points.
// False while iterating over the alternate values for lead surrogates.
private boolean doingCodePoints = true;
-
+
// True if the iterator should iterate the special values for lead surrogates in
// addition to the normal values for code points.
private boolean doLeadSurrogates = true;
}
-
+
/**
* Find the last character in a contiguous range of characters with the
* same Trie2 value as the input character.
- *
+ *
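 * @param start The character to begin with.
 * @param limitp Upper bound on the code points examined; the scan also stops at highStart.
 * @param val The value that the characters in the range must share.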
* @return The last contiguous character with the same value.
*/
int rangeEnd(int start, int limitp, int val) {
int c;
int limit = Math.min(highStart, limitp);
-
+
for (c = start+1; c < limit; c++) {
if (get(c) != val) {
break;
}
return c - 1;
}
-
-
+
+
//
// Hashing implementation functions. FNV hash. Respected public domain algorithm.
//
private static int initHash() {
    // Starting value of the hash; as an unsigned number this is 2166136261.
    final int seed = 0x811c9DC5;
    return seed;
}
-
+
private static int hashByte(int h, int b) {
    // One hash step: multiply the running hash by the fixed multiplier
    // (int arithmetic, so overflow wraps), then XOR in the new value.
    final int multiplier = 16777619;
    return (h * multiplier) ^ b;
}
-
+
private static int hashUChar32(int h, int c) {
    // Split the value into three pieces, lowest first. The top piece is
    // deliberately not masked: it carries every bit above bit 15.
    final int lowByte = c & 255;
    final int middleByte = (c >> 8) & 255;
    final int upperBits = c >> 16;

    final int afterLow = Trie2.hashByte(h, lowByte);
    final int afterMiddle = Trie2.hashByte(afterLow, middleByte);
    return Trie2.hashByte(afterMiddle, upperBits);
}
-
+
private static int hashInt(int h, int i) {
h = Trie2.hashByte(h, i & 255);
h = Trie2.hashByte(h, (i>>8) & 255);
* <p>Result of each iteration contains the interval of codepoints that have
* the same value type and the value type itself.</p>
* <p>The comparison of each codepoint value is done via extract(), which the
- * default implementation is to return the value as it is.</p>
- * <p>Method extract() can be overwritten to perform manipulations on
+ * default implementation is to return the value as it is.</p>
+ * <p>Method extract() can be overwritten to perform manipulations on
* codepoint values in order to perform specialized comparison.</p>
* <p>TrieIterator is designed to be a generic iterator for the CharTrie
- * and the IntTrie, hence to accommodate both types of data, the return
+ * and the IntTrie, hence to accommodate both types of data, the return
* result will be in terms of int (32 bit) values.</p>
* <p>See com.ibm.icu.text.UCharacterTypeIterator for examples of use.</p>
* <p>Notes for porting utrie_enum from icu4c to icu4j:<br>
* Internally, icu4c's utrie_enum performs all iterations in its body. In Java
- * sense, the caller will have to pass a object with a callback function
- * UTrieEnumRange(const void *context, UChar32 start, UChar32 limit,
- * uint32_t value) into utrie_enum. utrie_enum will then find ranges of
- * codepoints with the same value as determined by
- * UTrieEnumValue(const void *context, uint32_t value). for each range,
+ * sense, the caller will have to pass a object with a callback function
+ * UTrieEnumRange(const void *context, UChar32 start, UChar32 limit,
+ * uint32_t value) into utrie_enum. utrie_enum will then find ranges of
+ * codepoints with the same value as determined by
+ * UTrieEnumValue(const void *context, uint32_t value). for each range,
* utrie_enum calls the callback function to perform a task. In this way,
* icu4c performs the iteration within utrie_enum.
* To follow the JDK model, icu4j is slightly different from icu4c.
* Instead of requesting the caller to implement an object for a callback.
* The caller will have to implement a subclass of TrieIterator, fleshing out
- * the method extract(int) (equivalent to UTrieEnumValue). Independent of icu4j,
- * the caller will have to code his own iteration and flesh out the task
+ * the method extract(int) (equivalent to UTrieEnumValue). Independent of icu4j,
+ * the caller will have to code his own iteration and flesh out the task
* (equivalent to UTrieEnumRange) to be performed in the iteration loop.
* </p>
* <p>There are basically 3 usage scenarios for porting:</p>
- * <p>1) UTrieEnumValue is the only implemented callback then just implement a
- * subclass of TrieIterator and override the extract(int) method. The
+ * <p>1) UTrieEnumValue is the only implemented callback then just implement a
+ * subclass of TrieIterator and override the extract(int) method. The
 * extract(int) method is analogous to the UTrieEnumValue callback.
* </p>
- * <p>2) UTrieEnumValue and UTrieEnumRange both are implemented then implement
+ * <p>2) UTrieEnumValue and UTrieEnumRange both are implemented then implement
* a subclass of TrieIterator, override the extract method and iterate, e.g
* </p>
- * <p>utrie_enum(&normTrie, _enumPropertyStartsValue, _enumPropertyStartsRange,
+ * <p>utrie_enum(&normTrie, _enumPropertyStartsValue, _enumPropertyStartsRange,
* set);<br>
* In Java :<br>
* <pre>
* // port the implementation of _enumPropertyStartsValue here
* }
* }
- * ....
+ * ....
* TrieIterator fcdIter = new TrieIteratorImpl(fcdTrieImpl.fcdTrie);
* while(fcdIter.next(result)) {
* // port the implementation of _enumPropertyStartsRange
* }
* </pre>
* </p>
- * <p>3) UTrieEnumRange is the only implemented callback then just implement
+ * <p>3) UTrieEnumRange is the only implemented callback then just implement
* the while loop, when utrie_enum is called
* <pre>
* // utrie_enum(&fcdTrie, NULL, _enumPropertyStartsRange, set);
{
// public constructor ---------------------------------------------
-
+
/**
* TrieEnumeration constructor
* @param trie to be used
m_initialValue_ = extract(m_trie_.getInitialValue());
reset();
}
-
+
// public methods -------------------------------------------------
-
+
/**
- * <p>Returns true if we are not at the end of the iteration, false
+ * <p>Returns true if we are not at the end of the iteration, false
* otherwise.</p>
- * <p>The next set of codepoints with the same value type will be
+ * <p>The next set of codepoints with the same value type will be
 * calculated during this call and returned in the argument element.</p>
- * @param element return result
+ * @param element return result
* @return true if we are not at the end of the iteration, false otherwise.
* @exception NoSuchElementException - if no more elements exist.
* @see com.ibm.icu.util.RangeValueIterator.Element
*/
+ @Override
public final boolean next(Element element)
{
if (m_nextCodepoint_ > UCharacter.MAX_VALUE) {
if (m_nextCodepoint_ < UCharacter.SUPPLEMENTARY_MIN_VALUE &&
calculateNextBMPElement(element)) {
return true;
- }
+ }
calculateNextSupplementaryElement(element);
return true;
}
-
+
/**
* Resets the iterator to the beginning of the iteration
*/
+ @Override
public final void reset()
{
m_currentCodepoint_ = 0;
m_nextBlockIndex_ = 0;
m_nextTrailIndexOffset_ = TRAIL_SURROGATE_INDEX_BLOCK_LENGTH_;
}
-
+
// protected methods ----------------------------------------------
-
+
/**
* Called by next() to extracts a 32 bit value from a trie value
* used for comparison.
{
return value;
}
-
+
// private methods ------------------------------------------------
-
+
/**
* Set the result values
* @param element return result object
- * @param start codepoint of range
+ * @param start codepoint of range
* @param limit (end + 1) codepoint of range
* @param value common value of range
*/
- private final void setResult(Element element, int start, int limit,
+ private final void setResult(Element element, int start, int limit,
int value)
{
element.start = start;
element.limit = limit;
element.value = value;
}
-
+
/**
* Finding the next element.
- * This method is called just before returning the result of
+ * This method is called just before returning the result of
* next().
* We always store the next element before it is requested.
- * In the case that we have to continue calculations into the
+ * In the case that we have to continue calculations into the
* supplementary planes, a false will be returned.
* @param element return result object
* @return true if the next range is found, false if we have to proceed to
m_nextCodepoint_ ++;
m_nextBlockIndex_ ++;
if (!checkBlockDetail(currentValue)) {
- setResult(element, m_currentCodepoint_, m_nextCodepoint_,
+ setResult(element, m_currentCodepoint_, m_nextCodepoint_,
currentValue);
return true;
}
- // synwee check that next block index == 0 here
+ // synwee check that next block index == 0 here
// enumerate BMP - the main loop enumerates data blocks
while (m_nextCodepoint_ < UCharacter.SUPPLEMENTARY_MIN_VALUE) {
// because of the way the character is split to form the index
} else {
m_nextIndex_ ++;
}
-
+
m_nextBlockIndex_ = 0;
if (!checkBlock(currentValue)) {
- setResult(element, m_currentCodepoint_, m_nextCodepoint_,
+ setResult(element, m_currentCodepoint_, m_nextCodepoint_,
currentValue);
return true;
}
* lower bound of the next element, in calculateNextBMP() it gets set
* at the start of any loop, where-else, in calculateNextSupplementary()
* since m_currentCodepoint_ already contains the lower bound of the
- * next element (passed down from calculateNextBMP()), we keep it till
+ * next element (passed down from calculateNextBMP()), we keep it till
* the end before resetting it to the new value.
- * Note, if there are no more iterations, it will never get to here.
+ * Note, if there are no more iterations, it will never get to here.
* Blocked out by next().
* @param element return result object
*/
int currentValue = m_nextValue_;
m_nextCodepoint_ ++;
m_nextBlockIndex_ ++;
-
- if (UTF16.getTrailSurrogate(m_nextCodepoint_)
- != UTF16.TRAIL_SURROGATE_MIN_VALUE) {
+
+ if (UTF16.getTrailSurrogate(m_nextCodepoint_)
+ != UTF16.TRAIL_SURROGATE_MIN_VALUE) {
// this piece is only called when we are in the middle of a lead
// surrogate block
if (!checkNullNextTrailIndex() && !checkBlockDetail(currentValue)) {
- setResult(element, m_currentCodepoint_, m_nextCodepoint_,
+ setResult(element, m_currentCodepoint_, m_nextCodepoint_,
currentValue);
m_currentCodepoint_ = m_nextCodepoint_;
return;
m_nextIndex_ ++;
m_nextTrailIndexOffset_ ++;
if (!checkTrailBlock(currentValue)) {
- setResult(element, m_currentCodepoint_, m_nextCodepoint_,
+ setResult(element, m_currentCodepoint_, m_nextCodepoint_,
currentValue);
m_currentCodepoint_ = m_nextCodepoint_;
return;
// enumerate supplementary code points
while (nextLead < TRAIL_SURROGATE_MIN_VALUE_) {
// lead surrogate access
- final int leadBlock =
- m_trie_.m_index_[nextLead >> Trie.INDEX_STAGE_1_SHIFT_] <<
+ final int leadBlock =
+ m_trie_.m_index_[nextLead >> Trie.INDEX_STAGE_1_SHIFT_] <<
Trie.INDEX_STAGE_2_SHIFT_;
if (leadBlock == m_trie_.m_dataOffset_) {
// no entries for a whole block of lead surrogates
m_nextValue_ = m_initialValue_;
m_nextBlock_ = leadBlock; // == m_trie_.m_dataOffset_
m_nextBlockIndex_ = 0;
- setResult(element, m_currentCodepoint_, m_nextCodepoint_,
+ setResult(element, m_currentCodepoint_, m_nextCodepoint_,
currentValue);
m_currentCodepoint_ = m_nextCodepoint_;
return;
nextLead += DATA_BLOCK_LENGTH_;
// number of total affected supplementary codepoints in one
// block
- // this is not a simple addition of
+ // this is not a simple addition of
// DATA_BLOCK_SUPPLEMENTARY_LENGTH since we need to consider
// that we might have moved some of the codepoints
m_nextCodepoint_ = Character.toCodePoint((char)nextLead, (char)UTF16.TRAIL_SURROGATE_MIN_VALUE);
}
// enumerate trail surrogates for this lead surrogate
m_nextIndex_ = m_trie_.m_dataManipulate_.getFoldingOffset(
- m_trie_.getValue(leadBlock +
+ m_trie_.getValue(leadBlock +
(nextLead & Trie.INDEX_STAGE_3_MASK_)));
if (m_nextIndex_ <= 0) {
// no data for this lead surrogate
m_nextValue_ = m_initialValue_;
m_nextBlock_ = m_trie_.m_dataOffset_;
m_nextBlockIndex_ = 0;
- setResult(element, m_currentCodepoint_, m_nextCodepoint_,
+ setResult(element, m_currentCodepoint_, m_nextCodepoint_,
currentValue);
m_currentCodepoint_ = m_nextCodepoint_;
return;
} else {
m_nextTrailIndexOffset_ = 0;
if (!checkTrailBlock(currentValue)) {
- setResult(element, m_currentCodepoint_, m_nextCodepoint_,
+ setResult(element, m_currentCodepoint_, m_nextCodepoint_,
currentValue);
m_currentCodepoint_ = m_nextCodepoint_;
return;
}
- }
+ }
nextLead ++;
}
// deliver last range
- setResult(element, m_currentCodepoint_, UCharacter.MAX_VALUE + 1,
+ setResult(element, m_currentCodepoint_, UCharacter.MAX_VALUE + 1,
currentValue);
- }
-
+ }
+
/**
* Internal block value calculations
* Performs calculations on a data block to find codepoints in m_nextBlock_
private final boolean checkBlockDetail(int currentValue)
{
while (m_nextBlockIndex_ < DATA_BLOCK_LENGTH_) {
- m_nextValue_ = extract(m_trie_.getValue(m_nextBlock_ +
+ m_nextValue_ = extract(m_trie_.getValue(m_nextBlock_ +
m_nextBlockIndex_));
if (m_nextValue_ != currentValue) {
return false;
}
return true;
}
-
+
/**
* Internal block value calculations
* Performs calculations on a data block to find codepoints in m_nextBlock_
- * that has the same value.
+ * that have the same value.
* Will call checkBlockDetail() if highlevel check fails.
* Note m_*_ variables at this point is the next codepoint whose value
* has not been calculated.
* @return true if the whole block has the same value as currentValue or if
* the whole block has been calculated, false otherwise.
*/
- private final boolean checkBlock(int currentValue)
+ private final boolean checkBlock(int currentValue)
{
int currentBlock = m_nextBlock_;
- m_nextBlock_ = m_trie_.m_index_[m_nextIndex_] <<
+ m_nextBlock_ = m_trie_.m_index_[m_nextIndex_] <<
Trie.INDEX_STAGE_2_SHIFT_;
if (m_nextBlock_ == currentBlock &&
(m_nextCodepoint_ - m_currentCodepoint_) >= DATA_BLOCK_LENGTH_) {
- // the block is the same as the previous one, filled with
+ // the block is the same as the previous one, filled with
// currentValue
m_nextCodepoint_ += DATA_BLOCK_LENGTH_;
}
}
return true;
}
-
+
/**
* Internal block value calculations
- * Performs calculations on multiple data blocks for a set of trail
- * surrogates to find codepoints in m_nextBlock_ that has the same value.
+ * Performs calculations on multiple data blocks for a set of trail
+ * surrogates to find codepoints in m_nextBlock_ that have the same value.
* Will call checkBlock() for internal block checks.
* Note m_*_ variables at this point is the next codepoint whose value
* has not been calculated.
private final boolean checkTrailBlock(int currentValue)
{
// enumerate code points for this lead surrogate
- while (m_nextTrailIndexOffset_ < TRAIL_SURROGATE_INDEX_BLOCK_LENGTH_)
+ while (m_nextTrailIndexOffset_ < TRAIL_SURROGATE_INDEX_BLOCK_LENGTH_)
{
// if we ever reach here, we are at the start of a new block
m_nextBlockIndex_ = 0;
}
return true;
}
-
+
/**
* Checks if we are beginning at the start of a initial block.
* If we are then the rest of the codepoints in this initial block
if (m_nextIndex_ <= 0) {
m_nextCodepoint_ += TRAIL_SURROGATE_COUNT_ - 1;
int nextLead = UTF16.getLeadSurrogate(m_nextCodepoint_);
- int leadBlock =
- m_trie_.m_index_[nextLead >> Trie.INDEX_STAGE_1_SHIFT_] <<
+ int leadBlock =
+ m_trie_.m_index_[nextLead >> Trie.INDEX_STAGE_1_SHIFT_] <<
Trie.INDEX_STAGE_2_SHIFT_;
if (m_trie_.m_dataManipulate_ == null) {
throw new NullPointerException(
"The field DataManipulate in this Trie is null");
}
m_nextIndex_ = m_trie_.m_dataManipulate_.getFoldingOffset(
- m_trie_.getValue(leadBlock +
+ m_trie_.getValue(leadBlock +
(nextLead & Trie.INDEX_STAGE_3_MASK_)));
m_nextIndex_ --;
m_nextBlockIndex_ = DATA_BLOCK_LENGTH_;
/**
* Number of data values in a stage 2 (data array) block.
*/
- private static final int DATA_BLOCK_LENGTH_ =
+ private static final int DATA_BLOCK_LENGTH_ =
1 << Trie.INDEX_STAGE_1_SHIFT_;
// /**
// * Number of codepoints in a stage 2 block
// implement ICUBinary.Authenticate
private final static class IsAcceptable implements ICUBinary.Authenticate {
+ @Override
public boolean isDataVersionAcceptable(byte version[]) {
return version[0]==2;
}
start=indexes[IX_JG_START];
limit=indexes[IX_JG_LIMIT];
if(start<=c && c<limit) {
- return (int)jgArray[c-start]&0xff;
+ return jgArray[c-start]&0xff;
}
start=indexes[IX_JG_START2];
limit=indexes[IX_JG_LIMIT2];
if(start<=c && c<limit) {
- return (int)jgArray2[c-start]&0xff;
+ return jgArray2[c-start]&0xff;
}
return UCharacter.JoiningGroup.NO_JOINING_GROUP;
}
public UCharArrayIterator(char[] text, int start, int limit) {
if (start < 0 || limit > text.length || start > limit) {
throw new IllegalArgumentException("start: " + start + " or limit: "
- + limit + " out of range [0, "
+ + limit + " out of range [0, "
+ text.length + ")");
}
this.text = text;
this.pos = start;
}
+ @Override
public int current() {
return pos < limit ? text[pos] : DONE;
}
+ @Override
public int getLength() {
return limit - start;
}
+ @Override
public int getIndex() {
return pos - start;
}
+ @Override
public int next() {
return pos < limit ? text[pos++] : DONE;
}
+ @Override
public int previous() {
return pos > start ? text[--pos] : DONE;
}
+ @Override
public void setIndex(int index) {
if (index < 0 || index > limit - start) {
- throw new IndexOutOfBoundsException("index: " + index +
- " out of range [0, "
+ throw new IndexOutOfBoundsException("index: " + index +
+ " out of range [0, "
+ (limit - start) + ")");
}
pos = start + index;
}
+ @Override
public int getText(char[] fillIn, int offset) {
int len = limit - start;
System.arraycopy(text, start, fillIn, offset, len);
}
/**
- * Creates a copy of this iterator, does not clone the underlying
+ * Creates a copy of this iterator; does not clone the underlying
* <code>Replaceable</code>object
* @return copy of this iterator
*/
+ @Override
public Object clone(){
try {
return super.clone();
* others. All Rights Reserved. *
*******************************************************************************
*/
-
+
package com.ibm.icu.impl;
import java.text.CharacterIterator;
import com.ibm.icu.text.UCharacterIterator;
/**
- * This class is a wrapper around UCharacterIterator and implements the
+ * This class is a wrapper around UCharacterIterator and implements the
* CharacterIterator protocol
* @author ram
*/
public class UCharacterIteratorWrapper implements CharacterIterator{
-
+
public UCharacterIteratorWrapper(UCharacterIterator iter){
this.iterator = iter;
}
-
+
private UCharacterIterator iterator;
* @return the first character in the text, or DONE if the text is empty
* @see #getBeginIndex()
*/
+ @Override
public char first(){
//UCharacterIterator always iterates from 0 to length
iterator.setToStart();
* @return the last character in the text, or DONE if the text is empty
* @see #getEndIndex()
*/
+ @Override
public char last(){
iterator.setToLimit();
return (char)iterator.previous();
* position is off the end of the text.
* @see #getIndex()
*/
+ @Override
public char current(){
return (char) iterator.current();
}
* @return the character at the new position or DONE if the new
* position is off the end of the text range.
*/
+ @Override
public char next(){
//pre-increment
iterator.next();
* @return the character at the new position or DONE if the current
* position is equal to getBeginIndex().
*/
+ @Override
public char previous(){
//pre-decrement
return (char) iterator.previous();
* if an invalid value is supplied.
* @return the character at the specified position or DONE if the specified position is equal to getEndIndex()
*/
+ @Override
public char setIndex(int position){
iterator.setIndex(position);
return (char) iterator.current();
* Returns the start index of the text.
* @return the index at which the text begins.
*/
+ @Override
public int getBeginIndex(){
//UCharacterIterator always starts from 0
return 0;
* character following the end of the text.
* @return the index after the last character in the text
*/
+ @Override
public int getEndIndex(){
return iterator.getLength();
}
* Returns the current index.
* @return the current index.
*/
+ @Override
public int getIndex(){
return iterator.getIndex();
}
* Create a copy of this iterator
* @return A copy of this
*/
+ @Override
public Object clone(){
try {
UCharacterIteratorWrapper result = (UCharacterIteratorWrapper) super.clone();
result.iterator = (UCharacterIterator)this.iterator.clone();
return result;
- } catch (CloneNotSupportedException e) {
+ } catch (CloneNotSupportedException e) {
return null; // only invoked if bad underlying character iterator
}
- }
+ }
}
import java.util.Arrays;
/**
-* <p>Internal reader class for ICU data file uname.dat containing
-* Unicode codepoint name data.</p>
+* <p>Internal reader class for ICU data file unames.icu containing
+* Unicode codepoint name data.</p>
* <p>This class simply reads unames.icu, authenticates that it is a valid
* ICU data file and split its contents up into blocks of data for use in
* <a href=UCharacterName.html>com.ibm.icu.impl.UCharacterName</a>.
-* </p>
-* <p>unames.icu which is in big-endian format is jared together with this
+* </p>
+* <p>unames.icu, which is in big-endian format, is jarred together with this
* package.</p>
* @author Syn Wee Quek
* @since release 2.1, February 1st 2002
*/
final class UCharacterNameReader implements ICUBinary.Authenticate
-{
+{
// public methods ----------------------------------------------------
-
+
+ @Override
public boolean isDataVersionAcceptable(byte version[])
{
return version[0] == 1;
m_groupindex_ = m_byteBuffer_.getInt();
m_groupstringindex_ = m_byteBuffer_.getInt();
m_algnamesindex_ = m_byteBuffer_.getInt();
-
+
// reading tokens
int count = m_byteBuffer_.getChar();
char token[] = ICUBinary.getChars(m_byteBuffer_, count, 0);
byte tokenstr[] = new byte[size];
m_byteBuffer_.get(tokenstr);
data.setToken(token, tokenstr);
-
+
// reading the group information records
count = m_byteBuffer_.getChar();
data.setGroupCountSize(count, GROUP_INFO_SIZE_);
size = m_algnamesindex_ - m_groupstringindex_;
byte groupstring[] = new byte[size];
m_byteBuffer_.get(groupstring);
-
+
data.setGroup(group, groupstring);
-
+
count = m_byteBuffer_.getInt();
- UCharacterName.AlgorithmName alg[] =
+ UCharacterName.AlgorithmName alg[] =
new UCharacterName.AlgorithmName[count];
-
+
for (int i = 0; i < count; i ++)
{
UCharacterName.AlgorithmName an = readAlg();
}
data.setAlgorithm(alg);
}
-
+
/**
* <p>Checking the file for the correct format.</p>
* @param dataformatid
isDataVersionAcceptable(dataformatversion);
}
///CLOVER:ON
-
+
// private variables -------------------------------------------------
/**
private int m_groupindex_;
private int m_groupstringindex_;
private int m_algnamesindex_;
-
+
/**
* Size of an algorithmic name information group
- * start code point size + end code point size + type size + variant size +
+ * start code point size + end code point size + type size + variant size +
* size of data size
*/
private static final int ALG_INFO_SIZE_ = 12;
private static final int DATA_FORMAT_ID_ = 0x756E616D;
// private methods ---------------------------------------------------
-
+
/**
* Reads an individual record of AlgorithmNames
* @return an instance of AlgorithNames if read is successful otherwise null
*/
private UCharacterName.AlgorithmName readAlg() throws IOException
{
- UCharacterName.AlgorithmName result =
+ UCharacterName.AlgorithmName result =
new UCharacterName.AlgorithmName();
int rangestart = m_byteBuffer_.getInt();
int rangeend = m_byteBuffer_.getInt();
if (!result.setInfo(rangestart, rangeend, type, variant)) {
return null;
}
-
+
int size = m_byteBuffer_.getChar();
if (type == UCharacterName.AlgorithmName.TYPE_1_)
{
result.setFactor(factor);
size -= (variant << 1);
}
-
+
StringBuilder prefix = new StringBuilder();
char c = (char)(m_byteBuffer_.get() & 0x00FF);
while (c != 0)
prefix.append(c);
c = (char)(m_byteBuffer_.get() & 0x00FF);
}
-
+
result.setPrefix(prefix.toString());
-
+
size -= (ALG_INFO_SIZE_ + prefix.length() + 1);
-
+
if (size > 0)
{
byte string[] = new byte[size];
super(SRC_CASE);
this.which=which;
}
+ @Override
boolean contains(int c) {
return UCaseProps.INSTANCE.hasBinaryProperty(c, which);
}
super(source);
this.which=which;
}
+ @Override
boolean contains(int c) {
return Norm2AllModes.getN2WithImpl(which-UProperty.NFD_INERT).isInert(c);
}
new BinaryProperty(1, (1<<ALPHABETIC_PROPERTY_)),
new BinaryProperty(1, (1<<ASCII_HEX_DIGIT_PROPERTY_)),
new BinaryProperty(SRC_BIDI) { // UCHAR_BIDI_CONTROL
+ @Override
boolean contains(int c) {
return UBiDiProps.INSTANCE.isBidiControl(c);
}
},
new BinaryProperty(SRC_BIDI) { // UCHAR_BIDI_MIRRORED
+ @Override
boolean contains(int c) {
return UBiDiProps.INSTANCE.isMirrored(c);
}
new BinaryProperty(1, (1<<DIACRITIC_PROPERTY_)),
new BinaryProperty(1, (1<<EXTENDER_PROPERTY_)),
new BinaryProperty(SRC_NFC) { // UCHAR_FULL_COMPOSITION_EXCLUSION
+ @Override
boolean contains(int c) {
// By definition, Full_Composition_Exclusion is the same as NFC_QC=No.
Normalizer2Impl impl=Norm2AllModes.getNFCInstance().impl;
new BinaryProperty(1, (1<<IDS_BINARY_OPERATOR_PROPERTY_)),
new BinaryProperty(1, (1<<IDS_TRINARY_OPERATOR_PROPERTY_)),
new BinaryProperty(SRC_BIDI) { // UCHAR_JOIN_CONTROL
+ @Override
boolean contains(int c) {
return UBiDiProps.INSTANCE.isJoinControl(c);
}
new NormInertBinaryProperty(SRC_NFC, UProperty.NFC_INERT),
new NormInertBinaryProperty(SRC_NFKC, UProperty.NFKC_INERT),
new BinaryProperty(SRC_NFC_CANON_ITER) { // UCHAR_SEGMENT_STARTER
+ @Override
boolean contains(int c) {
return Norm2AllModes.getNFCInstance().impl.
ensureCanonIterData().isCanonSegmentStarter(c);
new BinaryProperty(1, (1<<PATTERN_SYNTAX)),
new BinaryProperty(1, (1<<PATTERN_WHITE_SPACE)),
new BinaryProperty(SRC_CHAR_AND_PROPSVEC) { // UCHAR_POSIX_ALNUM
+ @Override
boolean contains(int c) {
return UCharacter.isUAlphabetic(c) || UCharacter.isDigit(c);
}
},
new BinaryProperty(SRC_CHAR) { // UCHAR_POSIX_BLANK
+ @Override
boolean contains(int c) {
// "horizontal space"
if(c<=0x9f) {
}
},
new BinaryProperty(SRC_CHAR) { // UCHAR_POSIX_GRAPH
+ @Override
boolean contains(int c) {
return isgraphPOSIX(c);
}
},
new BinaryProperty(SRC_CHAR) { // UCHAR_POSIX_PRINT
+ @Override
boolean contains(int c) {
/*
* Checks if codepoint is in \p{graph}\p{blank} - \p{cntrl}.
}
},
new BinaryProperty(SRC_CHAR) { // UCHAR_POSIX_XDIGIT
+ @Override
boolean contains(int c) {
/* check ASCII and Fullwidth ASCII a-fA-F */
if(
new CaseBinaryProperty(UProperty.CHANGES_WHEN_UPPERCASED),
new CaseBinaryProperty(UProperty.CHANGES_WHEN_TITLECASED),
new BinaryProperty(SRC_CASE_AND_NORM) { // UCHAR_CHANGES_WHEN_CASEFOLDED
+ @Override
boolean contains(int c) {
String nfd=Norm2AllModes.getNFCInstance().impl.getDecomposition(c);
if(nfd!=null) {
},
new CaseBinaryProperty(UProperty.CHANGES_WHEN_CASEMAPPED),
new BinaryProperty(SRC_NFKC_CF) { // UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED
+ @Override
boolean contains(int c) {
Normalizer2Impl kcf=Norm2AllModes.getNFKC_CFInstance().impl;
String src=UTF16.valueOf(c);
BiDiIntProperty() {
super(SRC_BIDI);
}
+ @Override
int getMaxValue(int which) {
return UBiDiProps.INSTANCE.getMaxValue(which);
}
CombiningClassIntProperty(int source) {
super(source);
}
+ @Override
int getMaxValue(int which) {
return 0xff;
}
this.which=which;
this.max=max;
}
+ @Override
int getValue(int c) {
return Norm2AllModes.getN2WithImpl(which-UProperty.NFD_QUICK_CHECK).getQuickCheck(c);
}
+ @Override
int getMaxValue(int which) {
return max;
}
IntProperty intProps[]={
new BiDiIntProperty() { // BIDI_CLASS
+ @Override
int getValue(int c) {
return UBiDiProps.INSTANCE.getClass(c);
}
},
new IntProperty(0, BLOCK_MASK_, BLOCK_SHIFT_),
new CombiningClassIntProperty(SRC_NFC) { // CANONICAL_COMBINING_CLASS
+ @Override
int getValue(int c) {
return Normalizer2.getNFDInstance().getCombiningClass(c);
}
new IntProperty(2, DECOMPOSITION_TYPE_MASK_, 0),
new IntProperty(0, EAST_ASIAN_MASK_, EAST_ASIAN_SHIFT_),
new IntProperty(SRC_CHAR) { // GENERAL_CATEGORY
+ @Override
int getValue(int c) {
return getType(c);
}
+ @Override
int getMaxValue(int which) {
return UCharacterCategory.CHAR_CATEGORY_COUNT-1;
}
},
new BiDiIntProperty() { // JOINING_GROUP
+ @Override
int getValue(int c) {
return UBiDiProps.INSTANCE.getJoiningGroup(c);
}
},
new BiDiIntProperty() { // JOINING_TYPE
+ @Override
int getValue(int c) {
return UBiDiProps.INSTANCE.getJoiningType(c);
}
},
new IntProperty(2, LB_MASK, LB_SHIFT), // LINE_BREAK
new IntProperty(SRC_CHAR) { // NUMERIC_TYPE
+ @Override
int getValue(int c) {
return ntvGetType(getNumericTypeValue(getProperty(c)));
}
+ @Override
int getMaxValue(int which) {
return NumericType.COUNT-1;
}
},
new IntProperty(0, SCRIPT_MASK_, 0) {
+ @Override
int getValue(int c) {
return UScript.getScript(c);
}
},
new IntProperty(SRC_PROPSVEC) { // HANGUL_SYLLABLE_TYPE
+ @Override
int getValue(int c) {
/* see comments on gcbToHst[] above */
int gcb=(getAdditional(c, 2)&GCB_MASK)>>>GCB_SHIFT;
return HangulSyllableType.NOT_APPLICABLE;
}
}
+ @Override
int getMaxValue(int which) {
return HangulSyllableType.COUNT-1;
}
new NormQuickCheckIntProperty(SRC_NFC, UProperty.NFC_QUICK_CHECK, 2),
new NormQuickCheckIntProperty(SRC_NFKC, UProperty.NFKC_QUICK_CHECK, 2),
new CombiningClassIntProperty(SRC_NFC) { // LEAD_CANONICAL_COMBINING_CLASS
+ @Override
int getValue(int c) {
return Norm2AllModes.getNFCInstance().impl.getFCD16(c)>>8;
}
},
new CombiningClassIntProperty(SRC_NFC) { // TRAIL_CANONICAL_COMBINING_CLASS
+ @Override
int getValue(int c) {
return Norm2AllModes.getNFCInstance().impl.getFCD16(c)&0xff;
}
new IntProperty(2, SB_MASK, SB_SHIFT), // SENTENCE_BREAK
new IntProperty(2, WB_MASK, WB_SHIFT), // WORD_BREAK
new BiDiIntProperty() { // BIDI_PAIRED_BRACKET_TYPE
+ @Override
int getValue(int c) {
return UBiDiProps.INSTANCE.getPairedBracketType(c);
}
private static final class IsAcceptable implements ICUBinary.Authenticate {
// @Override when we switch to Java 6
+ @Override
public boolean isDataVersionAcceptable(byte version[]) {
return version[0] == 7;
}
/* add for u_charDigitValue() */
// TODO remove when UCharacter.getHanNumericValue() is changed to just return
- // Unicode numeric values
+ // Unicode numeric values
set.add(0x3007);
set.add(0x3008);
set.add(0x4e00);
private static final class IsAcceptable implements ICUBinary.Authenticate {
// @Override when we switch to Java 6
+ @Override
public boolean isDataVersionAcceptable(byte version[]) {
return version[0]==2;
}
// Find the end of this name.
int nameStart=nameGroupsIndex;
while(0!=nameGroups.charAt(nameGroupsIndex)) {
- ++nameGroupsIndex;
+ ++nameGroupsIndex;
}
if(nameStart==nameGroupsIndex) {
return null; // no name (Property[Value]Aliases.txt has "n/a")
public abstract class URLHandler {
public static final String PROPNAME = "urlhandler.props";
-
+
private static final Map<String, Method> handlers;
-
+
private static final boolean DEBUG = ICUDebug.enabled("URLHandler");
-
+
static {
Map<String, Method> h = null;
-
+
BufferedReader br = null;
try {
@SuppressWarnings("resource") // Closed by BufferedReader.
if (is != null) {
Class<?>[] params = { URL.class };
br = new BufferedReader(new InputStreamReader(is));
-
+
for (String line = br.readLine(); line != null; line = br.readLine()) {
line = line.trim();
-
+
if (line.length() == 0 || line.charAt(0) == '#') {
continue;
}
-
+
int ix = line.indexOf('=');
-
+
if (ix == -1) {
if (DEBUG) System.err.println("bad urlhandler line: '" + line + "'");
break;
}
-
+
String key = line.substring(0, ix).trim();
String value = line.substring(ix+1).trim();
-
+
try {
Class<?> cl = Class.forName(value);
Method m = cl.getDeclaredMethod("get", params);
-
+
if (h == null) {
h = new HashMap<String, Method>();
}
-
+
h.put(key, m);
}
catch (ClassNotFoundException e) {
if (url == null) {
return null;
}
-
+
String protocol = url.getProtocol();
-
+
if (handlers != null) {
Method m = handlers.get(protocol);
-
+
if (m != null) {
try {
URLHandler handler = (URLHandler)m.invoke(null, new Object[] { url });
-
+
if (handler != null) {
return handler;
}
}
}
}
-
+
return getDefault(url);
}
-
+
protected static URLHandler getDefault(URL url) {
URLHandler handler = null;
}
return handler;
}
-
+
private static class FileURLHandler extends URLHandler {
File file;
throw new IllegalArgumentException();
}
}
-
+
+ @Override
public void guide(URLVisitor v, boolean recurse, boolean strip) {
if (file.isDirectory()) {
process(v, recurse, strip, "/", file.listFiles());
v.visit(file.getName());
}
}
-
+
private void process(URLVisitor v, boolean recurse, boolean strip, String path, File[] files) {
if (files != null) {
for (int i = 0; i < files.length; i++) {
File f = files[i];
-
+
if (f.isDirectory()) {
if (recurse) {
process(v, recurse, strip, path + f.getName()+ '/', f.listFiles());
}
}
}
-
+
private static class JarURLHandler extends URLHandler {
JarFile jarFile;
String prefix;
JarURLHandler(URL url) {
try {
prefix = url.getPath();
-
+
int ix = prefix.lastIndexOf("!/");
-
+
if (ix >= 0) {
prefix = prefix.substring(ix + 2); // truncate after "!/"
}
throw new IllegalArgumentException("jar error: " + e.getMessage());
}
}
-
+
+ @Override
public void guide(URLVisitor v, boolean recurse, boolean strip) {
try {
Enumeration<JarEntry> entries = jarFile.entries();
-
+
while (entries.hasMoreElements()) {
JarEntry entry = entries.nextElement();
-
+
if (!entry.isDirectory()) { // skip just directory paths
String name = entry.getName();
-
+
if (name.startsWith(prefix)) {
name = name.substring(prefix.length());
int ix = name.lastIndexOf('/');
{
guide(visitor, recurse, true);
}
-
+
public abstract void guide(URLVisitor visitor, boolean recurse, boolean strip);
-
+
public interface URLVisitor {
void visit(String str);
}
* consistent with Java regex, so be careful of the differences.
* <p>Not thread-safe; create a separate copy for different threads.
* <p>In the future, we may extend this to support other regex packages.
- *
+ *
* @regex A modified Java regex pattern, as in the input to
* Pattern.compile(), except that all "character classes" are
* processed as if they were UnicodeSet patterns. Example:
* @return A processed Java regex pattern, suitable for input to
* Pattern.compile().
*/
+ @Override
public String transform(String regex) {
StringBuilder result = new StringBuilder();
UnicodeSet temp = new UnicodeSet();
/**
* Compile a regex string, after processing by fix(...).
- *
+ *
* @param regex Raw regex pattern, as in fix(...).
* @return Pattern
*/
/**
* Compile a regex string, after processing by fix(...).
- *
+ *
* @param regex Raw regex pattern, as in fix(...).
* @return Pattern
*/
/**
* Compile a composed string from a set of BNF lines; see the List version for more information.
- *
+ *
* @param bnfLines Series of BNF lines.
* @return Pattern
*/
* </pre>
* <p>
* Caveats: at this point the parsing is simple; for example, # cannot be
- * quoted (use \\u0023); you can set it to null to disable.
+ * quoted (use \\u0023); you can set it to null to disable.
* The equality sign and a few others can be reset with
* setBnfX().
- *
+ *
* @param lines Series of lines that represent a BNF expression. The lines contain
* a series of statements that of the form x=y;. A statement can take
* multiple lines, but there can't be multiple statements on a line.
for (Entry<String, String> entry : variables.entrySet()) {
String variable = entry.getKey(),
definition = entry.getValue();
-
+
for (Entry<String, String> entry2 : variables.entrySet()) {
String variable2 = entry2.getKey(),
definition2 = entry2.getValue();
}
return result;
}
-
-
+
+
/* (non-Javadoc)
* @see com.ibm.icu.util.Freezable#cloneAsThawed()
*/
+ @Override
public UnicodeRegex cloneAsThawed() {
// TODO Auto-generated method stub
try {
/* (non-Javadoc)
* @see com.ibm.icu.util.Freezable#freeze()
*/
+ @Override
public UnicodeRegex freeze() {
// no action needed now.
return this;
/* (non-Javadoc)
* @see com.ibm.icu.util.Freezable#isFrozen()
*/
+ @Override
public boolean isFrozen() {
// at this point, always true
return true;
// private Appendable log = null;
private Comparator<Object> LongestFirst = new Comparator<Object>() {
+ @Override
public int compare(Object obj0, Object obj1) {
String arg0 = obj0.toString();
String arg1 = obj1.toString();
public class HolidayBundle extends ListResourceBundle {
// Normally, each HolidayBundle uses the holiday's US English name
- // as the string key for looking up the localized name. This means
+ // as the string key for looking up the localized name. This means
// that the key itself can be used if no name is found for the requested
// locale.
//
// For holidays where the key is _not_ the English name, e.g. in the
// case of conflicts, the English name must be given here.
//
- static private final Object[][] fContents = {
- { "", "" }, // Can't be empty!
+ static private final Object[][] fContents = { { "", "" }, // Can't be empty!
};
- public synchronized Object[][] getContents() { return fContents; }
+ @Override
+ public synchronized Object[][] getContents() {
+ return fContents;
+ }
}
{ "Pentecost", "pinse" },
{ "Shrove Tuesday", "hvidetirsdag" },
};
+ @Override
public synchronized Object[][] getContents() { return fContents; }
}
import com.ibm.icu.util.Holiday;
import com.ibm.icu.util.SimpleHoliday;
-public class HolidayBundle_da_DK extends ListResourceBundle
-{
- static private final Holiday[] fHolidays = {
- SimpleHoliday.NEW_YEARS_DAY,
- new SimpleHoliday(Calendar.APRIL, 30, -Calendar.FRIDAY, "General Prayer Day"),
- new SimpleHoliday(Calendar.JUNE, 5, "Constitution Day"),
- SimpleHoliday.CHRISTMAS_EVE,
- SimpleHoliday.CHRISTMAS,
- SimpleHoliday.BOXING_DAY,
- SimpleHoliday.NEW_YEARS_EVE,
+public class HolidayBundle_da_DK extends ListResourceBundle {
+ static private final Holiday[] fHolidays = { SimpleHoliday.NEW_YEARS_DAY,
+ new SimpleHoliday(Calendar.APRIL, 30, -Calendar.FRIDAY, "General Prayer Day"),
+ new SimpleHoliday(Calendar.JUNE, 5, "Constitution Day"), SimpleHoliday.CHRISTMAS_EVE,
+ SimpleHoliday.CHRISTMAS, SimpleHoliday.BOXING_DAY, SimpleHoliday.NEW_YEARS_EVE,
- // Easter and related holidays
- EasterHoliday.MAUNDY_THURSDAY,
- EasterHoliday.GOOD_FRIDAY,
- EasterHoliday.EASTER_SUNDAY,
- EasterHoliday.EASTER_MONDAY,
- EasterHoliday.ASCENSION,
- EasterHoliday.WHIT_MONDAY,
- };
+ // Easter and related holidays
+ EasterHoliday.MAUNDY_THURSDAY, EasterHoliday.GOOD_FRIDAY, EasterHoliday.EASTER_SUNDAY,
+ EasterHoliday.EASTER_MONDAY, EasterHoliday.ASCENSION, EasterHoliday.WHIT_MONDAY, };
- static private final Object[][] fContents = {
- { "holidays", fHolidays },
- };
- public synchronized Object[][] getContents() { return fContents; }
+ static private final Object[][] fContents = { { "holidays", fHolidays }, };
+
+ @Override
+ public synchronized Object[][] getContents() {
+ return fContents;
+ }
}
{ "Whit Sunday", "Pfingstsonntag" },
};
+ @Override
public synchronized Object[][] getContents() { return fContents; }
}
{ "Christmas", "Christtag" },
{ "New Year's Day", "Neujahrstag" },
};
+ @Override
public synchronized Object[][] getContents() { return fContents; }
}
static private final Object[][] fContents = {
{ "holidays", fHolidays },
};
+ @Override
public synchronized Object[][] getContents() { return fContents; }
}
{ "Whit Monday", "\u0394\u03b5\u03cd\u03c4\u03b5\u03c1\u03b7 \u03bc\u03ad\u03c1\u03b1 \u03c4\u03bf\u03cd \u03a0\u03b5\u03bd\u03c4\u03b7\u03ba\u03bf\u03c3\u03c4\u03ae" },
};
+ @Override
public synchronized Object[][] getContents() { return fContents; }
}
static private final Object[][] fContents = {
{ "holidays", fHolidays },
};
+ @Override
public synchronized Object[][] getContents() { return fContents; }
}
{ "", "" }, // Can't be empty!
};
+ @Override
public synchronized Object[][] getContents() { return fContents; }
}
{ "Labor Day", "Labour Day" },
};
+ @Override
public synchronized Object[][] getContents() { return fContents; }
}
{ "Labor Day", "Labour Day" },
};
+ @Override
public synchronized Object[][] getContents() { return fContents; }
}
static private final Object[][] fContents = {
{ "holidays", fHolidays },
};
+ @Override
public synchronized Object[][] getContents() { return fContents; }
}
{ "Whit Sunday", "Pentecost\u00e9s" },
};
+ @Override
public synchronized Object[][] getContents() { return fContents; }
}
static private final Object[][] fContents = {
{ "holidays", fHolidays },
};
+ @Override
public synchronized Object[][] getContents() { return fContents; }
}
{ "Victory Day", "F\u00EAte de la Victoire" },
};
+ @Override
public synchronized Object[][] getContents() { return fContents; }
}
static private final Object[][] fContents = {
{ "holidays", fHolidays },
};
+ @Override
public synchronized Object[][] getContents() { return fContents; }
}
static private final Object[][] fContents = {
{ "holidays", fHolidays },
};
+ @Override
public synchronized Object[][] getContents() { return fContents; }
}
{ "Thanksgiving", "Giorno del Ringraziamento" },
};
+ @Override
public synchronized Object[][] getContents() { return fContents; }
}
static private final Object[][] fContents = {
{ "holidays", fHolidays },
};
+ @Override
public synchronized Object[][] getContents() { return fContents; }
}
{ "", "" }, // Can't be empty!
};
+ @Override
public synchronized Object[][] getContents() { return fContents; }
}
static private final Object[][] fContents = {
{ "holidays", fHolidays },
};
+ @Override
public synchronized Object[][] getContents() { return fContents; }
}
static private final Object[][] fContents = {
{ "holidays", fHolidays },
};
+ @Override
public synchronized Object[][] getContents() { return fContents; }
}
private String resourceName;
private String encoding; // null for default encoding
private Class<?> root;
-
+
/**
* The one-based line number. Has the special value -1 before the
* object is initialized. Has the special value 0 after initialization
this.lineNo = -1;
try {
- InputStreamReader isr = (encoding == null)
+ InputStreamReader isr = (encoding == null)
? new InputStreamReader(is)
: new InputStreamReader(is, encoding);
public int getLineNumber() {
return lineNo;
}
-
+
/**
* Return a string description of the position of the last line
* returned by readLine() or readLineSkippingComments().
public String describePosition() {
return resourceName + ':' + lineNo;
}
-
+
/**
* Reset this reader so that the next call to
* <code>readLine()</code> returns the first line of the file
if (is == null) {
throw new IllegalArgumentException("Can't open " + resourceName);
}
-
+
InputStreamReader isr =
(encoding == null) ? new InputStreamReader(is) :
new InputStreamReader(is, encoding);
* associated with it. If the stream is already closed then invoking
* this method has no effect.
*/
+ @Override
public void close() throws IOException {
if (reader != null) {
reader.close();
* the locales and timezones of these are in sync.
*/
public BasicDurationFormatter(PeriodFormatter formatter,
- PeriodBuilder builder,
+ PeriodBuilder builder,
DateFormatter fallback,
long fallbackLimit) {
this.formatter = formatter;
}
protected BasicDurationFormatter(PeriodFormatter formatter,
- PeriodBuilder builder,
+ PeriodBuilder builder,
DateFormatter fallback,
long fallbackLimit,
String localeName,
this.timeZone = timeZone;
}
+ @Override
public String formatDurationFromNowTo(Date targetDate) {
    // Read the clock once so the duration and the reference date are
    // derived from the same instant.
    final long referenceDate = System.currentTimeMillis();
    return formatDurationFrom(targetDate.getTime() - referenceDate, referenceDate);
}
- public String formatDurationFromNow(long duration) {
+ @Override
+public String formatDurationFromNow(long duration) {
return formatDurationFrom(duration, System.currentTimeMillis());
}
- public String formatDurationFrom(long duration, long referenceDate) {
+ @Override
+public String formatDurationFrom(long duration, long referenceDate) {
String s = doFallback(duration, referenceDate);
if (s == null) {
Period p = doBuild(duration, referenceDate);
return s;
}
- public DurationFormatter withLocale(String locName) {
+ @Override
+public DurationFormatter withLocale(String locName) {
if (!locName.equals(localeName)) {
PeriodFormatter newFormatter = formatter.withLocale(locName);
PeriodBuilder newBuilder = builder.withLocale(locName);
- DateFormatter newFallback = fallback == null
- ? null
+ DateFormatter newFallback = fallback == null
+ ? null
: fallback.withLocale(locName);
return new BasicDurationFormatter(newFormatter, newBuilder,
newFallback, fallbackLimit,
return this;
}
- public DurationFormatter withTimeZone(TimeZone tz) {
+ @Override
+public DurationFormatter withTimeZone(TimeZone tz) {
if (!tz.equals(timeZone)) {
PeriodBuilder newBuilder = builder.withTimeZone(tz);
- DateFormatter newFallback = fallback == null
- ? null
+ DateFormatter newFallback = fallback == null
+ ? null
: fallback.withTimeZone(tz);
return new BasicDurationFormatter(formatter, newBuilder,
newFallback, fallbackLimit,
}
protected String doFallback(long duration, long referenceDate) {
- if (fallback != null
+ if (fallback != null
&& fallbackLimit > 0
&& Math.abs(duration) >= fallbackLimit) {
return fallback.format(referenceDate + duration);
*
* @return this BasicDurationFormatterFactory
*/
+ @Override
public DurationFormatterFactory setPeriodFormatter(
PeriodFormatter formatter) {
if (formatter != this.formatter) {
* @param builder the builder to use
* @return this BasicDurationFormatterFactory
*/
+ @Override
public DurationFormatterFactory setPeriodBuilder(PeriodBuilder builder) {
if (builder != this.builder) {
this.builder = builder;
* @param fallback the fallback formatter to use, or null
* @return this BasicDurationFormatterFactory
*/
+ @Override
public DurationFormatterFactory setFallback(DateFormatter fallback) {
boolean doReset = fallback == null
? this.fallback != null
* @param fallbackLimit the fallback limit to use, or 0 if none is desired.
* @return this BasicDurationFormatterFactory
*/
+ @Override
public DurationFormatterFactory setFallbackLimit(long fallbackLimit) {
if (fallbackLimit < 0) {
fallbackLimit = 0;
}
/**
- * Set the name of the locale that will be used when
+ * Set the name of the locale that will be used when
* creating new formatters.
*
* @param localeName the name of the Locale
* @return this BasicDurationFormatterFactory
*/
+ @Override
public DurationFormatterFactory setLocale(String localeName) {
if (!localeName.equals(this.localeName)) {
this.localeName = localeName;
}
/**
- * Set the name of the locale that will be used when
+ * Set the time zone that will be used when
* creating new formatters.
*
* @param timeZone The time zone to use.
* @return this BasicDurationFormatterFactory
*/
+ @Override
public DurationFormatterFactory setTimeZone(TimeZone timeZone) {
if (!timeZone.equals(this.timeZone)) {
this.timeZone = timeZone;
*
* @return a BasicDurationFormatter
*/
+ @Override
public DurationFormatter getFormatter() {
if (f == null) {
if (fallback != null) {
* Create the formatter. All local fields are already initialized.
*/
protected BasicDurationFormatter createFormatter() {
- return new BasicDurationFormatter(formatter, builder, fallback,
+ return new BasicDurationFormatter(formatter, builder, fallback,
fallbackLimit, localeName,
timeZone);
}
class BasicPeriodBuilderFactory implements PeriodBuilderFactory {
private PeriodFormatterDataService ds;
private Settings settings;
-
+
private static final short allBits = 0xff;
-
+
BasicPeriodBuilderFactory(PeriodFormatterDataService ds) {
this.ds = ds;
this.settings = new Settings();
return this;
}
Settings result = inUse ? copy() : this;
-
+
result.uset = (short)uset;
if ((uset & allBits) == allBits) {
lastUnit = i;
}
}
- if (lastUnit == -1) {
+ if (lastUnit == -1) {
// currently empty, but this might be transient so no fail
result.minUnit = result.maxUnit = null;
} else {
}
return (short)(uset & ~(1 << TimeUnit.MILLISECOND.ordinal));
}
-
+
TimeUnit effectiveMinUnit() {
if (allowMillis || minUnit != TimeUnit.MILLISECOND) {
return minUnit;
}
return TimeUnit.SECOND; // default for pathological case
}
-
+
Settings setMaxLimit(float maxLimit) {
int val = maxLimit <= 0 ? 0 : (int)(maxLimit*1000);
if (maxLimit == val) {
return this
.setAllowZero(data.allowZero())
.setWeeksAloneOnly(data.weeksAloneOnly())
- .setAllowMilliseconds(data.useMilliseconds() != DataRecord.EMilliSupport.NO);
+ .setAllowMilliseconds(data.useMilliseconds() != DataRecord.EMilliSupport.NO);
}
Settings setInUse() {
return Period.moreThan(maxLimit/1000f, maxUnit).inPast(inPast);
}
}
-
+
if (minLimit > 0) {
TimeUnit emu = effectiveMinUnit();
long emud = approximateDurationOf(emu);
}
}
+ @Override
public PeriodBuilderFactory setAvailableUnitRange(TimeUnit minUnit,
TimeUnit maxUnit) {
int uset = 0;
return this;
}
- public PeriodBuilderFactory setUnitIsAvailable(TimeUnit unit,
+ @Override
+ public PeriodBuilderFactory setUnitIsAvailable(TimeUnit unit,
boolean available) {
int uset = settings.uset;
if (available) {
return this;
}
+ @Override
public PeriodBuilderFactory setMaxLimit(float maxLimit) {
    // Keep whichever Settings object the setter hands back, then allow chaining.
    this.settings = this.settings.setMaxLimit(maxLimit);
    return this;
}
+ @Override
public PeriodBuilderFactory setMinLimit(float minLimit) {
    // Keep whichever Settings object the setter hands back, then allow chaining.
    this.settings = this.settings.setMinLimit(minLimit);
    return this;
}
+ @Override
public PeriodBuilderFactory setAllowZero(boolean allow) {
    // Keep whichever Settings object the setter hands back, then allow chaining.
    this.settings = this.settings.setAllowZero(allow);
    return this;
}
+ @Override
public PeriodBuilderFactory setWeeksAloneOnly(boolean aloneOnly) {
    // Keep whichever Settings object the setter hands back, then allow chaining.
    this.settings = this.settings.setWeeksAloneOnly(aloneOnly);
    return this;
}
+ @Override
public PeriodBuilderFactory setAllowMilliseconds(boolean allow) {
    // Keep whichever Settings object the setter hands back, then allow chaining.
    this.settings = this.settings.setAllowMilliseconds(allow);
    return this;
}
+ @Override
public PeriodBuilderFactory setLocale(String localeName) {
    // Keep whichever Settings object the setter hands back, then allow chaining.
    this.settings = this.settings.setLocale(localeName);
    return this;
}
-
+
+ @Override
public PeriodBuilderFactory setTimeZone(TimeZone timeZone) {
// ignore this
return this;
* @param unit the single TimeUnit with which to represent times
* @return a builder
*/
+ @Override
public PeriodBuilder getFixedUnitBuilder(TimeUnit unit) {
    // Delegates to FixedUnitBuilder.get with the value of getSettings().
    final PeriodBuilder builder = FixedUnitBuilder.get(unit, getSettings());
    return builder;
}
*
* @return a builder
*/
+ @Override
public PeriodBuilder getSingleUnitBuilder() {
    // Delegates to SingleUnitBuilder.get with the value of getSettings().
    final PeriodBuilder builder = SingleUnitBuilder.get(getSettings());
    return builder;
}
*
* @return a builder
*/
+ @Override
public PeriodBuilder getOneOrTwoUnitBuilder() {
    // Delegates to OneOrTwoUnitBuilder.get with the value of getSettings().
    final PeriodBuilder builder = OneOrTwoUnitBuilder.get(getSettings());
    return builder;
}
*
* @return a builder
*/
+ @Override
public PeriodBuilder getMultiUnitBuilder(int periodCount) {
    // Delegates to MultiUnitBuilder.get with the requested count and the value of getSettings().
    final PeriodBuilder builder = MultiUnitBuilder.get(periodCount, getSettings());
    return builder;
}
abstract class PeriodBuilderImpl implements PeriodBuilder {
protected BasicPeriodBuilderFactory.Settings settings;
-
+
+ @Override
public Period create(long duration) {
    // The current wall-clock time serves as the reference date.
    final long now = System.currentTimeMillis();
    return createWithReferenceDate(duration, now);
}
return BasicPeriodBuilderFactory.approximateDurationOf(unit);
}
+ @Override
public Period createWithReferenceDate(long duration, long referenceDate) {
boolean inPast = duration < 0;
if (inPast) {
return ts;
}
+ @Override
public PeriodBuilder withTimeZone(TimeZone timeZone) {
    // The timeZone argument is deliberately unused: this builder is returned unchanged.
    return this;
}
+ @Override
public PeriodBuilder withLocale(String localeName) {
BasicPeriodBuilderFactory.Settings newSettings = settings.setLocale(localeName);
if (newSettings != settings) {
protected abstract PeriodBuilder withSettings(BasicPeriodBuilderFactory.Settings settingsToUse);
- protected abstract Period handleCreate(long duration, long referenceDate,
+ protected abstract Period handleCreate(long duration, long referenceDate,
boolean inPast);
protected PeriodBuilderImpl(BasicPeriodBuilderFactory.Settings settings) {
class FixedUnitBuilder extends PeriodBuilderImpl {
private TimeUnit unit;
-
+
public static FixedUnitBuilder get(TimeUnit unit, BasicPeriodBuilderFactory.Settings settingsToUse) {
if (settingsToUse != null && (settingsToUse.effectiveSet() & (1 << unit.ordinal)) != 0) {
return new FixedUnitBuilder(unit, settingsToUse);
this.unit = unit;
}
+ @Override
protected PeriodBuilder withSettings(BasicPeriodBuilderFactory.Settings settingsToUse) {
    // Same unit as this builder, paired with the supplied settings.
    final PeriodBuilder rebuilt = get(this.unit, settingsToUse);
    return rebuilt;
}
- protected Period handleCreate(long duration, long referenceDate,
+ @Override
+ protected Period handleCreate(long duration, long referenceDate,
boolean inPast) {
if (unit == null) {
return null;
return new SingleUnitBuilder(settings);
}
+ @Override
protected PeriodBuilder withSettings(BasicPeriodBuilderFactory.Settings settingsToUse) {
    // Obtain a single-unit builder for the supplied settings.
    final PeriodBuilder rebuilt = SingleUnitBuilder.get(settingsToUse);
    return rebuilt;
}
- protected Period handleCreate(long duration, long referenceDate,
+ @Override
+ protected Period handleCreate(long duration, long referenceDate,
boolean inPast) {
short uset = settings.effectiveSet();
for (int i = 0; i < TimeUnit.units.length; ++i) {
return new OneOrTwoUnitBuilder(settings);
}
+ @Override
protected PeriodBuilder withSettings(BasicPeriodBuilderFactory.Settings settingsToUse) {
    // Obtain a one-or-two-unit builder for the supplied settings.
    final PeriodBuilder rebuilt = OneOrTwoUnitBuilder.get(settingsToUse);
    return rebuilt;
}
- protected Period handleCreate(long duration, long referenceDate,
+ @Override
+ protected Period handleCreate(long duration, long referenceDate,
boolean inPast) {
Period period = null;
short uset = settings.effectiveSet();
return null;
}
+ @Override
protected PeriodBuilder withSettings(BasicPeriodBuilderFactory.Settings settingsToUse) {
    // Same period count as this builder, paired with the supplied settings.
    final PeriodBuilder rebuilt = MultiUnitBuilder.get(this.nPeriods, settingsToUse);
    return rebuilt;
}
- protected Period handleCreate(long duration, long referenceDate,
+ @Override
+ protected Period handleCreate(long duration, long referenceDate,
boolean inPast) {
Period period = null;
int n = 0;
private PeriodFormatterData data;
private Customizations customs;
- BasicPeriodFormatter(BasicPeriodFormatterFactory factory,
+ BasicPeriodFormatter(BasicPeriodFormatterFactory factory,
String localeName,
- PeriodFormatterData data,
+ PeriodFormatterData data,
Customizations customs) {
this.factory = factory;
this.localeName = localeName;
this.customs = customs;
}
- public String format(Period period) {
+ @Override
+public String format(Period period) {
if (!period.isSet()) {
throw new IllegalArgumentException("period is not set");
}
return format(period.timeLimit, period.inFuture, period.counts);
}
+ @Override
public PeriodFormatter withLocale(String locName) {
if (!this.localeName.equals(locName)) {
PeriodFormatterData newData = factory.getData(locName);
- return new BasicPeriodFormatter(factory, locName, newData,
+ return new BasicPeriodFormatter(factory, locName, newData,
customs);
}
return this;
}
}
- // if the data does not allow formatting of zero periods,
+ // if the data does not allow formatting of zero periods,
// remove these from consideration. If the result has no
// periods set, return null to indicate we could not format
// the duration.
// set, merge them with seconds and force display of seconds to
// decimal with 3 places.
boolean forceD3Seconds = false;
- if (data.useMilliseconds() != EMilliSupport.YES &&
+ if (data.useMilliseconds() != EMilliSupport.YES &&
(mask & (1 << TimeUnit.MILLISECOND.ordinal)) != 0) {
int sx = TimeUnit.SECOND.ordinal;
int mx = TimeUnit.MILLISECOND.ordinal;
int sf = 1 << sx;
int mf = 1 << mx;
- switch (data.useMilliseconds()) {
+ switch (data.useMilliseconds()) {
case EMilliSupport.WITH_SECONDS: {
// if there are seconds, merge with seconds, otherwise leave alone
if ((mask & sf) != 0) {
cv = ECountVariant.INTEGER;
}
boolean isLast = i == last;
- boolean mustSkip = data.appendUnit(unit, count, cv, customs.unitVariant,
+ boolean mustSkip = data.appendUnit(unit, count, cv, customs.unitVariant,
countSep, useDigitPrefix, multiple, isLast, wasSkipped, sb);
skipped |= mustSkip;
wasSkipped = false;
/**
* Set the locale for this factory.
*/
+ @Override
public PeriodFormatterFactory setLocale(String localeName) {
data = null;
this.localeName = localeName;
* @param display true if limits will be displayed
* @return this PeriodFormatterFactory
*/
+ @Override
public PeriodFormatterFactory setDisplayLimit(boolean display) {
updateCustomizations().displayLimit = display;
return this;
* @param display true if past and future will be displayed
* @return this PeriodFormatterFactory
*/
+ @Override
public PeriodFormatterFactory setDisplayPastFuture(boolean display) {
updateCustomizations().displayDirection = display;
return this;
* @param variant the variant indicating separators will be displayed
* @return this PeriodFormatterFactory
*/
+ @Override
public PeriodFormatterFactory setSeparatorVariant(int variant) {
updateCustomizations().separatorVariant = (byte) variant;
return this;
* @param variant the variant to use
* @return this PeriodFormatterFactory
*/
+ @Override
public PeriodFormatterFactory setUnitVariant(int variant) {
updateCustomizations().unitVariant = (byte) variant;
return this;
* @param variant the variant to use
* @return this PeriodFormatterFactory
*/
+ @Override
public PeriodFormatterFactory setCountVariant(int variant) {
updateCustomizations().countVariant = (byte) variant;
return this;
return customizations.countVariant;
}
+ @Override
public PeriodFormatter getFormatter() {
customizationsInUse = true;
- return new BasicPeriodFormatter(this, localeName, getData(),
+ return new BasicPeriodFormatter(this, localeName, getData(),
customizations);
}
byte separatorVariant = ESeparatorVariant.FULL;
byte unitVariant = EUnitVariant.PLURALIZED;
byte countVariant = ECountVariant.INTEGER;
-
+
public Customizations copy() {
Customizations result = new Customizations();
result.displayLimit = displayLimit;
/**
* Return the default service instance. This uses the default data service.
- *
+ *
* @return a BasicPeriodFormatterService
*/
public static BasicPeriodFormatterService getInstance() {
/**
* Construct a BasicPeriodFormatterService using the given
* PeriodFormatterDataService.
- *
+ *
* @param ds the data service to use
*/
public BasicPeriodFormatterService(PeriodFormatterDataService ds) {
this.ds = ds;
}
+ @Override
public DurationFormatterFactory newDurationFormatterFactory() {
    // Every call builds a fresh factory that is handed this service.
    final DurationFormatterFactory factory = new BasicDurationFormatterFactory(this);
    return factory;
}
+ @Override
public PeriodFormatterFactory newPeriodFormatterFactory() {
    // Every call builds a fresh factory over this service's data service.
    final PeriodFormatterFactory factory = new BasicPeriodFormatterFactory(ds);
    return factory;
}
+ @Override
public PeriodBuilderFactory newPeriodBuilderFactory() {
    // Every call builds a fresh factory over this service's data service.
    final PeriodBuilderFactory factory = new BasicPeriodBuilderFactory(ds);
    return factory;
}
+ @Override
public Collection<String> getAvailableLocaleNames() {
    // The data service is the source of the locale names; its result is passed through as-is.
    final Collection<String> names = ds.getAvailableLocales();
    return names;
}
* In addition Period can either represent the duration as being into the past
* or future, and as being more or less than the defined value.
* <p>
- * Use a PeriodFormatter to convert a Period to a String.
+ * Use a PeriodFormatter to convert a Period to a String.
* <p>
- * Periods are immutable. Mutating operations return the new
+ * Periods are immutable. Mutating operations return the new
* result leaving the original unchanged.
* <p>
* Example:<pre>
public Period omit(TimeUnit unit) {
return setTimeUnitInternalValue(unit, 0);
}
-
+
/**
* Mark the duration as being at the defined duration.
*
}
/**
- * Returns true if this represents a
+ * Returns true if this represents a
* duration into the future.
- * @return true if this represents a
+ * @return true if this represents a
* duration into the future.
*/
public boolean isInFuture() {
}
/**
- * Returns true if this represents a
+ * Returns true if this represents a
* duration into the past
- * @return true if this represents a
+ * @return true if this represents a
* duration into the past
*/
public boolean isInPast () {
return timeLimit == ETimeLimit.LT;
}
- /**
+ /**
* Returns true if rhs extends Period and
* the two Periods are equal.
* @param rhs the object to compare to
* @return true if rhs is a Period and is equal to this
*/
+ @Override
public boolean equals(Object rhs) {
try {
return equals((Period)rhs);
return false;
}
- /**
- * Returns the hashCode.
+ /**
+ * Returns the hashCode.
* @return the hashCode
*/
- public int hashCode() {
+ @Override
+public int hashCode() {
int hc = (timeLimit << 1) | (inFuture ? 1 : 0);
for (int i = 0; i < counts.length; ++i) {
hc = (hc << 2) ^ counts[i];
return setTimeUnitInternalValue(unit, (int)(value * 1000) + 1);
}
- /**
+ /**
* Sets the period to have the provided value, 1/1000 of the
* unit plus 1. Thus unset values are '0', '1' is the set value '0',
* 2 is the set value '1/1000', 3 is the set value '2/1000' etc.
*/
private static void checkCount(float count) {
if (count < 0) {
- throw new IllegalArgumentException("count (" + count +
+ throw new IllegalArgumentException("count (" + count +
") cannot be negative");
}
}
package com.ibm.icu.impl.duration;
/**
- * 'Enum' for individual time units. Not an actual enum so that it can be
+ * 'Enum' for individual time units. Not an actual enum so that it can be
* used by Java 1.4.
*/
public final class TimeUnit {
this.ordinal = (byte) ordinal;
}
+ @Override
public String toString() {
    // The string form of a unit is simply its name.
    return this.name;
}
-
- /** Represents a year. */
+
+ /** Represents a year. */
public static final TimeUnit YEAR = new TimeUnit("year", 0);
- /** Represents a month. */
+ /** Represents a month. */
public static final TimeUnit MONTH = new TimeUnit("month", 1);
- /** Represents a week. */
+ /** Represents a week. */
public static final TimeUnit WEEK = new TimeUnit("week", 2);
- /** Represents a day. */
+ /** Represents a day. */
public static final TimeUnit DAY = new TimeUnit("day", 3);
- /** Represents an hour. */
+ /** Represents an hour. */
public static final TimeUnit HOUR = new TimeUnit("hour", 4);
- /** Represents a minute. */
+ /** Represents a minute. */
public static final TimeUnit MINUTE = new TimeUnit("minute", 5);
- /** Represents a second. */
+ /** Represents a second. */
public static final TimeUnit SECOND = new TimeUnit("second", 6);
- /** Represents a millisecond. */
+ /** Represents a millisecond. */
public static final TimeUnit MILLISECOND = new TimeUnit("millisecond", 7);
/** Returns the next larger time unit, or null if this is the largest. */
// compute entire expression using 'long'. differs from initialization of
// a single constant
static final long[] approxDurations = {
- 36525L*24*60*60*10, 3045*24*60*60*10L, 7*24*60*60*1000L, 24*60*60*1000L,
+ 36525L*24*60*60*10, 3045*24*60*60*10L, 7*24*60*60*1000L, 24*60*60*1000L,
60*60*1000L, 60*1000L, 1000L, 1L
};
}
availableLocales = Collections.unmodifiableList(localeNames);
}
+ @Override
public PeriodFormatterData get(String localeName) {
// remove tag info including calendar, we don't use the calendar
int x = localeName.indexOf('@');
}
}
+ @Override
public Collection<String> getAvailableLocales() {
return availableLocales;
}
}
}
+ @Override
public boolean open(String title) {
if (getTag().equals(title)) {
nameStack.add(title);
return false;
}
+ @Override
public boolean close() {
int ix = nameStack.size() - 1;
String name = nameStack.get(ix);
return false;
}
+ @Override
public boolean bool(String name) {
String s = string(name);
if (s != null) {
return false;
}
+ @Override
public boolean[] boolArray(String name) {
String[] sa = stringArray(name);
if (sa != null) {
return null;
}
+ @Override
public char character(String name) {
String s = string(name);
if (s != null) {
return '\uffff';
}
+ @Override
public char[] characterArray(String name) {
String[] sa = stringArray(name);
if (sa != null) {
return null;
}
+ @Override
public byte namedIndex(String name, String[] names) {
String sa = string(name);
if (sa != null) {
return (byte) -1;
}
+ @Override
public byte[] namedIndexArray(String name, String[] names) {
String[] sa = stringArray(name);
if (sa != null) {
return null;
}
+ @Override
public String string(String name) {
if (match(name)) {
String result = readData();
return null;
}
+ @Override
public String[] stringArray(String name) {
if (match(name + "List")) {
List<String> list = new ArrayList<String>();
return null;
}
+ @Override
public String[][] stringTable(String name) {
if (match(name + "Table")) {
List<String[]> list = new ArrayList<String[]>();
this.nameStack = new ArrayList<String>();
}
+ @Override
public boolean open(String title) {
newline();
writeString("<" + title + ">");
return true;
}
+ @Override
public boolean close() {
int ix = nameStack.size() - 1;
if (ix >= 0) {
}
}
+ @Override
public void bool(String name, boolean value) {
    // Booleans are written in their String.valueOf form ("true"/"false").
    final String text = String.valueOf(value);
    internalString(name, text);
}
+ @Override
public void boolArray(String name, boolean[] values) {
if (values != null) {
String[] stringValues = new String[values.length];
return String.valueOf(value);
}
+ @Override
public void character(String name, char value) {
    // U+FFFF is never written.
    // NOTE(review): presumably the "no value" sentinel - confirm against the reader.
    if (value == '\uffff') {
        return;
    }
    internalString(name, ctos(value));
}
+ @Override
public void characterArray(String name, char[] values) {
if (values != null) {
String[] stringValues = new String[values.length];
}
}
+ @Override
public void namedIndex(String name, String[] names, int value) {
    // A negative index produces no output at all.
    if (value < 0) {
        return;
    }
    // Otherwise the index is written as the name found at that position.
    internalString(name, names[value]);
}
+ @Override
public void namedIndexArray(String name, String[] names, byte[] values) {
if (values != null) {
String[] stringValues = new String[values.length];
}
}
+ @Override
public void string(String name, String value) {
    // The value goes through normalize() before being written.
    final String normalized = normalize(value);
    internalString(name, normalized);
}
+ @Override
public void stringArray(String name, String[] values) {
if (values != null) {
push(name + "List");
}
}
+ @Override
public void stringTable(String name, String[][] values) {
if (values != null) {
push(name + "Table");
_hash = AsciiUtil.toLowerString(key).hashCode();
}
+ @Override
public boolean equals(Object o) {
if (this == o) {
return true;
return false;
}
+ @Override
public int hashCode() {
    // Returns the stored _hash field; nothing is recomputed here.
    return this._hash;
}
return _variant;
}
+ @Override
public boolean equals(Object obj) {
if (this == obj) {
return true;
&& _variant.equals(other._variant);
}
+ @Override
public String toString() {
StringBuilder buf = new StringBuilder();
if (_language.length() > 0) {
return buf.toString();
}
+ @Override
public int hashCode() {
int h = _hash;
if (h == 0) {
}
}
+ @Override
public boolean equals(Object obj) {
if (JDKIMPL) {
return (this == obj) ||
&& AsciiUtil.caseIgnoreMatch(((Key)obj)._vart, this._vart);
}
+ @Override
public int compareTo(Key other) {
int res = AsciiUtil.caseIgnoreCompare(this._lang, other._lang);
if (res == 0) {
return res;
}
+ @Override
public int hashCode() {
int h = _hash;
if (h == 0) {
public Cache() {
}
+ @Override
protected Key normalizeKey(Key key) {
    // Normalisation is delegated entirely to Key.normalize.
    final Key normalized = Key.normalize(key);
    return normalized;
}
+ @Override
protected BaseLocale createObject(Key key) {
    // Build the locale from the key's four components, in constructor order.
    return new BaseLocale(
            key._lang,
            key._scrt,
            key._regn,
            key._vart);
}
return _key + LanguageTag.SEP + _value;
}
+ @Override
public String toString() {
    // The string form is exactly the value of getID().
    final String id = getID();
    return id;
}
return _s;
}
+ @Override
public int hashCode() {
    // Hash the result of AsciiUtil.toLowerString(_s) rather than _s itself.
    return AsciiUtil
            .toLowerString(_s)
            .hashCode();
}
+ @Override
public boolean equals(Object obj) {
if (this == obj) {
return true;
return _c;
}
+ @Override
public int hashCode() {
    // The hash is the value AsciiUtil.toLower returns for the wrapped character.
    final int folded = AsciiUtil.toLower(_c);
    return folded;
}
+ @Override
public boolean equals(Object obj) {
if (this == obj) {
return true;
private static class CodepointsTypeHandler extends SpecialTypeHandler {
private static final Pattern pat = Pattern.compile("[0-9a-fA-F]{4,6}(-[0-9a-fA-F]{4,6})*");
+ @Override
boolean isWellFormed(String value) {
    // Well-formed only when the entire value matches this handler's pattern.
    final boolean entireValueMatches = pat.matcher(value).matches();
    return entireValueMatches;
}
private static class ReorderCodeTypeHandler extends SpecialTypeHandler {
private static final Pattern pat = Pattern.compile("[a-zA-Z]{3,8}(-[a-zA-Z]{3,8})*");
+ @Override
boolean isWellFormed(String value) {
    // Well-formed only when the entire value matches this handler's pattern.
    final boolean entireValueMatches = pat.matcher(value).matches();
    return entireValueMatches;
}
private static class RgKeyValueTypeHandler extends SpecialTypeHandler {
private static final Pattern pat = Pattern.compile("([a-zA-Z]{2}|[0-9]{3})[zZ]{4}");
+ @Override
boolean isWellFormed(String value) {
    // Well-formed only when the entire value matches this handler's pattern.
    final boolean entireValueMatches = pat.matcher(value).matches();
    return entireValueMatches;
}
}
-
+
private static class SubdivisionKeyValueTypeHandler extends SpecialTypeHandler {
private static final Pattern pat = Pattern.compile("([a-zA-Z]{2}|[0-9]{3})");
+ @Override
boolean isWellFormed(String value) {
    // Well-formed only when the entire value matches this handler's pattern.
    final boolean entireValueMatches = pat.matcher(value).matches();
    return entireValueMatches;
}
}
-
+
private static class PrivateUseKeyValueTypeHandler extends SpecialTypeHandler {
private static final Pattern pat = Pattern.compile("[a-zA-Z0-9]{3,8}(-[a-zA-Z0-9]{3,8})*");
+ @Override
boolean isWellFormed(String value) {
    // Well-formed only when the entire value matches this handler's pattern.
    final boolean entireValueMatches = pat.matcher(value).matches();
    return entireValueMatches;
}
public static Set<String> getBcp47Keys() {
    // Returns whatever BCP47_KEYS.keySet() yields; no copy is made here.
    // NOTE(review): if BCP47_KEYS is a mutable map, callers receive a live view - confirm.
    return BCP47_KEYS.keySet();
}
-
+
public static Set<String> getBcp47KeyTypes(String key) {
    // Straight lookup in BCP47_KEYS; the result is passed through unchanged.
    // NOTE(review): presumably null when the key is not registered - confirm the map type.
    return BCP47_KEYS.get(key);
}
/*
* BNF in RFC 5646
- *
+ *
* Language-Tag = langtag ; normal language tags
* / privateuse ; private use tag
* / grandfathered ; grandfathered tags
*
- *
+ *
* langtag = language
* ["-" script]
* ["-" region]
* *("-" variant)
* *("-" extension)
* ["-" privateuse]
- *
+ *
* language = 2*3ALPHA ; shortest ISO 639 code
* ["-" extlang] ; sometimes followed by
* ; extended language subtags
* / 4ALPHA ; or reserved for future use
* / 5*8ALPHA ; or registered language subtag
- *
+ *
* extlang = 3ALPHA ; selected ISO 639 codes
* *2("-" 3ALPHA) ; permanently reserved
- *
+ *
* script = 4ALPHA ; ISO 15924 code
- *
+ *
* region = 2ALPHA ; ISO 3166-1 code
* / 3DIGIT ; UN M.49 code
- *
+ *
* variant = 5*8alphanum ; registered variants
* / (DIGIT 3alphanum)
- *
+ *
* extension = singleton 1*("-" (2*8alphanum))
- *
+ *
* ; Single alphanumerics
* ; "x" reserved for private use
* singleton = DIGIT ; 0 - 9
* / %x59-5A ; Y - Z
* / %x61-77 ; a - w
* / %x79-7A ; y - z
- *
+ *
* privateuse = "x" 1*("-" (1*8alphanum))
- *
+ *
*/
public static LanguageTag parse(String languageTag, ParseStatus sts) {
if (sts == null) {
if (s.length() == 0) {
sts._errorMsg = "Empty subtag";
} else {
- sts._errorMsg = "Invalid subtag: " + s;
+ sts._errorMsg = "Invalid subtag: " + s;
}
}
return AsciiUtil.toLowerString(s);
}
+ @Override
public String toString() {
StringBuilder sb = new StringBuilder();
}
+ @Override
public String toString() {
    // The string form is the stored identifier itself.
    return this._id;
}
return _id;
}
+ @Override
public int hashCode() {
    // The hash is taken directly from the stored identifier.
    final int idHash = _id.hashCode();
    return idHash;
}
+ @Override
public boolean equals(Object other) {
if (this == other) {
return true;
* @return true if we are not at the end of the iteration, false otherwise.
* @see com.ibm.icu.util.ValueIterator.Element
*/
+ @Override
public boolean next(ValueIterator.Element element)
{
if (m_current_ >= m_limit_) {
return true;
}
}
- /*
+ /*
// "if (m_current_ >= m_limit_)" would not return true
// because it can never be reached due to:
// 1) It has already been checked earlier
* UCharacter.MIN_VALUE or X if a setRange(X, Y) has been called previously.
* </p>
*/
+ @Override
public void reset()
{
m_current_ = m_start_;
* @exception IllegalArgumentException thrown when attempting to set an
* illegal range. E.g limit <= start
*/
+ @Override
public void setRange(int start, int limit)
{
if (start >= limit) {
* The floating point arithmetic provided by this class is defined by the ANSI X3.274-1996 standard, and is also
* documented at <code>http://www2.hursley.ibm.com/decimal</code> <br>
* <i>[This URL will change.]</i>
- *
+ *
* <h3>Operator methods</h3>
* <p>
* Operations on <code>BigDecimal</code> numbers are controlled by a {@link MathContext} object, which provides the
* <p>
* The names of methods in this class follow the conventions established by <code>java.lang.Number</code>,
* <code>java.math.BigInteger</code>, and <code>java.math.BigDecimal</code> in Java 1.1 and Java 1.2.
- *
+ *
* @see MathContext
* @author Mike Cowlishaw
* @stable ICU 2.0
/* properties constant public */// useful to others
/**
* The <code>BigDecimal</code> constant "0".
- *
+ *
* @see #ONE
* @see #TEN
* @stable ICU 2.0
/**
* The <code>BigDecimal</code> constant "1".
- *
+ *
* @see #TEN
* @see #ZERO
* @stable ICU 2.0
/**
* The <code>BigDecimal</code> constant "10".
- *
+ *
* @see #ONE
* @see #ZERO
* @stable ICU 2.0
// the rounding modes (copied here for upwards compatibility)
/**
* Rounding mode to round to a more positive number.
- *
+ *
* @see MathContext#ROUND_CEILING
* @stable ICU 2.0
*/
/**
* Rounding mode to round towards zero.
- *
+ *
* @see MathContext#ROUND_DOWN
* @stable ICU 2.0
*/
/**
* Rounding mode to round to a more negative number.
- *
+ *
* @see MathContext#ROUND_FLOOR
* @stable ICU 2.0
*/
/**
* Rounding mode to round to nearest neighbor, where an equidistant value is rounded down.
- *
+ *
* @see MathContext#ROUND_HALF_DOWN
* @stable ICU 2.0
*/
/**
* Rounding mode to round to nearest neighbor, where an equidistant value is rounded to the nearest even neighbor.
- *
+ *
* @see MathContext#ROUND_HALF_EVEN
* @stable ICU 2.0
*/
/**
* Rounding mode to round to nearest neighbor, where an equidistant value is rounded up.
- *
+ *
* @see MathContext#ROUND_HALF_UP
* @stable ICU 2.0
*/
/**
* Rounding mode to assert that no rounding is necessary.
- *
+ *
* @see MathContext#ROUND_UNNECESSARY
* @stable ICU 2.0
*/
/**
* Rounding mode to round away from zero.
- *
+ *
* @see MathContext#ROUND_UP
* @stable ICU 2.0
*/
* <ul>
* <li>ispos -- the number is positive <li>iszero -- the number is zero <li>isneg -- the number is negative
* </ul>
- *
+ *
* @serial
*/
private byte ind; // assumed undefined
* <p>
* This property is an optimization; it allows us to defer number layout until it is actually needed as a string,
* hence avoiding unnecessary formatting.
- *
+ *
* @serial
*/
private byte form = (byte) com.ibm.icu.math.MathContext.PLAIN; // assumed PLAIN
* <p>
* If the first byte is 0 then the value of the number is zero (and mant.length=1, except when constructed from a
* plain number, for example, 0.000).
- *
+ *
* @serial
*/
private byte mant[]; // assumed null
* The exponent.
* <p>
* For fixed point arithmetic, scale is <code>-exp</code>, and can apply to zero.
- *
+ *
* Note that this property can have a value less than MinExp when the mantissa has more than one digit.
- *
+ *
* @serial
*/
private int exp;
* <p>
* <i>(Note: this constructor is provided only in the <code>com.ibm.icu.math</code> version of the BigDecimal class.
* It would not be present in a <code>java.math</code> version.)</i>
- *
+ *
* @param bd The <code>BigDecimal</code> to be translated.
* @stable ICU 2.0
*/
* <p>
* The <code>BigDecimal</code> will contain only decimal digits, prefixed with a leading minus sign (hyphen) if the
* <code>BigInteger</code> is negative. A leading zero will be present only if the <code>BigInteger</code> is zero.
- *
+ *
* @param bi The <code>BigInteger</code> to be converted.
* @stable ICU 2.0
*/
* The <code>BigDecimal</code> will contain only decimal digits, (with an embedded decimal point followed by <code>
* scale</code> decimal digits if the scale is positive), prefixed with a leading minus sign (hyphen) if the <code>
* BigInteger</code> is negative. A leading zero will be present only if the <code>BigInteger</code> is zero.
- *
+ *
* @param bi The <code>BigInteger</code> to be converted.
* @param scale The <code>int</code> specifying the scale.
* @throws NumberFormatException If the scale is negative.
* <p>
* Using this constructor is faster than using the <code>BigDecimal(String)</code> constructor if the string is
* already available in character array form.
- *
+ *
* @param inchars The <code>char[]</code> array containing the number to be converted.
* @throws NumberFormatException If the parameter is not a valid number.
* @stable ICU 2.0
* <p>
* Using this constructor is faster than using the <code>BigDecimal(String)</code> constructor if the string is
* already available within a character array.
- *
+ *
* @param inchars The <code>char[]</code> array containing the number to be converted.
* @param offset The <code>int</code> offset into the array of the start of the number to be converted.
* @param length The <code>int</code> length of the number.
if (dvalue < 0)
bad(inchars); // not base 10
} else
- dvalue = ((int) (sj)) - ((int) ('0'));
+ dvalue = ((sj)) - (('0'));
exp = (exp * 10) + dvalue;
}
}/* j */
j++; // at dot
sj = inchars[j];
if (sj <= '9')
- mant[i] = (byte) (((int) (sj)) - ((int) ('0')));/* easy */
+ mant[i] = (byte) (((sj)) - (('0')));/* easy */
else {
dvalue = UCharacter.digit(sj, 10);
if (dvalue < 0)
for (; $5 > 0; $5--, i++) {
if (i == dotoff)
j++;
- mant[i] = (byte) (((int) (inchars[j])) - ((int) ('0')));
+ mant[i] = (byte) (((inchars[j])) - (('0')));
j++;
}
}/* i */
* </code> to a <code>String</code> using the <code>Double.toString()</code> method and then using the
* {@link #BigDecimal(java.lang.String)} constructor. To get that result, use the static {@link #valueOf(double)}
* method to construct a <code>BigDecimal</code> from a <code>double</code>.
- *
+ *
* @param num The <code>double</code> to be converted.
* @throws NumberFormatException If the parameter is infinite or not a number.
* @stable ICU 2.0
* Constructs a <code>BigDecimal</code> which is the exact decimal representation of the 32-bit signed binary
* integer parameter. The <code>BigDecimal</code> will contain only decimal digits, prefixed with a leading minus
* sign (hyphen) if the parameter is negative. A leading zero will be present only if the parameter is zero.
- *
+ *
* @param num The <code>int</code> to be converted.
* @stable ICU 2.0
*/
* Constructs a <code>BigDecimal</code> which is the exact decimal representation of the 64-bit signed binary
* integer parameter. The <code>BigDecimal</code> will contain only decimal digits, prefixed with a leading minus
* sign (hyphen) if the parameter is negative. A leading zero will be present only if the parameter is zero.
- *
+ *
* @param num The <code>long</code> to be converted.
* @stable ICU 2.0
*/
* decimal point, and exponential notation may be used. They follow conventional syntax, and may not contain blanks.
* <p>
* Some valid strings from which a <code>BigDecimal</code> might be constructed are:
- *
+ *
* <pre>
- *
+ *
* "0" -- Zero "12" -- A whole number "-76" -- A signed whole number "12.70" -- Some decimal places "+0.003" -- Plus
* sign is allowed "17." -- The same as 17 ".5" -- The same as 0.5 "4E+9" -- Exponential notation "0.73e-7" --
* Exponential notation
- *
+ *
* </pre>
* <p>
* (Exponential notation means that the number includes an optional sign and a power of ten following an
* <p>
* Any digits in the parameter must be decimal; that is, <code>Character.digit(c, 10)</code> (where <code>c</code>
* is the character in question) would not return -1.
- *
+ *
* @param string The <code>String</code> to be converted.
* @throws NumberFormatException If the parameter is not a valid number.
* @stable ICU 2.0
* The same as {@link #abs(MathContext)}, where the context is <code>new MathContext(0, MathContext.PLAIN)</code>.
* <p>
* The length of the decimal part (the scale) of the result will be <code>this.scale()</code>
- *
+ *
* @return A <code>BigDecimal</code> whose value is the absolute value of this <code>BigDecimal</code>.
* @stable ICU 2.0
*/
* If the current object is zero or positive, then the same result as invoking the {@link #plus(MathContext)} method
* with the same parameter is returned. Otherwise, the same result as invoking the {@link #negate(MathContext)}
* method with the same parameter is returned.
- *
+ *
* @param set The <code>MathContext</code> arithmetic settings.
* @return A <code>BigDecimal</code> whose value is the absolute value of this <code>BigDecimal</code>.
* @stable ICU 2.0
* context is <code>new MathContext(0, MathContext.PLAIN)</code>.
* <p>
* The length of the decimal part (the scale) of the result will be the maximum of the scales of the two operands.
- *
+ *
* @param rhs The <code>BigDecimal</code> for the right hand side of the addition.
* @return A <code>BigDecimal</code> whose value is <code>this+rhs</code>, using fixed point arithmetic.
* @stable ICU 2.0
* <p>
* Implements the addition (<b><code>+</code></b>) operator (as defined in the decimal documentation, see
* {@link BigDecimal class header}), and returns the result as a <code>BigDecimal</code> object.
- *
+ *
* @param rhs The <code>BigDecimal</code> for the right hand side of the addition.
* @param set The <code>MathContext</code> arithmetic settings.
* @return A <code>BigDecimal</code> whose value is <code>this+rhs</code>.
* <p>
* The same as {@link #compareTo(BigDecimal, MathContext)}, where the <code>BigDecimal</code> is <code>rhs</code>,
* and the context is <code>new MathContext(0, MathContext.PLAIN)</code>.
- *
+ *
* @param rhs The <code>BigDecimal</code> for the right hand side of the comparison.
* @return An <code>int</code> whose value is -1, 0, or 1 as <code>this</code> is numerically less than, equal to,
* or greater than <code>rhs</code>.
* @stable ICU 2.0
*/
+ @Override
public int compareTo(com.ibm.icu.math.BigDecimal rhs) {
    // Single-argument comparison: delegate to the two-argument form,
    // supplying the shared plainMC context.
    final int ordering = compareTo(rhs, plainMC);
    return ordering;
}
* </table>
* <p>
* A {@link #compareTo(BigDecimal)} method is also provided.
- *
+ *
* @param rhs The <code>BigDecimal</code> for the right hand side of the comparison.
* @param set The <code>MathContext</code> arithmetic settings.
* @return An <code>int</code> whose value is -1, 0, or 1 as <code>this</code> is numerically less than, equal to,
* <p>
* The same as {@link #divide(BigDecimal, int)}, where the <code>BigDecimal</code> is <code>rhs</code>, and the
* rounding mode is {@link MathContext#ROUND_HALF_UP}.
- *
+ *
* The length of the decimal part (the scale) of the result will be the same as the scale of the current object, if
* the latter were formatted without exponential notation.
- *
+ *
* @param rhs The <code>BigDecimal</code> for the right hand side of the division.
* @return A plain <code>BigDecimal</code> whose value is <code>this/rhs</code>, using fixed point arithmetic.
* @throws ArithmeticException If <code>rhs</code> is zero.
* The length of the decimal part (the scale) of the result will therefore be the same as the scale of the current
* object, if the latter were formatted without exponential notation.
* <p>
- *
+ *
* @param rhs The <code>BigDecimal</code> for the right hand side of the division.
* @param round The <code>int</code> rounding mode to be used for the division (see the {@link MathContext} class).
* @return A plain <code>BigDecimal</code> whose value is <code>this/rhs</code>, using fixed point arithmetic and
* The length of the decimal part (the scale) of the result will be the same as the scale of the current object, if
* the latter were formatted without exponential notation.
* <p>
- *
+ *
* @param rhs The <code>BigDecimal</code> for the right hand side of the division.
* @param scale The <code>int</code> scale to be used for the result.
* @param round The <code>int</code> rounding mode to be used for the division (see the {@link MathContext} class).
* <p>
* Implements the division (<b><code>/</code></b>) operator (as defined in the decimal documentation, see
* {@link BigDecimal class header}), and returns the result as a <code>BigDecimal</code> object.
- *
+ *
* @param rhs The <code>BigDecimal</code> for the right hand side of the division.
* @param set The <code>MathContext</code> arithmetic settings.
* @return A <code>BigDecimal</code> whose value is <code>this/rhs</code>.
* <p>
* The same as {@link #divideInteger(BigDecimal, MathContext)}, where the <code>BigDecimal</code> is <code>rhs
* </code>, and the context is <code>new MathContext(0, MathContext.PLAIN)</code>.
- *
+ *
* @param rhs The <code>BigDecimal</code> for the right hand side of the integer division.
* @return A <code>BigDecimal</code> whose value is the integer part of <code>this/rhs</code>.
* @throws ArithmeticException if <code>rhs</code> is zero.
* <p>
* Implements the integer division operator (as defined in the decimal documentation, see {@link BigDecimal class
* header}), and returns the result as a <code>BigDecimal</code> object.
- *
+ *
* @param rhs The <code>BigDecimal</code> for the right hand side of the integer division.
* @param set The <code>MathContext</code> arithmetic settings.
* @return A <code>BigDecimal</code> whose value is the integer part of <code>this/rhs</code>.
* <p>
* The same as {@link #max(BigDecimal, MathContext)}, where the <code>BigDecimal</code> is <code>rhs</code>, and the
* context is <code>new MathContext(0, MathContext.PLAIN)</code>.
- *
+ *
* @param rhs The <code>BigDecimal</code> for the right hand side of the comparison.
* @return A <code>BigDecimal</code> whose value is the maximum of <code>this</code> and <code>rhs</code>.
* @stable ICU 2.0
* (using the same <code>MathContext</code> parameter) is returned. Otherwise, the result of calling the
* {@link #plus(MathContext)} method on the first parameter object (using the same <code>MathContext</code>
* parameter) is returned.
- *
+ *
* @param rhs The <code>BigDecimal</code> for the right hand side of the comparison.
* @param set The <code>MathContext</code> arithmetic settings.
* @return A <code>BigDecimal</code> whose value is the maximum of <code>this</code> and <code>rhs</code>.
* <p>
* The same as {@link #min(BigDecimal, MathContext)}, where the <code>BigDecimal</code> is <code>rhs</code>, and the
* context is <code>new MathContext(0, MathContext.PLAIN)</code>.
- *
+ *
* @param rhs The <code>BigDecimal</code> for the right hand side of the comparison.
* @return A <code>BigDecimal</code> whose value is the minimum of <code>this</code> and <code>rhs</code>.
* @stable ICU 2.0
* (using the same <code>MathContext</code> parameter) is returned. Otherwise, the result of calling the
* {@link #plus(MathContext)} method on the first parameter object (using the same <code>MathContext</code>
* parameter) is returned.
- *
+ *
* @param rhs The <code>BigDecimal</code> for the right hand side of the comparison.
* @param set The <code>MathContext</code> arithmetic settings.
* @return A <code>BigDecimal</code> whose value is the minimum of <code>this</code> and <code>rhs</code>.
* <p>
* The length of the decimal part (the scale) of the result will be the sum of the scales of the operands, if they
* were formatted without exponential notation.
- *
+ *
* @param rhs The <code>BigDecimal</code> for the right hand side of the multiplication.
* @return A <code>BigDecimal</code> whose value is <code>this*rhs</code>, using fixed point arithmetic.
* @stable ICU 2.0
* <p>
* Implements the multiplication (<b><code>*</code></b>) operator (as defined in the decimal documentation, see
* {@link BigDecimal class header}), and returns the result as a <code>BigDecimal</code> object.
- *
+ *
* @param rhs The <code>BigDecimal</code> for the right hand side of the multiplication.
* @param set The <code>MathContext</code> arithmetic settings.
* @return A <code>BigDecimal</code> whose value is <code>this*rhs</code>.
* .
* <p>
* The length of the decimal part (the scale) of the result will be <code>this.scale()</code>
- *
- *
+ *
+ *
* @return A <code>BigDecimal</code> whose value is <code>-this</code>.
* @stable ICU 2.0
*/
* <p>
* Implements the negation (Prefix <b><code>-</code></b>) operator (as defined in the decimal documentation, see
* {@link BigDecimal class header}), and returns the result as a <code>BigDecimal</code> object.
- *
+ *
* @param set The <code>MathContext</code> arithmetic settings.
* @return A <code>BigDecimal</code> whose value is <code>-this</code>.
* @stable ICU 2.0
* The same as {@link #plus(MathContext)}, where the context is <code>new MathContext(0, MathContext.PLAIN)</code>.
* <p>
* The length of the decimal part (the scale) of the result will be <code>this.scale()</code>
- *
+ *
* @return A <code>BigDecimal</code> whose value is <code>+this</code>.
* @stable ICU 2.0
*/
* {@link BigDecimal class header}), and returns the result as a <code>BigDecimal</code> object.
* <p>
* This method is useful for rounding or otherwise applying a context to a decimal value.
- *
+ *
* @param set The <code>MathContext</code> arithmetic settings.
* @return A <code>BigDecimal</code> whose value is <code>+this</code>.
* @stable ICU 2.0
* <p>
* In addition, the power must not be negative, as no <code>MathContext</code> is used and so the result would then
* always be 0.
- *
+ *
* @param rhs The <code>BigDecimal</code> for the right hand side of the operation (the power).
* @return A <code>BigDecimal</code> whose value is <code>this**rhs</code>, using fixed point arithmetic.
* @throws ArithmeticException if <code>rhs</code> is out of range or is not a whole number.
* <p>
* If the <code>digits</code> setting of the <code>MathContext</code> parameter is 0, the power must be zero or
* positive.
- *
+ *
* @param rhs The <code>BigDecimal</code> for the right hand side of the operation (the power).
* @param set The <code>MathContext</code> arithmetic settings.
* @return A <code>BigDecimal</code> whose value is <code>this**rhs</code>.
* and the context is <code>new MathContext(0, MathContext.PLAIN)</code>.
* <p>
* This is not the modulo operator -- the result may be negative.
- *
+ *
* @param rhs The <code>BigDecimal</code> for the right hand side of the remainder operation.
* @return A <code>BigDecimal</code> whose value is the remainder of <code>this/rhs</code>, using fixed point
* arithmetic.
* and returns the result as a <code>BigDecimal</code> object.
* <p>
* This is not the modulo operator -- the result may be negative.
- *
+ *
* @param rhs The <code>BigDecimal</code> for the right hand side of the remainder operation.
* @param set The <code>MathContext</code> arithmetic settings.
* @return A <code>BigDecimal</code> whose value is the remainder of <code>this/rhs</code>.
* and the context is <code>new MathContext(0, MathContext.PLAIN)</code>.
* <p>
* The length of the decimal part (the scale) of the result will be the maximum of the scales of the two operands.
- *
+ *
* @param rhs The <code>BigDecimal</code> for the right hand side of the subtraction.
* @return A <code>BigDecimal</code> whose value is <code>this-rhs</code>, using fixed point arithmetic.
* @stable ICU 2.0
* <p>
* Implements the subtraction (<b><code>-</code></b>) operator (as defined in the decimal documentation, see
* {@link BigDecimal class header}), and returns the result as a <code>BigDecimal</code> object.
- *
+ *
* @param rhs The <code>BigDecimal</code> for the right hand side of the subtraction.
* @param set The <code>MathContext</code> arithmetic settings.
* @return A <code>BigDecimal</code> whose value is <code>this-rhs</code>.
* Converts this <code>BigDecimal</code> to a <code>byte</code>. If the <code>BigDecimal</code> has a non-zero
* decimal part or is out of the possible range for a <code>byte</code> (8-bit signed integer) result then an <code>
* ArithmeticException</code> is thrown.
- *
+ *
* @return A <code>byte</code> equal in value to <code>this</code>.
* @throws ArithmeticException if <code>this</code> has a non-zero decimal part, or will not fit in a <code>byte</code>.
* @stable ICU 2.0
* The double produced is identical to the result of expressing the <code>BigDecimal</code> as a <code>String</code> and
* then converting it using the <code>Double(String)</code> constructor; this can result in values of <code>
* Double.NEGATIVE_INFINITY</code> or <code>Double.POSITIVE_INFINITY</code>.
- *
+ *
* @return A <code>double</code> corresponding to <code>this</code>.
* @stable ICU 2.0
*/
+ @Override
public double doubleValue() {
// We go via a String [as does BigDecimal in JDK 1.2]
// Next line could possibly raise NumberFormatException
* are identical (they have the same characters in the same sequence).
* <p>
* The {@link #compareTo(BigDecimal, MathContext)} method should be used for more general comparisons.
- *
+ *
* @param obj The <code>Object</code> for the right hand side of the comparison.
* @return A <code>boolean</code> whose value is <i>true</i> if and only if the operands have identical string
* representations.
* @see #compareTo(BigDecimal, MathContext)
*/
+ @Override
public boolean equals(java.lang.Object obj) {
com.ibm.icu.math.BigDecimal rhs;
int i = 0;
* The float produced is identical to the result of expressing the <code>BigDecimal</code> as a <code>String</code> and
* then converting it using the <code>Float(String)</code> constructor; this can result in values of <code>
* Float.NEGATIVE_INFINITY</code> or <code>Float.POSITIVE_INFINITY</code>.
- *
+ *
* @return A <code>float</code> corresponding to <code>this</code>.
* @stable ICU 2.0
*/
+ @Override
public float floatValue() {
    // Go via the canonical string form and let the JDK parse it; parseFloat
    // yields the same primitive as Float.valueOf(...).floatValue() without
    // the intermediate boxed Float.
    final java.lang.String text = this.toString();
    return java.lang.Float.parseFloat(text);
}
* Other rounding methods, and the use of exponential notation, can be selected by using
* {@link #format(int,int,int,int,int,int)}. Using the two-parameter form of the method has exactly the same effect
* as using the six-parameter form with the final four parameters all being -1.
- *
+ *
* @param before The <code>int</code> specifying the number of places before the decimal point. Use -1 for 'as many as are needed'.
* @param after The <code>int</code> specifying the number of places after the decimal point. Use -1 for 'as many as are needed'.
* @return A <code>String</code> representing this <code>BigDecimal</code>, laid out according to the specified parameters
* The special value <code>MathContext.ROUND_UNNECESSARY</code> may be used to detect whether non-zero digits are
* discarded -- if <code>exround</code> has this value then if non-zero digits would be discarded (rounded) during
* formatting then an <code>ArithmeticException</code> is thrown.
- *
+ *
* @param before The <code>int</code> specifying the number of places before the decimal point. Use -1 for 'as many as
* are needed'.
* @param after The <code>int</code> specifying the number of places after the decimal point. Use -1 for 'as many as
newa[i] = ' ';
}
}/* i */
- java.lang.System.arraycopy((java.lang.Object) a, 0, (java.lang.Object) newa, i, a.length);
+ java.lang.System.arraycopy(a, 0, newa, i, a.length);
a = newa;
}
// [if p=before then it's just the right length]
// p is now offset of 'E', or 0
if (p == 0) { // no E part; add trailing blanks
newa = new char[(a.length + explaces) + 2];
- java.lang.System.arraycopy((java.lang.Object) a, 0, (java.lang.Object) newa, 0, a.length);
+ java.lang.System.arraycopy(a, 0, newa, 0, a.length);
{
int $14 = explaces + 2;
i = a.length;
badarg("format", 3, java.lang.String.valueOf(explaces));
if (places < explaces) { // need to insert zeros
newa = new char[(a.length + explaces) - places];
- java.lang.System.arraycopy((java.lang.Object) a, 0, (java.lang.Object) newa, 0, p + 2); // through E
+ java.lang.System.arraycopy(a, 0, newa, 0, p + 2); // through E
// and sign
{
int $15 = explaces - places;
newa[i] = '0';
}
}/* i */
- java.lang.System.arraycopy((java.lang.Object) a, p + 2, (java.lang.Object) newa, i, places); // remainder
+ java.lang.System.arraycopy(a, p + 2, newa, i, places); // remainder
// of
// exponent
a = newa;
* Note that two <code>BigDecimal</code> objects are only guaranteed to produce the same hashcode if they are
* exactly equal (that is, the <code>String</code> representations of the <code>BigDecimal</code> numbers are
* identical -- they have the same characters in the same sequence).
- *
+ *
* @return An <code>int</code> that is the hashcode for <code>this</code>.
* @stable ICU 2.0
*/
+ @Override
public int hashCode() {
// Maybe calculate ourselves, later. If so, note that there can be
// more than one internal representation for a given toString() result.
* (32-bit signed integer) result then only the low-order 32 bits are used. (That is, the number may be
* <i>decapitated</i>.) To avoid unexpected errors when these conditions occur, use the {@link #intValueExact}
* method.
- *
+ *
* @return An <code>int</code> converted from <code>this</code>, truncated and decapitated if necessary.
* @stable ICU 2.0
*/
+ @Override
public int intValue() {
    // Convert to a BigInteger first, then let BigInteger narrow the value to an int.
    final java.math.BigInteger integerPart = toBigInteger();
    return integerPart.intValue();
}
* Converts this <code>BigDecimal</code> to an <code>int</code>. If the <code>BigDecimal</code> has a non-zero
* decimal part or is out of the possible range for an <code>int</code> (32-bit signed integer) result then an
* <code>ArithmeticException</code> is thrown.
- *
+ *
* @return An <code>int</code> equal in value to <code>this</code>.
* @throws ArithmeticException if <code>this</code> has a non-zero decimal part, or will not fit in an <code>int</code>.
* @stable ICU 2.0
* (64-bit signed integer) result then only the low-order 64 bits are used. (That is, the number may be
* <i>decapitated</i>.) To avoid unexpected errors when these conditions occur, use the {@link #longValueExact}
* method.
- *
+ *
* @return A <code>long</code> converted from <code>this</code>, truncated and decapitated if necessary.
* @stable ICU 2.0
*/
+ @Override
public long longValue() {
    // Convert to a BigInteger first, then let BigInteger narrow the value to a long.
    final java.math.BigInteger integerPart = toBigInteger();
    return integerPart.longValue();
}
* Converts this <code>BigDecimal</code> to a <code>long</code>. If the <code>BigDecimal</code> has a non-zero
* decimal part or is out of the possible range for a <code>long</code> (64-bit signed integer) result then an
* <code>ArithmeticException</code> is thrown.
- *
+ *
* @return A <code>long</code> equal in value to <code>this</code>.
* @throws ArithmeticException if <code>this</code> has a non-zero decimal part, or will not fit in a <code>long</code>.
* @stable ICU 2.0
// note that we could safely use the 'test for wrap to negative'
// algorithm here, but instead we parallel the intValueExact
// algorithm for ease of checking and maintenance.
- result = (long) 0;
+ result = 0;
{
int $17 = lodigit + useexp;
i = 0;
* <p>
* <code>n</code> may be negative, in which case the method returns the same result as <code>movePointRight(-n)
* </code>.
- *
+ *
* @param n The <code>int</code> specifying the number of places to move the decimal point leftwards.
* @return A <code>BigDecimal</code> derived from <code>this</code>, with the decimal point moved <code>n</code>
* places to the left.
* <p>
* <code>n</code> may be negative, in which case the method returns the same result as <code>movePointLeft(-n)
* </code>.
- *
+ *
* @param n The <code>int</code> specifying the number of places to move the decimal point rightwards.
* @return A <code>BigDecimal</code> derived from <code>this</code>, with the decimal point moved <code>n</code>
* places to the right.
* Returns the scale of this <code>BigDecimal</code>. Returns a non-negative <code>int</code> which is the scale of
* the number. The scale is the number of digits in the decimal part of the number if the number were formatted
* without exponential notation.
- *
+ *
* @return An <code>int</code> whose value is the scale of this <code>BigDecimal</code>.
* @stable ICU 2.0
*/
* <p>
* The same as {@link #setScale(int, int)}, where the first parameter is the scale, and the second is <code>
* MathContext.ROUND_UNNECESSARY</code>.
- *
+ *
* @param scale The <code>int</code> specifying the scale of the resulting <code>BigDecimal</code>.
* @return A plain <code>BigDecimal</code> with the given scale.
* @throws ArithmeticException if <code>scale</code> is negative.
* <p>
* If <code>round</code> is <code>MathContext.ROUND_UNNECESSARY</code>, an <code>ArithmeticException</code> is
* thrown if any discarded digits are non-zero.
- *
+ *
* @param scale The <code>int</code> specifying the scale of the resulting <code>BigDecimal</code>.
* @param round The <code>int</code> rounding mode to be used for the division (see the {@link MathContext} class).
* @return A plain <code>BigDecimal</code> with the given scale.
* Converts this <code>BigDecimal</code> to a <code>short</code>. If the <code>BigDecimal</code> has a non-zero
* decimal part or is out of the possible range for a <code>short</code> (16-bit signed integer) result then an
* <code>ArithmeticException</code> is thrown.
- *
+ *
* @return A <code>short</code> equal in value to <code>this</code>.
* @throws ArithmeticException if <code>this</code> has a non-zero decimal part, or will not fit in a <code>short</code>.
* @stable ICU 2.0
* Returns the sign of this <code>BigDecimal</code>, as an <code>int</code>. This returns the <i>signum</i> function
* value that represents the sign of this <code>BigDecimal</code>. That is, -1 if the <code>BigDecimal</code> is
* negative, 0 if it is numerically equal to zero, or 1 if it is positive.
- *
+ *
* @return An <code>int</code> which is -1 if the <code>BigDecimal</code> is negative, 0 if it is numerically equal
* to zero, or 1 if it is positive.
* @stable ICU 2.0
*/
public int signum() {
// ind is declared as a byte field, so returning it from an int method widens it
// implicitly; the explicit (int) cast is redundant.
// NOTE(review): presumably the stored indicator values are exactly -1, 0 and 1 -- confirm
// against the isneg/iszero/ispos constant definitions.
- return (int) this.ind; // [note this assumes values for ind.]
+ return this.ind; // [note this assumes values for ind.]
}
/**
* <p>
* <i>(Note: this method is provided only in the <code>com.ibm.icu.math</code> version of the BigDecimal class. It
* would not be present in a <code>java.math</code> version.)</i>
- *
+ *
* @return The <code>java.math.BigDecimal</code> equal in value to this <code>BigDecimal</code>.
* @stable ICU 2.0
*/
* <p>
* Any decimal part is truncated (discarded). If an exception is desired should the decimal part be non-zero, use
* {@link #toBigIntegerExact()}.
- *
+ *
* @return The <code>java.math.BigInteger</code> equal in value to the integer part of this <code>BigDecimal</code>.
* @stable ICU 2.0
*/
res = clone(this); // safe copy
newlen = res.mant.length + res.exp;
newmant = new byte[newlen]; // [shorter]
- java.lang.System.arraycopy((java.lang.Object) res.mant, 0, (java.lang.Object) newmant, 0,
+ java.lang.System.arraycopy(res.mant, 0, newmant, 0,
newlen);
res.mant = newmant;
res.form = (byte) com.ibm.icu.math.MathContext.PLAIN;
* Converts this <code>BigDecimal</code> to a <code>java.math.BigInteger</code>.
* <p>
* An exception is thrown if the decimal part (if any) is non-zero.
- *
+ *
* @return The <code>java.math.BigInteger</code> equal in value to the integer part of this <code>BigDecimal</code>.
* @throws ArithmeticException if <code>this</code> has a non-zero decimal part.
* @stable ICU 2.0
* Returns the <code>BigDecimal</code> as a character array. The result of this method is the same as using the
* sequence <code>toString().toCharArray()</code>, but avoids creating the intermediate <code>String</code> and
* <code>char[]</code> objects.
- *
+ *
* @return The <code>char[]</code> array corresponding to this <code>BigDecimal</code>.
* @stable ICU 2.0
*/
* <p>
* By definition, using the {@link #BigDecimal(String)} constructor on the result <code>String</code> will create a
* <code>BigDecimal</code> that is exactly equal to the original <code>BigDecimal</code>.
- *
+ *
* @return The <code>String</code> exactly corresponding to this <code>BigDecimal</code>.
* @see #format(int, int)
* @see #format(int, int, int, int, int, int)
* @stable ICU 2.0
*/
+ @Override
public java.lang.String toString() {
    // layout() produces the character form of the number; wrap it in a new String.
    final char[] laidOut = layout();
    return new java.lang.String(laidOut);
}
* Returns the number as a <code>BigInteger</code> after removing the scale. That is, the number is expressed as a
* plain number, any decimal point is then removed (retaining the digits of any decimal part), and the result is
* then converted to a <code>BigInteger</code>.
- *
+ *
* @return The <code>java.math.BigInteger</code> equal in value to this <code>BigDecimal</code> multiplied by ten to
* the power of <code>this.scale()</code>.
* @stable ICU 2.0
* The number is constructed as though <code>num</code> had been converted to a <code>String</code> using the <code>
* Double.toString()</code> method and the {@link #BigDecimal(java.lang.String)} constructor had then been used.
* This is typically not an exact conversion.
- *
+ *
* @param dub The <code>double</code> to be translated.
* @return The <code>BigDecimal</code> equal in value to <code>dub</code>.
* @throws NumberFormatException if the parameter is infinite or not a number.
/**
* Translates a <code>long</code> to a <code>BigDecimal</code>. That is, returns a plain <code>BigDecimal</code>
* whose value is equal to the given <code>long</code>.
- *
+ *
* @param lint The <code>long</code> to be translated.
* @return The <code>BigDecimal</code> equal in value to <code>lint</code>.
* @stable ICU 2.0
* <code> (new BigDecimal(lint)).divide(TEN.pow(new BigDecimal(scale))) </code>
* <p>
* A <code>NumberFormatException</code> is thrown if <code>scale</code> is negative.
- *
+ *
* @param lint The <code>long</code> to be translated.
* @param scale The <code>int</code> scale to be applied.
* @return The <code>BigDecimal</code> equal in value to <code>lint</code>.
int $18 = mant.length;
i = 0;
for (; $18 > 0; $18--, i++) {
- cmant[i] = (char) (mant[i] + ((int) ('0')));
+ cmant[i] = (char) (mant[i] + (('0')));
}
}/* i */
return cmant; // non-negative integer
rec = new char[cmant.length + 1];
rec[0] = '-';
- java.lang.System.arraycopy((java.lang.Object) cmant, 0, (java.lang.Object) rec, 1, cmant.length);
+ java.lang.System.arraycopy(cmant, 0, rec, 1, cmant.length);
return rec;
}
rec[i] = '0';
}
}/* i */
- java.lang.System.arraycopy((java.lang.Object) cmant, 0, (java.lang.Object) rec, (needsign + 2) - mag,
+ java.lang.System.arraycopy(cmant, 0, rec, (needsign + 2) - mag,
cmant.length);
return rec;
}
rec = new char[len];
if (needsign != 0)
rec[0] = '-';
- java.lang.System.arraycopy((java.lang.Object) cmant, 0, (java.lang.Object) rec, needsign, cmant.length);
+ java.lang.System.arraycopy(cmant, 0, rec, needsign, cmant.length);
{
int $21 = mag - cmant.length;
i = needsign + cmant.length;
rec = new char[len];
if (needsign != 0)
rec[0] = '-';
- java.lang.System.arraycopy((java.lang.Object) cmant, 0, (java.lang.Object) rec, needsign, mag);
+ java.lang.System.arraycopy(cmant, 0, rec, needsign, mag);
rec[needsign + mag] = '.';
- java.lang.System.arraycopy((java.lang.Object) cmant, mag, (java.lang.Object) rec, (needsign + mag) + 1,
+ java.lang.System.arraycopy(cmant, mag, rec, (needsign + mag) + 1,
cmant.length - mag);
return rec;
}
/*
* Arg1 is operation code: D=divide, I=integer divide, R=remainder Arg2 is the rhs. Arg3 is the context. Arg4 is
* explicit scale iff code='D' or 'I' (-1 if none).
- *
+ *
* Underlying algorithm (complications for Remainder function and scaled division are omitted for clarity):
- *
+ *
* Test for x/0 and then 0/x Exp =Exp1 - Exp2 Exp =Exp +len(var1) -len(var2) Sign=Sign1 * Sign2 Pad accumulator (Var1)
* to double-length with 0's (pad1) Pad Var2 to same length as Var1 B2B=1st two digits of var2, +1 to allow for
* roundup have=0 Do until (have=digits+1 OR residue=0) if exp<0 then if integer divide/residue then leave
* if mult=0 then mult=1 this_digit=this_digit+mult subtract end inner_loop if have\=0 | this_digit\=0 then do
* output this_digit have=have+1; end var2=var2/10 exp=exp-1 end outer_loop exp=exp+1 -- set the proper exponent if
* have=0 then generate answer=0 Return to FINISHED Result defined by MATHV1
- *
+ *
* For extended commentary, see DMSRCN.
*/
} while (false);
}/* compare */
/* prepare for subtraction. Estimate BA (lengths the same) */
- ba = (int) var1[0]; // use only first digit
+ ba = var1[0]; // use only first digit
} // lengths the same
else {/* lhs longer than rhs */
/* use first two digits for estimate */
if (start == 0)
continue inner;
// shift left
- java.lang.System.arraycopy((java.lang.Object) var1, start, (java.lang.Object) var1, 0, var1len);
+ java.lang.System.arraycopy(var1, start, var1, 0, var1len);
}
}/* inner */
}/* i */
if (d < var1.length) {/* need to reduce */
newvar1 = new byte[d];
- java.lang.System.arraycopy((java.lang.Object) var1, 0, (java.lang.Object) newvar1, 0, d); // shorten
+ java.lang.System.arraycopy(var1, 0, newvar1, 0, d); // shorten
var1 = newvar1;
}
res.mant = var1;
// [we could let finish do this, during strip, if we adjusted
// the exponent; however, truncation avoids the strip loop]
newmant = new byte[have]; // shorten
- java.lang.System.arraycopy((java.lang.Object) res.mant, 0, (java.lang.Object) newmant, 0, have);
+ java.lang.System.arraycopy(res.mant, 0, newmant, 0, have);
res.mant = newmant;
}
return res.finish(set, true);
/*
* <sgml> Extend byte array to given length, padding with 0s. If no extension is required then return the same
* array. </sgml>
- *
+ *
* Arg1 is the source byte array Arg2 is the new length (longer)
*/
if (inarr.length == newlen)
return inarr;
newarr = new byte[newlen];
- java.lang.System.arraycopy((java.lang.Object) inarr, 0, (java.lang.Object) newarr, 0, inarr.length);
+ java.lang.System.arraycopy(inarr, 0, newarr, 0, inarr.length);
// 0 padding is carried out by the JVM on allocation initialization
return newarr;
}
* <sgml> Add or subtract two >=0 integers in byte arrays <p>This routine performs the calculation: <pre> C=A+(B*M)
* </pre> Where M is in the range -9 through +9 <p> If M<0 then A>=B must be true, so the result is always
* non-negative.
- *
+ *
* Leading zeros are not removed after a subtraction. The result is either the same length as the longer of A and B,
* or 1 longer than that (if a carry occurred).
- *
+ *
* A is not altered unless Arg6 is 1. B is never altered.
- *
+ *
* Arg1 is A Arg2 is A length to use (if longer than A, pad with 0's) Arg3 is B Arg4 is B length to use (if longer
* than B, pad with 0's) Arg5 is M, the multiplier Arg6 is 1 if A can be used to build the result (if it fits)
- *
+ *
* This routine is severely performance-critical;any change here must be measured (timed) to assure no performance
* degradation.
*/
maxarr = bp;
if (maxarr < ap)
maxarr = ap;
- reb = (byte[]) null; // result byte array
+ reb = null; // result byte array
if (reuse)
if ((maxarr + 1) == alength)
reb = a; // OK to reuse A
// if digit<0 then signal ArithmeticException("internal.error ["digit"]")
/* We have carry -- need to make space for the extra digit */
- newarr = (byte[]) null;
+ newarr = null;
if (reuse)
if ((maxarr + 2) == a.length)
newarr = a; // OK to reuse A
}
}/* i */
else
- java.lang.System.arraycopy((java.lang.Object) reb, 0, (java.lang.Object) newarr, 1, maxarr + 1);
+ java.lang.System.arraycopy(reb, 0, newarr, 1, maxarr + 1);
return newarr;
}
/*
* <sgml> Round to specified digits, if necessary. Arg1 is requested length (digits to round to) [may be <=0 when
* called from format, dodivide, etc.] Arg2 is rounding mode returns this, for convenience
- *
+ *
* ind and exp are adjusted, but not cleared for a mantissa of zero
- *
+ *
* The length of the mantissa returned will be Arg1, except when Arg1 is 0, in which case the returned mantissa
* length will be 1. </sgml>
*/
return this; // nowt to do
exp = exp + adjust; // exponent of result
- sign = (int) ind; // save [assumes -1, 0, 1]
+ sign = ind; // save [assumes -1, 0, 1]
oldmant = mant; // save
if (len > 0) {
// remove the unwanted digits
mant = new byte[len];
- java.lang.System.arraycopy((java.lang.Object) oldmant, 0, (java.lang.Object) mant, 0, len);
+ java.lang.System.arraycopy(oldmant, 0, mant, 0, len);
reuse = true; // can reuse mantissa
first = oldmant[len]; // first of discarded digits
} else {/* len<=0 */
// drop rightmost digit and raise exponent
exp++;
// mant is already the correct length
- java.lang.System.arraycopy((java.lang.Object) newmant, 0, (java.lang.Object) mant, 0,
+ java.lang.System.arraycopy(newmant, 0, mant, 0,
mant.length);
} else
mant = newmant;
/*
* <sgml> Test if rightmost digits are all 0. Arg1 is a mantissa array to test Arg2 is the offset of first digit to
* check [may be negative; if so, digits to left are 0's] returns 1 if all the digits starting at Arg2 are 0
- *
+ *
* Arg2 may be beyond array bounds, in which case 1 is returned </sgml>
*/
}/* i */
if (d < this.mant.length) {/* need to reduce */
newmant = new byte[d];
- java.lang.System.arraycopy((java.lang.Object) this.mant, 0, (java.lang.Object) newmant, 0, d);
+ java.lang.System.arraycopy(this.mant, 0, newmant, 0, d);
this.mant = newmant;
}
}
if (i > 0) {
do {
newmant = new byte[this.mant.length - i];
- java.lang.System.arraycopy((java.lang.Object) this.mant, i, (java.lang.Object) newmant, 0,
+ java.lang.System.arraycopy(this.mant, i, newmant, 0,
this.mant.length - i);
this.mant = newmant;
} while (false);
public final class MathContext implements java.io.Serializable{
//private static final java.lang.String $0="MathContext.nrx";
-
+
/* ----- Properties ----- */
/* properties public constant */
/**
* @stable ICU 2.0
*/
public static final int PLAIN=0; // [no exponent]
-
+
/**
* Standard floating point notation (with scientific exponential
* format, where there is one digit before any decimal point).
* @stable ICU 2.0
*/
public static final int SCIENTIFIC=1; // 1 digit before .
-
+
/**
* Standard floating point notation (with engineering exponential
* format, where the power of ten is a multiple of 3).
* @stable ICU 2.0
*/
public static final int ENGINEERING=2; // 1-3 digits before .
-
+
// The rounding modes match the original BigDecimal class values
/**
* Rounding mode to round to a more positive number.
* @stable ICU 2.0
*/
public static final int ROUND_CEILING=2;
-
+
/**
* Rounding mode to round towards zero.
* Used as a setting to control the rounding mode used during a
* @stable ICU 2.0
*/
public static final int ROUND_DOWN=1;
-
+
/**
* Rounding mode to round to a more negative number.
* Used as a setting to control the rounding mode used during a
* @stable ICU 2.0
*/
public static final int ROUND_FLOOR=3;
-
+
/**
* Rounding mode to round to nearest neighbor, where an equidistant
* value is rounded down.
* @stable ICU 2.0
*/
public static final int ROUND_HALF_DOWN=5;
-
+
/**
* Rounding mode to round to nearest neighbor, where an equidistant
* value is rounded to the nearest even neighbor.
* @stable ICU 2.0
*/
public static final int ROUND_HALF_EVEN=6;
-
+
/**
* Rounding mode to round to nearest neighbor, where an equidistant
* value is rounded up.
* @stable ICU 2.0
*/
public static final int ROUND_HALF_UP=4;
-
+
/**
* Rounding mode to assert that no rounding is necessary.
* Used as a setting to control the rounding mode used during a
* @stable ICU 2.0
*/
public static final int ROUND_UNNECESSARY=7;
-
+
/**
* Rounding mode to round away from zero.
* Used as a setting to control the rounding mode used during a
* @stable ICU 2.0
*/
public static final int ROUND_UP=0;
-
-
+
+
/* properties shared */
/**
* The number of digits (precision) to be used for an operation.
* @serial
*/
int digits;
-
+
/**
* The form of results from an operation.
* <p>
* @serial
*/
int form; // values for this must fit in a byte
-
+
/**
* Controls whether lost digits checking is enabled for an
* operation.
* @serial
*/
boolean lostDigits;
-
+
/**
* The rounding algorithm to be used for an operation.
* <p>
* @serial
*/
int roundingMode;
-
+
/* properties private constant */
// default settings
private static final int DEFAULT_FORM=SCIENTIFIC;
private static final int DEFAULT_DIGITS=9;
private static final boolean DEFAULT_LOSTDIGITS=false;
private static final int DEFAULT_ROUNDINGMODE=ROUND_HALF_UP;
-
+
/* properties private constant */
-
+
private static final int MIN_DIGITS=0; // smallest value for DIGITS.
private static final int MAX_DIGITS=999999999; // largest value for DIGITS. If increased,
// the BigDecimal class may need update.
// list of valid rounding mode values, most common two first
private static final int ROUNDS[]=new int[]{ROUND_HALF_UP,ROUND_UNNECESSARY,ROUND_CEILING,ROUND_DOWN,ROUND_FLOOR,ROUND_HALF_DOWN,ROUND_HALF_EVEN,ROUND_UP};
-
-
+
+
private static final java.lang.String ROUNDWORDS[]=new java.lang.String[]{"ROUND_HALF_UP","ROUND_UNNECESSARY","ROUND_CEILING","ROUND_DOWN","ROUND_FLOOR","ROUND_HALF_DOWN","ROUND_HALF_EVEN","ROUND_UP"}; // matching names of the ROUNDS values
-
-
-
-
+
+
+
+
/* properties private constant unused */
-
+
// Serialization version
private static final long serialVersionUID=7163376998892515376L;
-
+
/* properties public constant */
/**
* A <code>MathContext</code> object initialized to the default
*/
public static final com.ibm.icu.math.MathContext DEFAULT=new com.ibm.icu.math.MathContext(DEFAULT_DIGITS,DEFAULT_FORM,DEFAULT_LOSTDIGITS,DEFAULT_ROUNDINGMODE);
-
-
-
+
+
+
/* ----- Constructors ----- */
-
+
/**
* Constructs a new <code>MathContext</code> with a specified
* precision.
* @throws IllegalArgumentException parameter out of range.
* @stable ICU 2.0
*/
-
+
public MathContext(int setdigits) {
    // Only the precision is caller-supplied; form, lost-digits checking and
    // rounding mode take the DEFAULT_* values. Validation happens in the
    // four-argument constructor this delegates to.
    this(setdigits, DEFAULT_FORM, DEFAULT_LOSTDIGITS, DEFAULT_ROUNDINGMODE);
}
-
+
/**
* Constructs a new <code>MathContext</code> with a specified
* precision and form.
* @throws IllegalArgumentException parameter out of range.
* @stable ICU 2.0
*/
-
+
public MathContext(int setdigits, int setform) {
    // Precision and form are caller-supplied; lost-digits checking and
    // rounding mode take the DEFAULT_* values via the four-argument constructor.
    this(setdigits, setform, DEFAULT_LOSTDIGITS, DEFAULT_ROUNDINGMODE);
}
* @throws IllegalArgumentException parameter out of range.
* @stable ICU 2.0
*/
-
+
public MathContext(int setdigits, int setform, boolean setlostdigits) {
    // Everything except the rounding mode is caller-supplied; the rounding
    // mode falls back to DEFAULT_ROUNDINGMODE via the four-argument constructor.
    this(setdigits, setform, setlostdigits, DEFAULT_ROUNDINGMODE);
}
* @throws IllegalArgumentException parameter out of range.
* @stable ICU 2.0
*/
-
+
public MathContext(int setdigits,int setform,boolean setlostdigits,int setroundingmode){super();
-
-
+
+
// set values, after checking
- if (setdigits!=DEFAULT_DIGITS)
+ if (setdigits!=DEFAULT_DIGITS)
{
- if (setdigits<MIN_DIGITS)
+ if (setdigits<MIN_DIGITS)
throw new java.lang.IllegalArgumentException("Digits too small:"+" "+setdigits);
- if (setdigits>MAX_DIGITS)
+ if (setdigits>MAX_DIGITS)
throw new java.lang.IllegalArgumentException("Digits too large:"+" "+setdigits);
}
{/*select*/
throw new java.lang.IllegalArgumentException("Bad form value:"+" "+setform);
}
}
- if ((!(isValidRound(setroundingmode))))
+ if ((!(isValidRound(setroundingmode))))
throw new java.lang.IllegalArgumentException("Bad roundingMode value:"+" "+setroundingmode);
digits=setdigits;
form=setform;
* setting
* @stable ICU 2.0
*/
-
+
public int getDigits() {
    // Plain accessor: hands back the stored digits (precision) setting unchanged.
    return this.digits;
}
* @return an <code>int</code> which is the value of the form setting
* @stable ICU 2.0
*/
-
+
public int getForm() {
    // Plain accessor: hands back the stored form setting unchanged.
    return this.form;
}
* setting
* @stable ICU 2.0
*/
-
+
public boolean getLostDigits() {
    // Plain accessor: hands back the stored lost-digits-checking flag unchanged.
    return this.lostDigits;
}
* setting
* @stable ICU 2.0
*/
-
+
public int getRoundingMode() {
    // Plain accessor: hands back the stored rounding-mode setting unchanged.
    return this.roundingMode;
}
* @return a <code>String</code> representing the context settings.
* @stable ICU 2.0
*/
-
+
+ @Override
public java.lang.String toString(){
java.lang.String formstr=null;
int r=0;
}
}
{int $1=ROUNDS.length;r=0;r:for(;$1>0;$1--,r++){
- if (roundingMode==ROUNDS[r])
+ if (roundingMode==ROUNDS[r])
{
roundword=ROUNDWORDS[r];
break r;
return "digits="+digits+" "+"form="+formstr+" "+"lostDigits="+(lostDigits?"1":"0")+" "+"roundingMode="+roundword;
}
-
+
/* <sgml> Test whether round is valid. </sgml> */
// This could be made shared for use by BigDecimal for setScale.
-
+
private static boolean isValidRound(int testround){
int r=0;
{int $2=ROUNDS.length;for(r=0;$2>0;$2--,r++){
- if (testround==ROUNDS[r])
+ if (testround==ROUNDS[r])
return true;
}
}/*r*/
private char tailChar;
/**
- * Convert a range of text in the source array, putting the result
+ * Convert a range of text in the source array, putting the result
* into a range of text in the destination array, and return the number
* of characters written.
*
* @param sourceStart The start of the range of text to convert
* @param sourceLength The length of the range of text to convert
* @param dest The destination array that will receive the result.
- * It may be <code>NULL</code> only if <code>destSize</code> is 0.
+ * It may be <code>NULL</code> only if <code>destSize</code> is 0.
* @param destStart The start of the range of the destination buffer to use.
* @param destSize The size (capacity) of the destination buffer.
* If <code>destSize</code> is 0, then no output is produced,
* but the necessary buffer size is returned ("preflighting"). This
- * does not validate the text against the options, for example,
+ * does not validate the text against the options, for example,
* if letters are being unshaped, and spaces are being consumed
- * following lamalef, this will not detect a lamalef without a
+ * following lamalef, this will not detect a lamalef without a
* corresponding space. An error will be thrown when the actual
* conversion is attempted.
* @return The number of chars written to the destination buffer.
}
if ((destSize != 0) &&
(destStart < 0 || destSize < 0 || destStart + destSize > dest.length)) {
- throw new IllegalArgumentException("bad dest start (" + destStart +
- ") or size (" + destSize +
+ throw new IllegalArgumentException("bad dest start (" + destStart +
+ ") or size (" + destSize +
") for buffer of length " + dest.length);
}
/* Validate input options */
throw new IllegalArgumentException("Wrong Lam Alef argument");
}
///CLOVER:ON
-
+
/* Validate Tashkeel (Tashkeel replacement options should be enabled in shaping mode only)*/
if(((options&TASHKEEL_MASK) != 0) && (options&LETTERS_MASK) == LETTERS_UNSHAPE) {
throw new IllegalArgumentException("Tashkeel replacement should not be enabled in deshaping mode ");
* Construct ArabicShaping using the options flags.
* The flags are as follows:<br>
* 'LENGTH' flags control whether the text can change size, and if not,
- * how to maintain the size of the text when LamAlef ligatures are
+ * how to maintain the size of the text when LamAlef ligatures are
* formed or broken.<br>
* 'TEXT_DIRECTION' flags control whether the text is read and written
* in visual order or in logical order.<br>
if ((options & DIGITS_MASK) > 0x80) {
throw new IllegalArgumentException("bad DIGITS options");
}
-
+
isLogical = ( (options & TEXT_DIRECTION_MASK) == TEXT_DIRECTION_LOGICAL );
/* Validate options */
spacesRelativeToTextBeginEnd = ( (options & SPACES_RELATIVE_TO_TEXT_MASK) == SPACES_RELATIVE_TO_TEXT_BEGIN_END );
tailChar = OLD_TAIL_CHAR;
}
}
-
- /* Seen Tail options */
+
+ /* Seen Tail options */
/**
* Memory option: the result must have the same length as the source.
- * Shaping mode: The SEEN family character will expand into two characters using space near
+ * Shaping mode: The SEEN family character will expand into two characters using space near
* the SEEN family character(i.e. the space after the character).
* if there are no spaces found, ArabicShapingException will be thrown
*
*/
public static final int SEEN_TWOCELL_NEAR = 0x200000;
- /** Bit mask for Seen memory options.
+ /** Bit mask for Seen memory options.
* @stable ICU 4.2
*/
public static final int SEEN_MASK = 0x700000;
- /* YehHamza options */
+ /* YehHamza options */
/**
* Memory option: the result must have the same length as the source.
- * Shaping mode: The YEHHAMZA character will expand into two characters using space near it
+ * Shaping mode: The YEHHAMZA character will expand into two characters using space near it
* (i.e. the space after the character)
* if there are no spaces found, ArabicShapingException will be thrown
*
* De-shaping mode: Any Yeh (final or isolated) character followed by Hamza character will be
* replaced by one cell YehHamza and space will replace the Hamza.
* Affects: YehHamza options
- * @stable ICU 4.2
+ * @stable ICU 4.2
*/
public static final int YEHHAMZA_TWOCELL_NEAR = 0x1000000;
- /** Bit mask for YehHamza memory options.
+ /** Bit mask for YehHamza memory options.
* @stable ICU 4.2
*/
public static final int YEHHAMZA_MASK = 0x3800000;
- /* New Tashkeel options */
+ /* New Tashkeel options */
/**
* Memory option: the result must have the same length as the source.
- * Shaping mode: Tashkeel characters will be replaced by spaces.
+ * Shaping mode: Tashkeel characters will be replaced by spaces.
* Spaces will be placed at beginning of the buffer
*
* De-shaping mode: N/A
/**
* Memory option: the result must have the same length as the source.
- * Shaping mode: Tashkeel characters will be replaced by spaces.
+ * Shaping mode: Tashkeel characters will be replaced by spaces.
* Spaces will be placed at end of the buffer
*
* De-shaping mode: N/A
/**
* Memory option: allow the result to have a different length than the source.
- * Shaping mode: Tashkeel characters will be removed, buffer length will shrink.
- * De-shaping mode: N/A
+ * Shaping mode: Tashkeel characters will be removed, buffer length will shrink.
+ * De-shaping mode: N/A
*
* Affects: Tashkeel options
* @stable ICU 4.2
*/
public static final int TASHKEEL_REPLACE_BY_TATWEEL = 0xC0000;
- /** Bit mask for Tashkeel replacement with Space or Tatweel memory options.
+ /** Bit mask for Tashkeel replacement with Space or Tatweel memory options.
* @stable ICU 4.2
*/
public static final int TASHKEEL_MASK = 0xE0000;
-
- /* Space location Control options */
+
+ /* Space location Control options */
/**
* This option affects the meaning of BEGIN and END options. If this option is not used the default
- * for BEGIN and END will be as following:
+ * for BEGIN and END will be as following:
* The Default (for both Visual LTR, Visual RTL and Logical Text)
* 1. BEGIN always refers to the start address of physical memory.
* 2. END always refers to the end address of physical memory.
*
- * If this option is used it will swap the meaning of BEGIN and END only for Visual LTR text.
+ * If this option is used it will swap the meaning of BEGIN and END only for Visual LTR text.
*
* The effect on BEGIN and END Memory Options will be as follows:
- * A. BEGIN For Visual LTR text: This will be the beginning (right side) of the visual text
+ * A. BEGIN For Visual LTR text: This will be the beginning (right side) of the visual text
* (corresponding to the physical memory address end, same as END in default behavior)
- * B. BEGIN For Logical text: Same as BEGIN in default behavior.
- * C. END For Visual LTR text: This will be the end (left side) of the visual text. (corresponding to
- * the physical memory address beginning, same as BEGIN in default behavior)
- * D. END For Logical text: Same as END in default behavior.
+ * B. BEGIN For Logical text: Same as BEGIN in default behavior.
+ * C. END For Visual LTR text: This will be the end (left side) of the visual text. (corresponding to
+ * the physical memory address beginning, same as BEGIN in default behavior)
+ * D. END For Logical text: Same as END in default behavior.
* Affects: All LamAlef BEGIN, END and AUTO options.
- * @stable ICU 4.2
+ * @stable ICU 4.2
*/
public static final int SPACES_RELATIVE_TO_TEXT_BEGIN_END = 0x4000000;
- /** Bit mask for swapping BEGIN and END for Visual LTR text
+ /** Bit mask for swapping BEGIN and END for Visual LTR text
* @stable ICU 4.2
*/
public static final int SPACES_RELATIVE_TO_TEXT_MASK = 0x4000000;
-
+
/**
- * If this option is used, shaping will use the new Unicode code point for TAIL (i.e. 0xFE73).
+ * If this option is used, shaping will use the new Unicode code point for TAIL (i.e. 0xFE73).
* If this option is not specified (Default), old unofficial Unicode TAIL code point is used (i.e. 0x200B)
- * De-shaping will not use this option as it will always search for both the new Unicode code point for the
+ * De-shaping will not use this option as it will always search for both the new Unicode code point for the
* TAIL (i.e. 0xFE73) or the old unofficial Unicode TAIL code point (i.e. 0x200B) and de-shape the
* Seen-Family letter accordingly.
*
*/
public static final int SHAPE_TAIL_NEW_UNICODE = 0x8000000;
- /** Bit mask for new Unicode Tail option
+ /** Bit mask for new Unicode Tail option
* @stable ICU 4.2
*/
public static final int SHAPE_TAIL_TYPE_MASK = 0x8000000;
* @stable ICU 4.2
*/
public static final int LAMALEF_RESIZE = 0;
-
+
/**
* Memory option: the result must have the same length as the source.
* If more room is necessary, then try to consume spaces next to modified characters.
* @stable ICU 4.2
*/
public static final int LAMALEF_NEAR = 1 ;
-
+
/**
* Memory option: the result must have the same length as the source.
* If more room is necessary, then try to consume spaces at the end of the text.
* @stable ICU 4.2
*/
public static final int LAMALEF_END = 2;
-
+
/**
* Memory option: the result must have the same length as the source.
* If more room is necessary, then try to consume spaces at the beginning of the text.
* This option is an alias to LENGTH_FIXED_SPACES_AT_BEGINNING
* @stable ICU 4.2
*/
- public static final int LAMALEF_BEGIN = 3;
+ public static final int LAMALEF_BEGIN = 3;
/**
* Memory option: the result must have the same length as the source.
* is no space at beginning of the buffer, use spaces at the near (i.e. the space
* after the LAMALEF character).
*
- * Deshaping Mode: Perform the same function as the flag equals LAMALEF_END.
+ * Deshaping Mode: Perform the same function as the flag equals LAMALEF_END.
* Affects: LamAlef options
* @stable ICU 4.2
*/
- public static final int LAMALEF_AUTO = 0x10000;
-
- /**
- * Bit mask for memory options.
+ public static final int LAMALEF_AUTO = 0x10000;
+
+ /**
+ * Bit mask for memory options.
* @stable ICU 2.0
*/
public static final int LENGTH_MASK = 0x10003;
- /** Bit mask for LamAlef memory options.
+ /** Bit mask for LamAlef memory options.
* @stable ICU 4.2
*/
public static final int LAMALEF_MASK = 0x10003;
- /**
- * Direction indicator: the source is in logical (keyboard) order.
+ /**
+ * Direction indicator: the source is in logical (keyboard) order.
* @stable ICU 2.0
*/
public static final int TEXT_DIRECTION_LOGICAL = 0;
* @stable ICU 4.2
*/
public static final int TEXT_DIRECTION_VISUAL_RTL = 0;
-
- /**
+
+ /**
* Direction indicator: the source is in visual (display) order, that is,
* the leftmost displayed character is stored first.
* @stable ICU 2.0
*/
public static final int TEXT_DIRECTION_VISUAL_LTR = 4;
- /**
- * Bit mask for direction indicators.
+ /**
+ * Bit mask for direction indicators.
* @stable ICU 2.0
*/
public static final int TEXT_DIRECTION_MASK = 4;
/**
- * Letter shaping option: do not perform letter shaping.
+ * Letter shaping option: do not perform letter shaping.
* @stable ICU 2.0
*/
public static final int LETTERS_NOOP = 0;
- /**
+ /**
* Letter shaping option: replace normative letter characters in the U+0600 (Arabic) block,
* by shaped ones in the U+FE70 (Presentation Forms B) block. Performs Lam-Alef ligature
* substitution.
*/
public static final int LETTERS_SHAPE = 8;
- /**
+ /**
* Letter shaping option: replace shaped letter characters in the U+FE70 (Presentation Forms B) block
* by normative ones in the U+0600 (Arabic) block. Converts Lam-Alef ligatures to pairs of Lam and
* Alef characters, consuming spaces if required.
*/
public static final int LETTERS_SHAPE_TASHKEEL_ISOLATED = 0x18;
- /**
- * Bit mask for letter shaping options.
+ /**
+ * Bit mask for letter shaping options.
* @stable ICU 2.0
*/
public static final int LETTERS_MASK = 0x18;
- /**
- * Digit shaping option: do not perform digit shaping.
+ /**
+ * Digit shaping option: do not perform digit shaping.
* @stable ICU 2.0
*/
public static final int DIGITS_NOOP = 0;
* Digit shaping option:
* Replace European digits (U+0030...U+0039) by Arabic-Indic digits
* if the most recent strongly directional character
- * is an Arabic letter (its Bidi direction value is RIGHT_TO_LEFT_ARABIC).
+ * is an Arabic letter (its Bidi direction value is RIGHT_TO_LEFT_ARABIC).
* The initial state at the start of the text is assumed to be not an Arabic,
* letter, so European digits at the start of the text will not change.
* Compare to DIGITS_ALEN2AN_INIT_AL.
* Digit shaping option:
* Replace European digits (U+0030...U+0039) by Arabic-Indic digits
* if the most recent strongly directional character
- * is an Arabic letter (its Bidi direction value is RIGHT_TO_LEFT_ARABIC).
+ * is an Arabic letter (its Bidi direction value is RIGHT_TO_LEFT_ARABIC).
* The initial state at the start of the text is assumed to be an Arabic,
* letter, so European digits at the start of the text will change.
* Compare to DIGITS_ALEN2AN_INT_LR.
/** Not a valid option value. */
//private static final int DIGITS_RESERVED = 0xa0;
- /**
- * Bit mask for digit shaping options.
+ /**
+ * Bit mask for digit shaping options.
* @stable ICU 2.0
*/
public static final int DIGITS_MASK = 0xe0;
- /**
- * Digit type option: Use Arabic-Indic digits (U+0660...U+0669).
+ /**
+ * Digit type option: Use Arabic-Indic digits (U+0660...U+0669).
* @stable ICU 2.0
*/
public static final int DIGIT_TYPE_AN = 0;
- /**
- * Digit type option: Use Eastern (Extended) Arabic-Indic digits (U+06f0...U+06f9).
+ /**
+ * Digit type option: Use Eastern (Extended) Arabic-Indic digits (U+06f0...U+06f9).
* @stable ICU 2.0
*/
public static final int DIGIT_TYPE_AN_EXTENDED = 0x100;
- /**
- * Bit mask for digit type options.
+ /**
+ * Bit mask for digit type options.
* @stable ICU 2.0
*/
public static final int DIGIT_TYPE_MASK = 0x0100; // 0x3f00?
private static final char LAM_CHAR = '\u0644';
private static final char SPACE_CHAR = '\u0020';
private static final char SHADDA_CHAR = '\uFE7C';
- private static final char SHADDA06_CHAR = '\u0651';
+ private static final char SHADDA06_CHAR = '\u0651';
private static final char TATWEEL_CHAR = '\u0640';
private static final char SHADDA_TATWEEL_CHAR = '\uFE7D';
private static final char NEW_TAIL_CHAR = '\uFE73';
/**
* @stable ICU 2.0
*/
+ @Override
public boolean equals(Object rhs) {
- return rhs != null &&
- rhs.getClass() == ArabicShaping.class &&
+ return rhs != null &&
+ rhs.getClass() == ArabicShaping.class &&
options == ((ArabicShaping)rhs).options;
}
* @stable ICU 2.0
*/
///CLOVER:OFF
+ @Override
public int hashCode() {
    // The options bit set is used directly as the hash value.
    return this.options;
}
/**
* @stable ICU 2.0
*/
+ @Override
public String toString() {
StringBuilder buf = new StringBuilder(super.toString());
buf.append('[');
private static final int LINKL = 2;
private static final int LINK_MASK = 3;
- private static final int irrelevantPos[] = {
- 0x0, 0x2, 0x4, 0x6, 0x8, 0xA, 0xC, 0xE
+ private static final int irrelevantPos[] = {
+ 0x0, 0x2, 0x4, 0x6, 0x8, 0xA, 0xC, 0xE
};
/*
private static final char convertLamAlef[] = {
- '\u0622', // FEF5
+ '\u0622', // FEF5
'\u0622', // FEF6
'\u0623', // FEF7
'\u0623', // FEF8
'\u0625', // FEF9
'\u0625', // FEFA
'\u0627', // FEFB
- '\u0627' // FEFC
+ '\u0627' // FEFC
};
*/
-
+
private static final int tailFamilyIsolatedFinal[] = {
/* FEB1 */ 1,
/* FEB2 */ 1,
* Function: Converts the Alef characters into an equivalent
* LamAlef location in the 0x06xx Range, this is an
* intermediate stage in the operation of the program
- * later it'll be converted into the 0xFExx LamAlefs
+ * later it'll be converted into the 0xFExx LamAlefs
* in the shaping function.
*/
private static char changeLamAlef(char ch) {
* function, this function returns 1 or 2 for these special characters
*/
private static int specialChar(char ch) {
- if ((ch > '\u0621' && ch < '\u0626') ||
+ if ((ch > '\u0621' && ch < '\u0626') ||
(ch == '\u0627') ||
(ch > '\u062E' && ch < '\u0633') ||
(ch > '\u0647' && ch < '\u064A') ||
return 1;
} else if (ch >= '\u064B' && ch<= '\u0652') {
return 2;
- } else if (ch >= 0x0653 && ch <= 0x0655 ||
+ } else if (ch >= 0x0653 && ch <= 0x0655 ||
ch == 0x0670 ||
ch >= 0xFE70 && ch <= 0xFE7F) {
return 3;
return 0;
}
}
-
+
/*
* Name : getLink
- * Function: Resolves the link between the characters as
+ * Function: Resolves the link between the characters as
* Arabic characters have four forms :
* Isolated, Initial, Middle and Final Form
*/
* Function: Counts the number of spaces
* at each end of the logical buffer
*/
- private static int countSpacesLeft(char[] dest,
+ private static int countSpacesLeft(char[] dest,
int start,
int count) {
for (int i = start, e = start + count; i < e; ++i) {
/*
*Name : isSeenTailFamilyChar
- *Function : returns 1 if the character is a seen family isolated character
+ *Function : returns 1 if the character is a seen family isolated character
* in the FE range otherwise returns 0
*/
/*
*Name : isTailChar
- *Function : returns true if the character matches one of the tail characters
+ *Function : returns true if the character matches one of the tail characters
* (0xfe73 or 0x200b) otherwise returns false
*/
return false;
}
}
-
+
/*
*Name : isAlefMaksouraChar
- *Function : returns true if the character is a Alef Maksoura Final or isolated
+ *Function : returns true if the character is a Alef Maksoura Final or isolated
* otherwise returns false
*/
private static boolean isAlefMaksouraChar(char ch) {
return ( (ch == 0xFEEF) || ( ch == 0xFEF0) || (ch == 0x0649));
- }
+ }
/*
* Name : isYehHamzaChar
}else{
return false;
}
- }
+ }
/*
*Name : isTashkeelCharFE
*Function : Returns true for Tashkeel characters in FE range else return false
*/
-
+
private static boolean isTashkeelCharFE(char ch) {
    // U+FE75 is the one code point inside the range that is excluded.
    if (ch == 0xFE75) {
        return false;
    }
    // Otherwise accept anything in U+FE70..U+FE7F (inclusive).
    final boolean atOrAboveStart = ch >= 0xFE70;
    final boolean atOrBelowEnd = ch <= 0xFE7F;
    return atOrAboveStart && atOrBelowEnd;
}
- /*
+ /*
* Name: isTashkeelOnTatweelChar
- * Function: Checks if the Tashkeel Character is on Tatweel or not,if the
- * Tashkeel on tatweel (FE range), it returns 1 else if the
- * Tashkeel with shadda on tatweel (FC range)return 2 otherwise
+ * Function: Checks if the Tashkeel Character is on Tatweel or not,if the
+ * Tashkeel on tatweel (FE range), it returns 1 else if the
+ * Tashkeel with shadda on tatweel (FC range)return 2 otherwise
* returns 0
*/
private static int isTashkeelOnTatweelChar(char ch){
return 0;
}
}
-
+
/*
* Name: isIsolatedTashkeelChar
- * Function: Checks if the Tashkeel Character is in the isolated form
- * (i.e. Unicode FE range) returns 1 else if the Tashkeel
+ * Function: Checks if the Tashkeel Character is in the isolated form
+ * (i.e. Unicode FE range) returns 1 else if the Tashkeel
* with shadda is in the isolated form (i.e. Unicode FC range)
* returns 1 otherwise returns 0
*/
return 0;
}
}
-
+
/*
* Name : isAlefChar
* Function: Returns true for Alef characters else return false
private static boolean isAlefChar(char ch) {
return ch == '\u0622' || ch == '\u0623' || ch == '\u0625' || ch == '\u0627';
}
-
+
/*
* Name : isLamAlefChar
* Function: Returns true for LamAlef characters else return false
private int calculateSize(char[] source,
int sourceStart,
int sourceLength) {
-
+
int destSize = sourceLength;
-
+
switch (options & LETTERS_MASK) {
case LETTERS_SHAPE:
case LETTERS_SHAPE_TASHKEEL_ISOLATED:
return destSize;
}
-
-
+
+
/*
* Name : countSpaceSub
* Function: Counts number of times the subChar appears in the array
count++;
}
i++;
- }
- return count;
+ }
+ return count;
}
-
+
/*
* Name : shiftArray
* Function: Shifts characters to replace space sub characters
}
return w;
}
-
+
/*
* Name : handleTashkeelWithTatweel
- * Function : Replaces Tashkeel as following:
- * Case 1 :if the Tashkeel on tatweel, replace it with Tatweel.
- * Case 2 :if the Tashkeel aggregated with Shadda on Tatweel, replace
+ * Function : Replaces Tashkeel as follows:
+ * Case 1 :if the Tashkeel on tatweel, replace it with Tatweel.
+ * Case 2 :if the Tashkeel aggregated with Shadda on Tatweel, replace
* it with Shadda on Tatweel.
* Case 3: if the Tashkeel is isolated replace it with Space.
*
/*
*Name : handleGeneratedSpaces
*Function : The shapeUnicode function converts Lam + Alef into LamAlef + space,
- * and Tashkeel to space.
- * handleGeneratedSpaces function puts these generated spaces
+ * and Tashkeel to space.
+ * handleGeneratedSpaces function puts these generated spaces
* according to the options the user specifies. LamAlef and Tashkeel
- * spaces can be replaced at begin, at end, at near or decrease the
+ * spaces can be replaced at begin, at end, at near or decrease the
* buffer size.
*
* There is also Auto option for LamAlef and tashkeel, which will put
- * the spaces at end of the buffer (or end of text if the user used
+ * the spaces at end of the buffer (or end of text if the user used
* the option SPACES_RELATIVE_TO_TEXT_BEGIN_END).
*
- * If the text type was visual_LTR and the option
- * SPACES_RELATIVE_TO_TEXT_BEGIN_END was selected the END
+ * If the text type was visual_LTR and the option
+ * SPACES_RELATIVE_TO_TEXT_BEGIN_END was selected the END
* option will place the space at the beginning of the buffer and
- * BEGIN will place the space at the end of the buffer.
+ * BEGIN will place the space at the end of the buffer.
*/
- private int handleGeneratedSpaces(char[] dest,
+ private int handleGeneratedSpaces(char[] dest,
int start,
int length) {
-
+
int lenOptionsLamAlef = options & LAMALEF_MASK;
int lenOptionsTashkeel = options & TASHKEEL_MASK;
boolean lamAlefOn = false;
boolean tashkeelOn = false;
-
+
if (!isLogical & !spacesRelativeToTextBeginEnd) {
switch (lenOptionsLamAlef) {
case LAMALEF_BEGIN: lenOptionsLamAlef = LAMALEF_END; break;
case LAMALEF_END: lenOptionsLamAlef = LAMALEF_BEGIN; break;
default: break;
- }
+ }
switch (lenOptionsTashkeel){
case TASHKEEL_BEGIN: lenOptionsTashkeel = TASHKEEL_END; break;
case TASHKEEL_END: lenOptionsTashkeel = TASHKEEL_BEGIN; break;
default: break;
}
}
-
-
+
+
if (lenOptionsLamAlef == LAMALEF_NEAR) {
for (int i = start, e = i + length; i < e; ++i) {
if (dest[i] == LAMALEF_SPACE_SUB) {
dest[i] = SPACE_CHAR;
}
}
-
+
} else {
-
+
final int e = start + length;
int wL = countSpaceSub(dest, length, LAMALEF_SPACE_SUB);
int wT = countSpaceSub(dest, length, TASHKEEL_SPACE_SUB);
}
- if (lamAlefOn && (lenOptionsLamAlef == LAMALEF_END)) {
+ if (lamAlefOn && (lenOptionsLamAlef == LAMALEF_END)) {
shiftArray(dest, start, e, LAMALEF_SPACE_SUB);
while (wL > start) {
dest[--wL] = SPACE_CHAR;
- }
+ }
}
if (tashkeelOn && (lenOptionsTashkeel == TASHKEEL_END)){
dest[--wT] = SPACE_CHAR;
}
}
-
- lamAlefOn = false;
+
+ lamAlefOn = false;
tashkeelOn = false;
-
+
if (lenOptionsLamAlef == LAMALEF_RESIZE){
lamAlefOn = true;
}
if (lenOptionsTashkeel == TASHKEEL_RESIZE){
tashkeelOn = true;
}
-
+
if (lamAlefOn && (lenOptionsLamAlef == LAMALEF_RESIZE)){
shiftArray(dest, start, e, LAMALEF_SPACE_SUB);
wL = flipArray(dest,start,e, wL);
length = wL - start;
}
- if (tashkeelOn && (lenOptionsTashkeel == TASHKEEL_RESIZE)) {
+ if (tashkeelOn && (lenOptionsTashkeel == TASHKEEL_RESIZE)) {
shiftArray(dest, start, e, TASHKEEL_SPACE_SUB);
wT = flipArray(dest,start,e, wT);
length = wT - start;
- }
+ }
+
+ lamAlefOn = false;
+ tashkeelOn = false;
- lamAlefOn = false;
- tashkeelOn = false;
-
- if ((lenOptionsLamAlef == LAMALEF_BEGIN) ||
+ if ((lenOptionsLamAlef == LAMALEF_BEGIN) ||
(lenOptionsLamAlef == LAMALEF_AUTO)){
lamAlefOn = true;
}
}
}
}
-
+
return length;
}
-
-
+
+
/*
*Name :expandCompositCharAtBegin
*Function :Expands the LamAlef character to Lam and Alef consuming the required
- * space from beginning of the buffer. If the text type was visual_LTR
+ * space from beginning of the buffer. If the text type was visual_LTR
* and the option SPACES_RELATIVE_TO_TEXT_BEGIN_END was selected
* the spaces will be located at end of buffer.
* If there are no spaces to expand the LamAlef, an exception is thrown.
private boolean expandCompositCharAtBegin(char[] dest,int start, int length,
int lacount) {
boolean spaceNotFound = false;
-
+
if (lacount > countSpacesRight(dest, start, length)) {
spaceNotFound = true;
return spaceNotFound;
}
}
return spaceNotFound;
-
+
}
/*
*Name : expandCompositCharAtEnd
- *Function : Expands the LamAlef character to Lam and Alef consuming the
+ *Function : Expands the LamAlef character to Lam and Alef consuming the
* required space from end of the buffer. If the text type was
* Visual LTR and the option SPACES_RELATIVE_TO_TEXT_BEGIN_END
- * was used, the spaces will be consumed from begin of buffer. If
- * there are no spaces to expand the LamAlef, an exception is thrown.
+ * was used, the spaces will be consumed from begin of buffer. If
+ * there are no spaces to expand the LamAlef, an exception is thrown.
*/
private boolean expandCompositCharAtEnd(char[] dest,int start, int length,
int lacount){
boolean spaceNotFound = false;
-
+
if (lacount > countSpacesLeft(dest, start, length)) {
spaceNotFound = true;
return spaceNotFound;
/*
*Name : expandCompositCharAtNear
*Function : Expands the LamAlef character into Lam + Alef, YehHamza character
- * into Yeh + Hamza, SeenFamily character into SeenFamily character
- * + Tail, while consuming the space next to the character.
+ * into Yeh + Hamza, SeenFamily character into SeenFamily character
+ * + Tail, while consuming the space next to the character.
*/
private boolean expandCompositCharAtNear(char[] dest,int start, int length,
int yehHamzaOption, int seenTailOption, int lamAlefOption){
-
+
boolean spaceNotFound = false;
-
-
-
+
+
+
if (isNormalizedLamAlefChar(dest[start])) {
spaceNotFound = true;
return spaceNotFound;
}
- for (int i = start + length; --i >=start;) {
+ for (int i = start + length; --i >=start;) {
char ch = dest[i];
if (lamAlefOption == 1 && isNormalizedLamAlefChar(ch)) {
if (i>start &&dest[i-1] == SPACE_CHAR) {
return spaceNotFound;
}
}else if(yehHamzaOption == 1 && isYehHamzaChar(ch)){
-
+
if(i>start &&dest[i-1] == SPACE_CHAR){
dest[i] = yehHamzaToYeh[ch - YEH_HAMZAFE_CHAR];
dest[i-1] = HAMZAFE_CHAR;
spaceNotFound = true;
return spaceNotFound;
}
-
-
+
+
}
}
return false;
}
-
+
/*
* Name : expandCompositChar
* Function: LamAlef needs special handling as the LamAlef is
int lenOptionsLamAlef = options & LAMALEF_MASK;
int lenOptionsSeen = options & SEEN_MASK;
- int lenOptionsYehHamza = options & YEHHAMZA_MASK;
+ int lenOptionsYehHamza = options & YEHHAMZA_MASK;
boolean spaceNotFound = false;
-
+
if (!isLogical && !spacesRelativeToTextBeginEnd) {
switch (lenOptionsLamAlef) {
case LAMALEF_BEGIN: lenOptionsLamAlef = LAMALEF_END; break;
default: break;
}
}
-
+
if(shapingMode == 1){
if(lenOptionsLamAlef == LAMALEF_AUTO){
if(isLogical){
spaceNotFound = expandCompositCharAtBegin(dest, start, length, lacount);
if(spaceNotFound){
throw new ArabicShapingException("No spacefor lamalef");
- }
+ }
}else if(lenOptionsLamAlef == LAMALEF_NEAR){
spaceNotFound = expandCompositCharAtNear(dest, start, length,0,0,1);
if(spaceNotFound){
- throw new ArabicShapingException("No spacefor lamalef");
+ throw new ArabicShapingException("No spacefor lamalef");
}
}else if(lenOptionsLamAlef == LAMALEF_RESIZE){
for (int r = start + length, w = r + lacount; --r >= start;) {
if(lenOptionsSeen == SEEN_TWOCELL_NEAR){
spaceNotFound = expandCompositCharAtNear(dest, start, length,0,1,0);
if(spaceNotFound){
- throw new ArabicShapingException("No space for Seen tail expansion");
+ throw new ArabicShapingException("No space for Seen tail expansion");
}
}
if(lenOptionsYehHamza == YEHHAMZA_TWOCELL_NEAR){
spaceNotFound = expandCompositCharAtNear(dest, start, length,1,0,0);
if(spaceNotFound){
- throw new ArabicShapingException("No space for YehHamza expansion");
+ throw new ArabicShapingException("No space for YehHamza expansion");
}
- }
+ }
}
return length;
}
-
+
/* Convert the input buffer from FExx Range into 06xx Range
* to put all characters into the 06xx range
* even the lamalef is converted to the special region in
/*
* Name : deshapeNormalize
* Function: Convert the input buffer from FExx Range into 06xx Range
- * even the lamalef is converted to the special region in the 06xx range.
- * According to the options the user enters, all seen family characters
- * followed by a tail character are merged to seen tail family character and
+ * even the lamalef is converted to the special region in the 06xx range.
+ * According to the options the user enters, all seen family characters
+ * followed by a tail character are merged to seen tail family character and
* any yeh followed by a hamza character are merged to yehhamza character.
* Method returns the number of lamalef chars found.
*/
yehHamzaComposeEnabled = ((options&YEHHAMZA_MASK) == YEHHAMZA_TWOCELL_NEAR) ? 1 : 0;
seenComposeEnabled = ((options&SEEN_MASK) == SEEN_TWOCELL_NEAR)? 1 : 0;
-
+
for (int i = start, e = i + length; i < e; ++i) {
char ch = dest[i];
-
- if( (yehHamzaComposeEnabled == 1) && ((ch == HAMZA06_CHAR) || (ch == HAMZAFE_CHAR))
+
+ if( (yehHamzaComposeEnabled == 1) && ((ch == HAMZA06_CHAR) || (ch == HAMZAFE_CHAR))
&& (i < (length - 1)) && isAlefMaksouraChar(dest[i+1] )) {
dest[i] = SPACE_CHAR;
dest[i+1] = YEH_HAMZA_CHAR;
- } else if ( (seenComposeEnabled == 1) && (isTailChar(ch)) && (i< (length - 1))
+ } else if ( (seenComposeEnabled == 1) && (isTailChar(ch)) && (i< (length - 1))
&& (isSeenTailFamilyChar(dest[i+1])==1) ) {
dest[i] = SPACE_CHAR;
}
* Function: Converts an Arabic Unicode buffer in 06xx Range into a shaped
* arabic Unicode buffer in FExx Range
*/
- private int shapeUnicode(char[] dest,
+ private int shapeUnicode(char[] dest,
int start,
int length,
int destSize,
int tashkeelFlag)throws ArabicShapingException {
-
+
int lamalef_count = normalize(dest, start, length);
// resolve the link between the characters.
}
if (((currLink & ALEFTYPE) > 0) && ((lastLink & LAMTYPE) > 0)) {
- lamalef_found = true;
+ lamalef_found = true;
char wLamalef = changeLamAlef(dest[i]); // get from 0x065C-0x065f
if (wLamalef != '\u0000') {
// replace alef by marker, it will be removed later
currLink = getLink(wLamalef); // requires '\u0000', unfortunately
}
if ((i > 0) && (dest[i-1] == SPACE_CHAR))
- {
+ {
if ( isSeenFamilyChar(dest[i]) == 1){
seenfam_found = true;
} else if (dest[i] == YEH_HAMZA_CHAR) {
shape &= 0x1;
} else if (flag == 2) {
if (tashkeelFlag == 0 &&
- ((lastLink & LINKL) != 0) &&
- ((nextLink & LINKR) != 0) &&
- dest[i] != '\u064C' &&
+ ((lastLink & LINKL) != 0) &&
+ ((nextLink & LINKR) != 0) &&
+ dest[i] != '\u064C' &&
dest[i] != '\u064D' &&
- !((nextLink & ALEFTYPE) == ALEFTYPE &&
+ !((nextLink & ALEFTYPE) == ALEFTYPE &&
(lastLink & LAMTYPE) == LAMTYPE)) {
-
+
shape = 1;
-
+
} else if(tashkeelFlag == 2 && dest[i] == SHADDA06_CHAR){
shape = 1;
-
+
} else {
shape = 0;
}
}
if (flag == 2) {
- if (tashkeelFlag == 2 && dest[i] != SHADDA06_CHAR) {
+ if (tashkeelFlag == 2 && dest[i] != SHADDA06_CHAR) {
dest[i] = TASHKEEL_SPACE_SUB;
tashkeel_found = true;
}
else{
dest[i] = (char)('\uFE70' + irrelevantPos[dest[i] - '\u064B'] + shape);
}
- // else leave tashkeel alone
+ // else leave tashkeel alone
} else {
dest[i] = (char)('\uFE70' + (currLink >> 8) + shape);
}
}
}
- // If we found a lam/alef pair in the buffer
+ // If we found a lam/alef pair in the buffer
// call handleGeneratedSpaces to remove the spaces that were added
destSize = length;
* Function: Converts an Arabic Unicode buffer in FExx Range into unshaped
* arabic Unicode buffer in 06xx Range
*/
- private int deShapeUnicode(char[] dest,
+ private int deShapeUnicode(char[] dest,
int start,
int length,
int destSize) throws ArabicShapingException {
- int lamalef_count = deshapeNormalize(dest, start, length);
+ int lamalef_count = deshapeNormalize(dest, start, length);
// If there was a lamalef in the buffer call expandLamAlef
if (lamalef_count != 0) {
// need to adjust dest to fit expanded buffer... !!!
- destSize = expandCompositChar(dest, start, length, lamalef_count,DESHAPE_MODE);
+ destSize = expandCompositChar(dest, start, length, lamalef_count,DESHAPE_MODE);
} else {
destSize = length;
}
return destSize;
}
- private int internalShape(char[] source,
+ private int internalShape(char[] source,
int sourceStart,
int sourceLength,
char[] dest,
if (destSize == 0) {
if (((options & LETTERS_MASK) != LETTERS_NOOP) &&
((options & LAMALEF_MASK) == LAMALEF_RESIZE)) {
-
+
return calculateSize(source, sourceStart, sourceLength);
} else {
return sourceLength; // by definition
break;
case LETTERS_SHAPE:
- if( ((options&TASHKEEL_MASK) != 0) &&
+ if( ((options&TASHKEEL_MASK) != 0) &&
((options&TASHKEEL_MASK) !=TASHKEEL_REPLACE_BY_TATWEEL)) {
/* Call the shaping function with tashkeel flag == 2 for removal of tashkeel */
outputSize = shapeUnicode(temp, 0, sourceLength, destSize, 2);
case LETTERS_UNSHAPE:
outputSize = deShapeUnicode(temp, 0, sourceLength, destSize);
- break;
+ break;
default:
break;
}
-
+
if (outputSize > destSize) {
throw new ArabicShapingException("not enough room for result data");
}
if (isLogical) {
invertBuffer(temp, 0, outputSize);
}
-
+
System.arraycopy(temp, 0, dest, destStart, outputSize);
-
+
return outputSize;
}
}
* String to display run
* @stable ICU 3.8
*/
+ @Override
public String toString()
{
return "BidiRun " + start + " - " + limit + " @ " + level;
* @return The clone.
* @stable ICU 2.0
*/
+ @Override
public Object clone()
{
try {
*/
public abstract int current();
-
- /**
- * Tag value for "words" that do not fit into any of other categories.
- * Includes spaces and most punctuation.
+
+ /**
+ * Tag value for "words" that do not fit into any of the other categories.
+ * Includes spaces and most punctuation.
* @stable ICU 53
*/
public static final int WORD_NONE = 0;
/**
- * Upper bound for tags for uncategorized words.
+ * Upper bound for tags for uncategorized words.
* @stable ICU 53
*/
public static final int WORD_NONE_LIMIT = 100;
/**
- * Tag value for words that appear to be numbers, lower limit.
+ * Tag value for words that appear to be numbers, lower limit.
* @stable ICU 53
*/
public static final int WORD_NUMBER = 100;
- /**
+ /**
* Tag value for words that appear to be numbers, upper limit.
* @stable ICU 53
*/
public static final int WORD_NUMBER_LIMIT = 200;
- /**
+ /**
* Tag value for words that contain letters, excluding
- * hiragana, katakana or ideographic characters, lower limit.
+ * hiragana, katakana or ideographic characters, lower limit.
* @stable ICU 53
*/
public static final int WORD_LETTER = 200;
- /**
- * Tag value for words containing letters, upper limit
+ /**
+ * Tag value for words containing letters, upper limit
* @stable ICU 53
*/
public static final int WORD_LETTER_LIMIT = 300;
- /**
+ /**
* Tag value for words containing kana characters, lower limit
* @stable ICU 53
*/
public static final int WORD_KANA = 300;
- /**
+ /**
* Tag value for words containing kana characters, upper limit
* @stable ICU 53
*/
* {@icu} Registers a new break iterator of the indicated kind, to use in the given
* locale. Clones of the iterator will be returned if a request for a break iterator
* of the given kind matches or falls back to this locale.
- *
+ *
* <p>Because ICU may choose to cache BreakIterator objects internally, this must
* be called at application startup, prior to any calls to
* BreakIterator.getInstance to avoid undefined behavior.
- *
+ *
* @param iter the BreakIterator instance to adopt.
* @param locale the Locale for which this instance is to be registered
* @param kind the type of iterator for which this instance is to be registered
* {@icu} Registers a new break iterator of the indicated kind, to use in the given
* locale. Clones of the iterator will be returned if a request for a break iterator
* of the given kind matches or falls back to this locale.
- *
+ *
* <p>Because ICU may choose to cache BreakIterator objects internally, this must
* be called at application startup, prior to any calls to
* BreakIterator.getInstance to avoid undefined behavior.
- *
+ *
* @param iter the BreakIterator instance to adopt.
* @param locale the Locale for which this instance is to be registered
* @param kind the type of iterator for which this instance is to be registered
import com.ibm.icu.lang.UScript;
class BurmeseBreakEngine extends DictionaryBreakEngine {
-
+
// Constants for BurmeseBreakIterator
// How many words in a row are "good enough"?
private static final byte BURMESE_LOOKAHEAD = 3;
private static final byte BURMESE_PREFIX_COMBINE_THRESHOLD = 3;
// Minimum word size
private static final byte BURMESE_MIN_WORD = 2;
-
+
private DictionaryMatcher fDictionary;
private static UnicodeSet fBurmeseWordSet;
private static UnicodeSet fEndWordSet;
private static UnicodeSet fBeginWordSet;
private static UnicodeSet fMarkSet;
-
+
static {
// Initialize UnicodeSets
fBurmeseWordSet = new UnicodeSet();
fMarkSet.compact();
fEndWordSet.compact();
fBeginWordSet.compact();
-
+
// Freeze the static UnicodeSet
fBurmeseWordSet.freeze();
fMarkSet.freeze();
fEndWordSet.freeze();
fBeginWordSet.freeze();
}
-
+
public BurmeseBreakEngine() throws IOException {
super(BreakIterator.KIND_WORD, BreakIterator.KIND_LINE);
setCharacters(fBurmeseWordSet);
fDictionary = DictionaryData.loadDictionaryFor("Mymr");
}
+ @Override
public boolean equals(Object obj) {
// Normally is a singleton, but it's possible to have duplicates
// during initialization. All are equivalent.
return obj instanceof BurmeseBreakEngine;
}
+ @Override
public int hashCode() {
return getClass().hashCode();
}
-
+
+ @Override
public boolean handles(int c, int breakType) {
if (breakType == BreakIterator.KIND_WORD || breakType == BreakIterator.KIND_LINE) {
int script = UCharacter.getIntPropertyValue(c, UProperty.SCRIPT);
return false;
}
+ @Override
public int divideUpDictionaryRange(CharacterIterator fIter, int rangeStart, int rangeEnd,
DequeI foundBreaks) {
-
-
+
+
if ((rangeEnd - rangeStart) < BURMESE_MIN_WORD) {
return 0; // Not enough characters for word
}
// no preceding word, or the non-word shares less than the minimum threshold
// of characters with a dictionary word, then scan to resynchronize
if (words[wordsFound%BURMESE_LOOKAHEAD].candidates(fIter, fDictionary, rangeEnd) <= 0 &&
- (wordLength == 0 ||
+ (wordLength == 0 ||
words[wordsFound%BURMESE_LOOKAHEAD].longestPrefix() < BURMESE_PREFIX_COMBINE_THRESHOLD)) {
// Look for a plausible word boundary
int remaining = rangeEnd - (current + wordLength);
// Look ahead for possible suffixes if a dictionary word does not follow.
// We do this in code rather than using a rule so that the heuristic
- // resynch continues to function. For example, one of the suffix characters
+ // resynch continues to function. For example, one of the suffix characters
// could be a typo in the middle of a word.
// NOT CURRENTLY APPLICABLE TO BURMESE
class BytesDictionaryMatcher extends DictionaryMatcher {
private final byte[] characters;
private final int transform;
-
+
public BytesDictionaryMatcher(byte[] chars, int transform) {
characters = chars;
Assert.assrt((transform & DictionaryData.TRANSFORM_TYPE_MASK) == DictionaryData.TRANSFORM_TYPE_OFFSET);
// than adding a "transform type" variable
this.transform = transform;
}
-
+
private int transform(int c) {
- if (c == 0x200D) {
+ if (c == 0x200D) {
return 0xFF;
} else if (c == 0x200C) {
return 0xFE;
return delta;
}
+ @Override
public int matches(CharacterIterator text_, int maxLength, int[] lengths, int[] count_, int limit, int[] values) {
UCharacterIterator text = UCharacterIterator.getInstance(text_);
BytesTrie bt = new BytesTrie(characters, 0);
return numChars;
}
+ @Override
public int getType() {
return DictionaryData.TRIE_TYPE_BYTES;
}
class CharsDictionaryMatcher extends DictionaryMatcher {
private CharSequence characters;
-
+
public CharsDictionaryMatcher(CharSequence chars) {
characters = chars;
}
+ @Override
public int matches(CharacterIterator text_, int maxLength, int[] lengths, int[] count_, int limit, int[] values) {
UCharacterIterator text = UCharacterIterator.getInstance(text_);
CharsTrie uct = new CharsTrie(characters, 0);
}
c = text.nextCodePoint();
if (c == UCharacterIterator.DONE) {
- break;
+ break;
}
++numChars;
result = uct.nextForCodePoint(c);
return numChars;
}
+ @Override
public int getType() {
return DictionaryData.TRIE_TYPE_UCHARS;
}
*/
public class CharsetMatch implements Comparable<CharsetMatch> {
-
+
/**
* Create a java.io.Reader for reading the Unicode character data corresponding
* to the original byte data supplied to the Charset detect operation.
* <p>
* CAUTION: if the source of the byte data was an InputStream, a Reader
- * can be created for only one matching char set using this method. If more
+ * can be created for only one matching char set using this method. If more
* than one charset needs to be tried, the caller will need to reset
* the InputStream and create InputStreamReaders itself, based on the charset name.
*
*/
public Reader getReader() {
InputStream inputStream = fInputStream;
-
+
if (inputStream == null) {
inputStream = new ByteArrayInputStream(fRawInput, 0, fRawLength);
}
-
+
try {
inputStream.reset();
return new InputStreamReader(inputStream, getName());
Reader reader = getReader();
int max = maxLength < 0? Integer.MAX_VALUE : maxLength;
int bytesRead = 0;
-
+
while ((bytesRead = reader.read(buffer, 0, Math.min(max, 1024))) >= 0) {
sb.append(buffer, 0, bytesRead);
max -= bytesRead;
}
-
+
reader.close();
-
+
return sb.toString();
} else {
String name = getName();
return result;
}
-
+
/**
* Get an indication of the confidence in the charset detected.
* Confidence values range from 0-100, with larger numbers indicating
}
/**
- * Get the name of the detected charset.
+ * Get the name of the detected charset.
* The name will be one that can be used with other APIs on the
* platform that accept charset names. It is the "Canonical name"
* as defined by the class java.nio.charset.Charset; for
public String getName() {
return fCharsetName;
}
-
+
/**
- * Get the ISO code for the language of the detected charset.
+ * Get the ISO code for the language of the detected charset.
*
* @return The ISO code for the language or <code>null</code> if the language cannot be determined.
*
/**
* Compare to other CharsetMatch objects.
- * Comparison is based on the match confidence value, which
- * allows CharsetDetector.detectAll() to order its results.
+ * Comparison is based on the match confidence value, which
+ * allows CharsetDetector.detectAll() to order its results.
*
* @param other the CharsetMatch object to compare against.
- * @return a negative integer, zero, or a positive integer as the
+ * @return a negative integer, zero, or a positive integer as the
* confidence level of this CharsetMatch
* is less than, equal to, or greater than that of
* the argument.
* @throws ClassCastException if the argument is not a CharsetMatch.
* @stable ICU 4.4
*/
+ @Override
public int compareTo (CharsetMatch other) {
int compareResult = 0;
if (this.fConfidence > other.fConfidence) {
*/
CharsetMatch(CharsetDetector det, CharsetRecognizer rec, int conf) {
fConfidence = conf;
-
+
// The references to the original application input data must be copied out
// of the charset recognizer to here, in case the application resets the
// recognizer before using this CharsetMatch.
*/
CharsetMatch(CharsetDetector det, CharsetRecognizer rec, int conf, String csName, String lang) {
fConfidence = conf;
-
+
// The references to the original application input data must be copied out
// of the charset recognizer to here, in case the application resets the
// recognizer before using this CharsetMatch.
fLang = lang;
}
-
+
//
// Private Data
//
private InputStream fInputStream = null; // User's input stream, or null if the user
// gave us a byte array.
-
+
private String fCharsetName; // The name of the charset this CharsetMatch
// represents. Filled in by the recognizer.
private String fLang; // The language, if one was determined by
* This is a superclass for the individual detectors for
* each of the detectable members of the ISO 2022 family
* of encodings.
- *
+ *
* The separate classes are nested within this class.
*/
abstract class CharsetRecog_2022 extends CharsetRecognizer {
-
+
/**
* Matching function shared among the 2022 detectors JP, CN and KR
* Counts up the number of legal an unrecognized escape sequences in
* the sample of text, and computes a score based on the total number &
* the proportion that fit the encoding.
- *
- *
+ *
+ *
* @param text the byte buffer containing text to analyse
* @param textLen the size of the text in the byte.
* @param escapeSequences the byte escape sequences to test for.
checkEscapes:
for (escN=0; escN<escapeSequences.length; escN++) {
byte [] seq = escapeSequences[escN];
-
+
if ((textLen - i) < seq.length) {
continue checkEscapes;
}
-
+
for (j=1; j<seq.length; j++) {
if (seq[j] != text[i+j]) {
continue checkEscapes;
- }
+ }
}
-
- hits++;
+
+ hits++;
i += seq.length-1;
continue scanInput;
}
-
- misses++;
+
+ misses++;
}
-
+
if (text[i] == 0x0e || text[i] == 0x0f) {
// Shift in/out
shifts++;
}
}
-
+
if (hits == 0) {
return 0;
}
-
+
//
// Initial quality is based on relative proportion of recongized vs.
- // unrecognized escape sequences.
+ // unrecognized escape sequences.
// All good: quality = 100;
// half or less good: quality = 0;
// linear inbetween.
quality = (100*hits - 100*misses) / (hits + misses);
-
+
// Back off quality if there were too few escape sequences seen.
// Include shifts in this computation, so that KR does not get penalized
// for having only a single Escape sequence, but many shifts.
if (hits+shifts < 5) {
quality -= (5-(hits+shifts))*10;
}
-
+
if (quality < 0) {
quality = 0;
- }
+ }
return quality;
}
-
-
-
+
+
+
static class CharsetRecog_2022JP extends CharsetRecog_2022 {
private byte [] [] escapeSequences = {
{0x1b, 0x24, 0x28, 0x43}, // KS X 1001:1992
{0x1b, 0x2e, 0x41}, // ISO 8859-1
{0x1b, 0x2e, 0x46} // ISO 8859-7
};
-
+
+ @Override
String getName() {
return "ISO-2022-JP";
}
-
+
+ @Override
CharsetMatch match(CharsetDetector det) {
int confidence = match(det.fInputBytes, det.fInputLen, escapeSequences);
return confidence == 0 ? null : new CharsetMatch(det, this, confidence);
static class CharsetRecog_2022KR extends CharsetRecog_2022 {
private byte [] [] escapeSequences = {
- {0x1b, 0x24, 0x29, 0x43}
+ {0x1b, 0x24, 0x29, 0x43}
};
-
+
+ @Override
String getName() {
return "ISO-2022-KR";
}
-
+
+ @Override
CharsetMatch match(CharsetDetector det) {
int confidence = match(det.fInputBytes, det.fInputLen, escapeSequences);
return confidence == 0 ? null : new CharsetMatch(det, this, confidence);
{0x1b, 0x4e}, // SS2
{0x1b, 0x4f}, // SS3
};
-
+
+ @Override
String getName() {
return "ISO-2022-CN";
}
-
+
+ @Override
CharsetMatch match(CharsetDetector det) {
int confidence = match(det.fInputBytes, det.fInputLen, escapeSequences);
return confidence == 0 ? null : new CharsetMatch(det, this, confidence);
}
}
-
+
}
*/
class CharsetRecog_UTF8 extends CharsetRecognizer {
+ @Override
String getName() {
return "UTF-8";
}
/* (non-Javadoc)
* @see com.ibm.icu.text.CharsetRecognizer#match(com.ibm.icu.text.CharsetDetector)
*/
+ @Override
CharsetMatch match(CharsetDetector det) {
boolean hasBOM = false;
int numValid = 0;
int i;
int trailBytes = 0;
int confidence;
-
- if (det.fRawLength >= 3 &&
+
+ if (det.fRawLength >= 3 &&
(input[0] & 0xFF) == 0xef && (input[1] & 0xFF) == 0xbb && (input[2] & 0xFF) == 0xbf) {
hasBOM = true;
}
-
+
// Scan for multi-byte sequences
for (i=0; i<det.fRawLength; i++) {
int b = input[i];
if ((b & 0x80) == 0) {
continue; // ASCII
}
-
+
// Hi bit on char found. Figure out how long the sequence should be
if ((b & 0x0e0) == 0x0c0) {
- trailBytes = 1;
+ trailBytes = 1;
} else if ((b & 0x0f0) == 0x0e0) {
trailBytes = 2;
} else if ((b & 0x0f8) == 0xf0) {
numInvalid++;
continue;
}
-
+
// Verify that we've got the right number of trail bytes in the sequence
for (;;) {
i++;
}
}
}
-
+
// Cook up some sort of confidence score, based on presense of a BOM
// and the existence of valid and/or invalid multi-byte sequences.
confidence = 0;
} else if (hasBOM && numValid > numInvalid*10) {
confidence = 80;
} else if (numValid > 3 && numInvalid == 0) {
- confidence = 100;
+ confidence = 100;
} else if (numValid > 0 && numInvalid == 0) {
confidence = 80;
} else if (numValid == 0 && numInvalid == 0) {
// Plain ASCII. Confidence must be > 10, it's more likely than UTF-16, which
// accepts ASCII with confidence = 10.
// TODO: add plain ASCII as an explicitly detected type.
- confidence = 15;
+ confidence = 15;
} else if (numValid > numInvalid*10) {
// Probably corruput utf-8 data. Valid sequences aren't likely by chance.
confidence = 25;
/* (non-Javadoc)
* @see com.ibm.icu.text.CharsetRecognizer#getName()
*/
+ @Override
abstract String getName();
/* (non-Javadoc)
* @see com.ibm.icu.text.CharsetRecognizer#match(com.ibm.icu.text.CharsetDetector)
*/
+ @Override
abstract CharsetMatch match(CharsetDetector det);
-
+
static int codeUnit16FromBytes(byte hi, byte lo) {
return ((hi & 0xff) << 8) | (lo & 0xff);
}
-
+
// UTF-16 confidence calculation. Very simple minded, but better than nothing.
// Any 8 bit non-control characters bump the confidence up. These have a zero high byte,
// and are very likely to be UTF-16, although they could also be part of a UTF-32 code.
// NULs are a contra-indication, they will appear commonly if the actual encoding is UTF-32.
- // NULs should be rare in actual text.
+ // NULs should be rare in actual text.
static int adjustConfidence(int codeUnit, int confidence) {
if (codeUnit == 0) {
confidence -= 10;
}
return confidence;
}
-
+
static class CharsetRecog_UTF_16_BE extends CharsetRecog_Unicode
{
+ @Override
String getName()
{
return "UTF-16BE";
}
-
+
+ @Override
CharsetMatch match(CharsetDetector det)
{
byte[] input = det.fRawInput;
int confidence = 10;
-
+
int bytesToCheck = Math.min(input.length, 30);
for (int charIndex=0; charIndex<bytesToCheck-1; charIndex+=2) {
int codeUnit = codeUnit16FromBytes(input[charIndex], input[charIndex + 1]);
return null;
}
}
-
+
static class CharsetRecog_UTF_16_LE extends CharsetRecog_Unicode
{
+ @Override
String getName()
{
return "UTF-16LE";
}
-
+
+ @Override
CharsetMatch match(CharsetDetector det)
{
byte[] input = det.fRawInput;
int confidence = 10;
-
+
int bytesToCheck = Math.min(input.length, 30);
for (int charIndex=0; charIndex<bytesToCheck-1; charIndex+=2) {
int codeUnit = codeUnit16FromBytes(input[charIndex+1], input[charIndex]);
return null;
}
}
-
+
static abstract class CharsetRecog_UTF_32 extends CharsetRecog_Unicode
{
abstract int getChar(byte[] input, int index);
-
+
+ @Override
abstract String getName();
-
+
+ @Override
CharsetMatch match(CharsetDetector det)
{
byte[] input = det.fRawInput;
int numInvalid = 0;
boolean hasBOM = false;
int confidence = 0;
-
+
if (limit==0) {
return null;
}
if (getChar(input, 0) == 0x0000FEFF) {
hasBOM = true;
}
-
+
for(int i = 0; i < limit; i += 4) {
int ch = getChar(input, i);
-
+
if (ch < 0 || ch >= 0x10FFFF || (ch >= 0xD800 && ch <= 0xDFFF)) {
numInvalid += 1;
} else {
numValid += 1;
}
}
-
-
+
+
// Cook up some sort of confidence score, based on presence of a BOM
// and the existence of valid and/or invalid multi-byte sequences.
if (hasBOM && numInvalid==0) {
} else if (hasBOM && numValid > numInvalid*10) {
confidence = 80;
} else if (numValid > 3 && numInvalid == 0) {
- confidence = 100;
+ confidence = 100;
} else if (numValid > 0 && numInvalid == 0) {
confidence = 80;
} else if (numValid > numInvalid*10) {
// Probably corrupt UTF-32BE data. Valid sequences aren't likely by chance.
confidence = 25;
}
-
+
return confidence == 0 ? null : new CharsetMatch(det, this, confidence);
}
}
-
+
static class CharsetRecog_UTF_32_BE extends CharsetRecog_UTF_32
{
+ @Override
int getChar(byte[] input, int index)
{
return (input[index + 0] & 0xFF) << 24 | (input[index + 1] & 0xFF) << 16 |
(input[index + 2] & 0xFF) << 8 | (input[index + 3] & 0xFF);
}
-
+
+ @Override
String getName()
{
return "UTF-32BE";
}
}
-
+
static class CharsetRecog_UTF_32_LE extends CharsetRecog_UTF_32
{
+ @Override
int getChar(byte[] input, int index)
{
    // The last of the four bytes is the most significant one, so fold
    // the bytes in from index + 3 down to index.
    int value = 0;
    for (int k = 3; k >= 0; k--) {
        // Mask each byte so negative values do not sign-extend.
        value = (value << 8) | (input[index + k] & 0xFF);
    }
    return value;
}
-
+
+ @Override
String getName()
{
return "UTF-32LE";
* Get the IANA name of this charset.
* @return the charset name.
*/
+ @Override
abstract String getName() ;
-
-
+
+
/**
* Test the match of this charset with the input text data
* which is obtained via the CharsetDetector object.
- *
+ *
* @param det The CharsetDetector, which contains the input text
* to be checked for being in this charset.
* @return Two values packed into one int (Damn java, anyhow)
int totalCharCount = 0;
int confidence = 0;
iteratedChar iter = new iteratedChar();
-
+
detectBlock: {
for (iter.reset(); nextChar(iter, det);) {
totalCharCount++;
if (iter.error) {
- badCharCount++;
+ badCharCount++;
} else {
long cv = iter.charValue & 0xFFFFFFFFL;
-
+
if (cv <= 0xff) {
singleByteCharCount++;
} else {
break detectBlock;
}
}
-
+
if (doubleByteCharCount <= 10 && badCharCount== 0) {
// Not many multi-byte chars.
if (doubleByteCharCount == 0 && totalCharCount < 10) {
// but is not incompatible with our encoding, so don't give it a zero.
confidence = 10;
}
-
+
break detectBlock;
}
-
+
//
// No match if there are too many characters that don't fit the encoding scheme.
// (should we have zero tolerance for these?)
confidence = 0;
break detectBlock;
}
-
+
if (commonChars == null) {
// We have no statistics on frequently occurring characters.
// Assess confidence purely on having a reasonable number of
confidence = Math.min(confidence, 100);
}
} // end of detectBlock:
-
+
return confidence;
}
-
+
// "Character" iterated character class.
// Recognizers for specific mbcs encodings make their "characters" available
// by providing a nextChar() function that fills in an instance of iteratedChar
int nextIndex = 0;
boolean error = false;
boolean done = false;
-
+
/** Puts every field back to its initial value so iteration restarts at input index 0. */
void reset() {
    done = false;
    error = false;
    nextIndex = 0;
    charValue = 0;
}
-
+
int nextByte(CharsetDetector det) {
if (nextIndex >= det.fRawLength) {
done = true;
return -1;
}
- int byteValue = (int)det.fRawInput[nextIndex++] & 0x00ff;
+ int byteValue = det.fRawInput[nextIndex++] & 0x00ff;
return byteValue;
- }
+ }
}
-
+
/**
* Get the next character (however many bytes it is) from the input data
* Subclasses for specific charset encodings must implement this function
* to get characters according to the rules of their encoding scheme.
- *
+ *
* This function is not a method of class iteratedChar only because
* that would require a lot of extra derived classes, which is awkward.
* @param it The iteratedChar "struct" into which the returned char is placed.
* @return True if a character was returned, false at end of input.
*/
abstract boolean nextChar(iteratedChar it, CharsetDetector det);
-
-
-
+
+
+
/**
- * Shift-JIS charset recognizer.
+ * Shift-JIS charset recognizer.
*
*/
static class CharsetRecog_sjis extends CharsetRecog_mbcs {
- static int [] commonChars =
+ static int [] commonChars =
// TODO: This set of data comes from the character frequency-
// of-occurence analysis tool. The data needs to be moved
// into a resource and loaded from there.
- {0x8140, 0x8141, 0x8142, 0x8145, 0x815b, 0x8169, 0x816a, 0x8175, 0x8176, 0x82a0,
- 0x82a2, 0x82a4, 0x82a9, 0x82aa, 0x82ab, 0x82ad, 0x82af, 0x82b1, 0x82b3, 0x82b5,
- 0x82b7, 0x82bd, 0x82be, 0x82c1, 0x82c4, 0x82c5, 0x82c6, 0x82c8, 0x82c9, 0x82cc,
- 0x82cd, 0x82dc, 0x82e0, 0x82e7, 0x82e8, 0x82e9, 0x82ea, 0x82f0, 0x82f1, 0x8341,
- 0x8343, 0x834e, 0x834f, 0x8358, 0x835e, 0x8362, 0x8367, 0x8375, 0x8376, 0x8389,
+ {0x8140, 0x8141, 0x8142, 0x8145, 0x815b, 0x8169, 0x816a, 0x8175, 0x8176, 0x82a0,
+ 0x82a2, 0x82a4, 0x82a9, 0x82aa, 0x82ab, 0x82ad, 0x82af, 0x82b1, 0x82b3, 0x82b5,
+ 0x82b7, 0x82bd, 0x82be, 0x82c1, 0x82c4, 0x82c5, 0x82c6, 0x82c8, 0x82c9, 0x82cc,
+ 0x82cd, 0x82dc, 0x82e0, 0x82e7, 0x82e8, 0x82e9, 0x82ea, 0x82f0, 0x82f1, 0x8341,
+ 0x8343, 0x834e, 0x834f, 0x8358, 0x835e, 0x8362, 0x8367, 0x8375, 0x8376, 0x8389,
0x838a, 0x838b, 0x838d, 0x8393, 0x8e96, 0x93fa, 0x95aa};
-
- boolean nextChar(iteratedChar it, CharsetDetector det) {
+
+ @Override
+ boolean nextChar(iteratedChar it, CharsetDetector det) {
it.error = false;
int firstByte;
firstByte = it.charValue = it.nextByte(det);
if (firstByte < 0) {
return false;
}
-
+
if (firstByte <= 0x7f || (firstByte>0xa0 && firstByte<=0xdf)) {
return true;
}
-
+
int secondByte = it.nextByte(det);
if (secondByte < 0) {
- return false;
+ return false;
}
it.charValue = (firstByte << 8) | secondByte;
if (! ((secondByte>=0x40 && secondByte<=0x7f) || (secondByte>=0x80 && secondByte<=0xff))) {
}
return true;
}
-
- CharsetMatch match(CharsetDetector det) {
+
+ @Override
+ CharsetMatch match(CharsetDetector det) {
int confidence = match(det, commonChars);
return confidence == 0 ? null : new CharsetMatch(det, this, confidence);
}
-
- String getName() {
+
+ @Override
+ String getName() {
return "Shift_JIS";
}
-
- public String getLanguage()
+
+ @Override
+ public String getLanguage()
{
return "ja";
}
-
+
}
-
-
+
+
/**
- * Big5 charset recognizer.
+ * Big5 charset recognizer.
*
*/
static class CharsetRecog_big5 extends CharsetRecog_mbcs {
- static int [] commonChars =
+ static int [] commonChars =
// TODO: This set of data comes from the character frequency-
// of-occurence analysis tool. The data needs to be moved
// into a resource and loaded from there.
- {0xa140, 0xa141, 0xa142, 0xa143, 0xa147, 0xa149, 0xa175, 0xa176, 0xa440, 0xa446,
- 0xa447, 0xa448, 0xa451, 0xa454, 0xa457, 0xa464, 0xa46a, 0xa46c, 0xa477, 0xa4a3,
- 0xa4a4, 0xa4a7, 0xa4c1, 0xa4ce, 0xa4d1, 0xa4df, 0xa4e8, 0xa4fd, 0xa540, 0xa548,
- 0xa558, 0xa569, 0xa5cd, 0xa5e7, 0xa657, 0xa661, 0xa662, 0xa668, 0xa670, 0xa6a8,
- 0xa6b3, 0xa6b9, 0xa6d3, 0xa6db, 0xa6e6, 0xa6f2, 0xa740, 0xa751, 0xa759, 0xa7da,
- 0xa8a3, 0xa8a5, 0xa8ad, 0xa8d1, 0xa8d3, 0xa8e4, 0xa8fc, 0xa9c0, 0xa9d2, 0xa9f3,
- 0xaa6b, 0xaaba, 0xaabe, 0xaacc, 0xaafc, 0xac47, 0xac4f, 0xacb0, 0xacd2, 0xad59,
- 0xaec9, 0xafe0, 0xb0ea, 0xb16f, 0xb2b3, 0xb2c4, 0xb36f, 0xb44c, 0xb44e, 0xb54c,
- 0xb5a5, 0xb5bd, 0xb5d0, 0xb5d8, 0xb671, 0xb7ed, 0xb867, 0xb944, 0xbad8, 0xbb44,
+ {0xa140, 0xa141, 0xa142, 0xa143, 0xa147, 0xa149, 0xa175, 0xa176, 0xa440, 0xa446,
+ 0xa447, 0xa448, 0xa451, 0xa454, 0xa457, 0xa464, 0xa46a, 0xa46c, 0xa477, 0xa4a3,
+ 0xa4a4, 0xa4a7, 0xa4c1, 0xa4ce, 0xa4d1, 0xa4df, 0xa4e8, 0xa4fd, 0xa540, 0xa548,
+ 0xa558, 0xa569, 0xa5cd, 0xa5e7, 0xa657, 0xa661, 0xa662, 0xa668, 0xa670, 0xa6a8,
+ 0xa6b3, 0xa6b9, 0xa6d3, 0xa6db, 0xa6e6, 0xa6f2, 0xa740, 0xa751, 0xa759, 0xa7da,
+ 0xa8a3, 0xa8a5, 0xa8ad, 0xa8d1, 0xa8d3, 0xa8e4, 0xa8fc, 0xa9c0, 0xa9d2, 0xa9f3,
+ 0xaa6b, 0xaaba, 0xaabe, 0xaacc, 0xaafc, 0xac47, 0xac4f, 0xacb0, 0xacd2, 0xad59,
+ 0xaec9, 0xafe0, 0xb0ea, 0xb16f, 0xb2b3, 0xb2c4, 0xb36f, 0xb44c, 0xb44e, 0xb54c,
+ 0xb5a5, 0xb5bd, 0xb5d0, 0xb5d8, 0xb671, 0xb7ed, 0xb867, 0xb944, 0xbad8, 0xbb44,
0xbba1, 0xbdd1, 0xc2c4, 0xc3b9, 0xc440, 0xc45f};
-
- boolean nextChar(iteratedChar it, CharsetDetector det) {
+
+ @Override
+ boolean nextChar(iteratedChar it, CharsetDetector det) {
it.error = false;
int firstByte;
firstByte = it.charValue = it.nextByte(det);
if (firstByte < 0) {
return false;
}
-
+
if (firstByte <= 0x7f || firstByte==0xff) {
// single byte character.
return true;
}
-
+
int secondByte = it.nextByte(det);
if (secondByte < 0) {
- return false;
+ return false;
}
it.charValue = (it.charValue << 8) | secondByte;
}
return true;
}
-
- CharsetMatch match(CharsetDetector det) {
+
+ @Override
+ CharsetMatch match(CharsetDetector det) {
int confidence = match(det, commonChars);
return confidence == 0 ? null : new CharsetMatch(det, this, confidence);
}
-
- String getName() {
+
+ @Override
+ String getName() {
return "Big5";
}
-
-
- public String getLanguage()
+
+
+ @Override
+ public String getLanguage()
{
return "zh";
}
}
-
-
+
+
/**
* EUC charset recognizers. One abstract class that provides the common function
* for getting the next character according to the EUC encoding scheme,
- * and nested derived classes for EUC_KR, EUC_JP, EUC_CN.
+ * and nested derived classes for EUC_KR, EUC_JP, EUC_CN.
*
*/
abstract static class CharsetRecog_euc extends CharsetRecog_mbcs {
-
+
/*
* (non-Javadoc)
* Get the next character value for EUC based encodings.
* Character "value" is simply the raw bytes that make up the character
* packed into an int.
*/
- boolean nextChar(iteratedChar it, CharsetDetector det) {
+ @Override
+ boolean nextChar(iteratedChar it, CharsetDetector det) {
it.error = false;
int firstByte = 0;
int secondByte = 0;
int thirdByte = 0;
//int fourthByte = 0;
-
+
buildChar: {
- firstByte = it.charValue = it.nextByte(det);
+ firstByte = it.charValue = it.nextByte(det);
if (firstByte < 0) {
// Ran off the end of the input data
it.done = true;
// single byte char
break buildChar;
}
-
+
secondByte = it.nextByte(det);
it.charValue = (it.charValue << 8) | secondByte;
-
+
if (firstByte >= 0xA1 && firstByte <= 0xfe) {
// Two byte Char
if (secondByte < 0xa1) {
// In EUC-TW, total char size is 4 bytes, three bytes contribute to char value.
// We don't know which we've got.
// Treat it like EUC-JP. If the data really was EUC-TW, the following two
- // bytes will look like a well formed 2 byte char.
+ // bytes will look like a well formed 2 byte char.
if (secondByte < 0xa1) {
it.error = true;
}
- break buildChar;
+ break buildChar;
}
-
+
if (firstByte == 0x8f) {
// Code set 3.
// Three byte total char size, two bytes of actual char value.
}
}
}
-
+
return (it.done == false);
}
-
+
/**
* The charset recognizer for EUC-JP. A singleton instance of this class
* is created and kept by the public CharsetDetector class
*/
static class CharsetRecog_euc_jp extends CharsetRecog_euc {
- static int [] commonChars =
+ static int [] commonChars =
// TODO: This set of data comes from the character frequency-
// of-occurence analysis tool. The data needs to be moved
// into a resource and loaded from there.
- {0xa1a1, 0xa1a2, 0xa1a3, 0xa1a6, 0xa1bc, 0xa1ca, 0xa1cb, 0xa1d6, 0xa1d7, 0xa4a2,
- 0xa4a4, 0xa4a6, 0xa4a8, 0xa4aa, 0xa4ab, 0xa4ac, 0xa4ad, 0xa4af, 0xa4b1, 0xa4b3,
- 0xa4b5, 0xa4b7, 0xa4b9, 0xa4bb, 0xa4bd, 0xa4bf, 0xa4c0, 0xa4c1, 0xa4c3, 0xa4c4,
- 0xa4c6, 0xa4c7, 0xa4c8, 0xa4c9, 0xa4ca, 0xa4cb, 0xa4ce, 0xa4cf, 0xa4d0, 0xa4de,
- 0xa4df, 0xa4e1, 0xa4e2, 0xa4e4, 0xa4e8, 0xa4e9, 0xa4ea, 0xa4eb, 0xa4ec, 0xa4ef,
- 0xa4f2, 0xa4f3, 0xa5a2, 0xa5a3, 0xa5a4, 0xa5a6, 0xa5a7, 0xa5aa, 0xa5ad, 0xa5af,
- 0xa5b0, 0xa5b3, 0xa5b5, 0xa5b7, 0xa5b8, 0xa5b9, 0xa5bf, 0xa5c3, 0xa5c6, 0xa5c7,
- 0xa5c8, 0xa5c9, 0xa5cb, 0xa5d0, 0xa5d5, 0xa5d6, 0xa5d7, 0xa5de, 0xa5e0, 0xa5e1,
- 0xa5e5, 0xa5e9, 0xa5ea, 0xa5eb, 0xa5ec, 0xa5ed, 0xa5f3, 0xb8a9, 0xb9d4, 0xbaee,
- 0xbbc8, 0xbef0, 0xbfb7, 0xc4ea, 0xc6fc, 0xc7bd, 0xcab8, 0xcaf3, 0xcbdc, 0xcdd1};
- String getName() {
+ {0xa1a1, 0xa1a2, 0xa1a3, 0xa1a6, 0xa1bc, 0xa1ca, 0xa1cb, 0xa1d6, 0xa1d7, 0xa4a2,
+ 0xa4a4, 0xa4a6, 0xa4a8, 0xa4aa, 0xa4ab, 0xa4ac, 0xa4ad, 0xa4af, 0xa4b1, 0xa4b3,
+ 0xa4b5, 0xa4b7, 0xa4b9, 0xa4bb, 0xa4bd, 0xa4bf, 0xa4c0, 0xa4c1, 0xa4c3, 0xa4c4,
+ 0xa4c6, 0xa4c7, 0xa4c8, 0xa4c9, 0xa4ca, 0xa4cb, 0xa4ce, 0xa4cf, 0xa4d0, 0xa4de,
+ 0xa4df, 0xa4e1, 0xa4e2, 0xa4e4, 0xa4e8, 0xa4e9, 0xa4ea, 0xa4eb, 0xa4ec, 0xa4ef,
+ 0xa4f2, 0xa4f3, 0xa5a2, 0xa5a3, 0xa5a4, 0xa5a6, 0xa5a7, 0xa5aa, 0xa5ad, 0xa5af,
+ 0xa5b0, 0xa5b3, 0xa5b5, 0xa5b7, 0xa5b8, 0xa5b9, 0xa5bf, 0xa5c3, 0xa5c6, 0xa5c7,
+ 0xa5c8, 0xa5c9, 0xa5cb, 0xa5d0, 0xa5d5, 0xa5d6, 0xa5d7, 0xa5de, 0xa5e0, 0xa5e1,
+ 0xa5e5, 0xa5e9, 0xa5ea, 0xa5eb, 0xa5ec, 0xa5ed, 0xa5f3, 0xb8a9, 0xb9d4, 0xbaee,
+ 0xbbc8, 0xbef0, 0xbfb7, 0xc4ea, 0xc6fc, 0xc7bd, 0xcab8, 0xcaf3, 0xcbdc, 0xcdd1};
+ @Override
+ String getName() {
return "EUC-JP";
}
-
- CharsetMatch match(CharsetDetector det) {
+
+ @Override
+ CharsetMatch match(CharsetDetector det) {
int confidence = match(det, commonChars);
return confidence == 0 ? null : new CharsetMatch(det, this, confidence);
}
-
- public String getLanguage()
+
+ @Override
+ public String getLanguage()
{
return "ja";
}
}
-
+
/**
* The charset recognizer for EUC-KR. A singleton instance of this class
* is created and kept by the public CharsetDetector class
*/
static class CharsetRecog_euc_kr extends CharsetRecog_euc {
- static int [] commonChars =
+ static int [] commonChars =
// TODO: This set of data comes from the character frequency-
// of-occurence analysis tool. The data needs to be moved
// into a resource and loaded from there.
- {0xb0a1, 0xb0b3, 0xb0c5, 0xb0cd, 0xb0d4, 0xb0e6, 0xb0ed, 0xb0f8, 0xb0fa, 0xb0fc,
- 0xb1b8, 0xb1b9, 0xb1c7, 0xb1d7, 0xb1e2, 0xb3aa, 0xb3bb, 0xb4c2, 0xb4cf, 0xb4d9,
- 0xb4eb, 0xb5a5, 0xb5b5, 0xb5bf, 0xb5c7, 0xb5e9, 0xb6f3, 0xb7af, 0xb7c2, 0xb7ce,
- 0xb8a6, 0xb8ae, 0xb8b6, 0xb8b8, 0xb8bb, 0xb8e9, 0xb9ab, 0xb9ae, 0xb9cc, 0xb9ce,
- 0xb9fd, 0xbab8, 0xbace, 0xbad0, 0xbaf1, 0xbbe7, 0xbbf3, 0xbbfd, 0xbcad, 0xbcba,
- 0xbcd2, 0xbcf6, 0xbdba, 0xbdc0, 0xbdc3, 0xbdc5, 0xbec6, 0xbec8, 0xbedf, 0xbeee,
- 0xbef8, 0xbefa, 0xbfa1, 0xbfa9, 0xbfc0, 0xbfe4, 0xbfeb, 0xbfec, 0xbff8, 0xc0a7,
- 0xc0af, 0xc0b8, 0xc0ba, 0xc0bb, 0xc0bd, 0xc0c7, 0xc0cc, 0xc0ce, 0xc0cf, 0xc0d6,
- 0xc0da, 0xc0e5, 0xc0fb, 0xc0fc, 0xc1a4, 0xc1a6, 0xc1b6, 0xc1d6, 0xc1df, 0xc1f6,
+ {0xb0a1, 0xb0b3, 0xb0c5, 0xb0cd, 0xb0d4, 0xb0e6, 0xb0ed, 0xb0f8, 0xb0fa, 0xb0fc,
+ 0xb1b8, 0xb1b9, 0xb1c7, 0xb1d7, 0xb1e2, 0xb3aa, 0xb3bb, 0xb4c2, 0xb4cf, 0xb4d9,
+ 0xb4eb, 0xb5a5, 0xb5b5, 0xb5bf, 0xb5c7, 0xb5e9, 0xb6f3, 0xb7af, 0xb7c2, 0xb7ce,
+ 0xb8a6, 0xb8ae, 0xb8b6, 0xb8b8, 0xb8bb, 0xb8e9, 0xb9ab, 0xb9ae, 0xb9cc, 0xb9ce,
+ 0xb9fd, 0xbab8, 0xbace, 0xbad0, 0xbaf1, 0xbbe7, 0xbbf3, 0xbbfd, 0xbcad, 0xbcba,
+ 0xbcd2, 0xbcf6, 0xbdba, 0xbdc0, 0xbdc3, 0xbdc5, 0xbec6, 0xbec8, 0xbedf, 0xbeee,
+ 0xbef8, 0xbefa, 0xbfa1, 0xbfa9, 0xbfc0, 0xbfe4, 0xbfeb, 0xbfec, 0xbff8, 0xc0a7,
+ 0xc0af, 0xc0b8, 0xc0ba, 0xc0bb, 0xc0bd, 0xc0c7, 0xc0cc, 0xc0ce, 0xc0cf, 0xc0d6,
+ 0xc0da, 0xc0e5, 0xc0fb, 0xc0fc, 0xc1a4, 0xc1a6, 0xc1b6, 0xc1d6, 0xc1df, 0xc1f6,
0xc1f8, 0xc4a1, 0xc5cd, 0xc6ae, 0xc7cf, 0xc7d1, 0xc7d2, 0xc7d8, 0xc7e5, 0xc8ad};
-
- String getName() {
+
+ @Override
+ String getName() {
return "EUC-KR";
}
-
- CharsetMatch match(CharsetDetector det) {
+
+ @Override
+ CharsetMatch match(CharsetDetector det) {
int confidence = match(det, commonChars);
return confidence == 0 ? null : new CharsetMatch(det, this, confidence);
}
-
- public String getLanguage()
+
+ @Override
+ public String getLanguage()
{
return "ko";
}
}
}
-
+
/**
- *
- * GB-18030 recognizer. Uses simplified Chinese statistics.
+ *
+ * GB-18030 recognizer. Uses simplified Chinese statistics.
*
*/
static class CharsetRecog_gb_18030 extends CharsetRecog_mbcs {
-
+
/*
* (non-Javadoc)
* Get the next character value for EUC based encodings.
* Character "value" is simply the raw bytes that make up the character
* packed into an int.
*/
- boolean nextChar(iteratedChar it, CharsetDetector det) {
+ @Override
+ boolean nextChar(iteratedChar it, CharsetDetector det) {
it.error = false;
int firstByte = 0;
int secondByte = 0;
int thirdByte = 0;
int fourthByte = 0;
-
+
buildChar: {
- firstByte = it.charValue = it.nextByte(det);
-
+ firstByte = it.charValue = it.nextByte(det);
+
if (firstByte < 0) {
// Ran off the end of the input data
it.done = true;
break buildChar;
}
-
+
if (firstByte <= 0x80) {
// single byte char
break buildChar;
}
-
+
secondByte = it.nextByte(det);
it.charValue = (it.charValue << 8) | secondByte;
-
+
if (firstByte >= 0x81 && firstByte <= 0xFE) {
// Two byte Char
if ((secondByte >= 0x40 && secondByte <= 0x7E) || (secondByte >=80 && secondByte <=0xFE)) {
break buildChar;
}
-
+
// Four byte char
if (secondByte >= 0x30 && secondByte <= 0x39) {
thirdByte = it.nextByte(det);
-
+
if (thirdByte >= 0x81 && thirdByte <= 0xFE) {
fourthByte = it.nextByte(det);
-
+
if (fourthByte >= 0x30 && fourthByte <= 0x39) {
it.charValue = (it.charValue << 16) | (thirdByte << 8) | fourthByte;
break buildChar;
}
}
}
-
+
it.error = true;
break buildChar;
}
}
-
+
return (it.done == false);
}
-
- static int [] commonChars =
+
+ static int [] commonChars =
// TODO: This set of data comes from the character frequency-
// of-occurence analysis tool. The data needs to be moved
// into a resource and loaded from there.
- {0xa1a1, 0xa1a2, 0xa1a3, 0xa1a4, 0xa1b0, 0xa1b1, 0xa1f1, 0xa1f3, 0xa3a1, 0xa3ac,
- 0xa3ba, 0xb1a8, 0xb1b8, 0xb1be, 0xb2bb, 0xb3c9, 0xb3f6, 0xb4f3, 0xb5bd, 0xb5c4,
- 0xb5e3, 0xb6af, 0xb6d4, 0xb6e0, 0xb7a2, 0xb7a8, 0xb7bd, 0xb7d6, 0xb7dd, 0xb8b4,
- 0xb8df, 0xb8f6, 0xb9ab, 0xb9c9, 0xb9d8, 0xb9fa, 0xb9fd, 0xbacd, 0xbba7, 0xbbd6,
- 0xbbe1, 0xbbfa, 0xbcbc, 0xbcdb, 0xbcfe, 0xbdcc, 0xbecd, 0xbedd, 0xbfb4, 0xbfc6,
- 0xbfc9, 0xc0b4, 0xc0ed, 0xc1cb, 0xc2db, 0xc3c7, 0xc4dc, 0xc4ea, 0xc5cc, 0xc6f7,
- 0xc7f8, 0xc8ab, 0xc8cb, 0xc8d5, 0xc8e7, 0xc9cf, 0xc9fa, 0xcab1, 0xcab5, 0xcac7,
- 0xcad0, 0xcad6, 0xcaf5, 0xcafd, 0xccec, 0xcdf8, 0xceaa, 0xcec4, 0xced2, 0xcee5,
- 0xcfb5, 0xcfc2, 0xcfd6, 0xd0c2, 0xd0c5, 0xd0d0, 0xd0d4, 0xd1a7, 0xd2aa, 0xd2b2,
+ {0xa1a1, 0xa1a2, 0xa1a3, 0xa1a4, 0xa1b0, 0xa1b1, 0xa1f1, 0xa1f3, 0xa3a1, 0xa3ac,
+ 0xa3ba, 0xb1a8, 0xb1b8, 0xb1be, 0xb2bb, 0xb3c9, 0xb3f6, 0xb4f3, 0xb5bd, 0xb5c4,
+ 0xb5e3, 0xb6af, 0xb6d4, 0xb6e0, 0xb7a2, 0xb7a8, 0xb7bd, 0xb7d6, 0xb7dd, 0xb8b4,
+ 0xb8df, 0xb8f6, 0xb9ab, 0xb9c9, 0xb9d8, 0xb9fa, 0xb9fd, 0xbacd, 0xbba7, 0xbbd6,
+ 0xbbe1, 0xbbfa, 0xbcbc, 0xbcdb, 0xbcfe, 0xbdcc, 0xbecd, 0xbedd, 0xbfb4, 0xbfc6,
+ 0xbfc9, 0xc0b4, 0xc0ed, 0xc1cb, 0xc2db, 0xc3c7, 0xc4dc, 0xc4ea, 0xc5cc, 0xc6f7,
+ 0xc7f8, 0xc8ab, 0xc8cb, 0xc8d5, 0xc8e7, 0xc9cf, 0xc9fa, 0xcab1, 0xcab5, 0xcac7,
+ 0xcad0, 0xcad6, 0xcaf5, 0xcafd, 0xccec, 0xcdf8, 0xceaa, 0xcec4, 0xced2, 0xcee5,
+ 0xcfb5, 0xcfc2, 0xcfd6, 0xd0c2, 0xd0c5, 0xd0d0, 0xd0d4, 0xd1a7, 0xd2aa, 0xd2b2,
0xd2b5, 0xd2bb, 0xd2d4, 0xd3c3, 0xd3d0, 0xd3fd, 0xd4c2, 0xd4da, 0xd5e2, 0xd6d0};
-
- String getName() {
+
+ @Override
+ String getName() {
return "GB18030";
}
-
- CharsetMatch match(CharsetDetector det) {
+
+ @Override
+ CharsetMatch match(CharsetDetector det) {
int confidence = match(det, commonChars);
return confidence == 0 ? null : new CharsetMatch(det, this, confidence);
}
-
- public String getLanguage()
+
+ @Override
+ public String getLanguage()
{
return "zh";
}
}
-
-
+
+
}
/* (non-Javadoc)
* @see com.ibm.icu.text.CharsetRecognizer#getName()
*/
+ @Override
abstract String getName();
static class NGramParser
protected int byteIndex = 0;
private int ngram = 0;
-
+
private int[] ngramList;
protected byte[] byteMap;
-
+
private int ngramCount;
private int hitCount;
-
+
protected byte spaceChar;
-
+
public NGramParser(int[] theNgramList, byte[] theByteMap)
{
ngramList = theNgramList;
byteMap = theByteMap;
-
+
ngram = 0;
-
+
ngramCount = hitCount = 0;
}
-
+
/*
* Binary search for value in table, which must have exactly 64 entries.
*/
private static int search(int[] table, int value)
{
int index = 0;
-
+
if (table[index + 32] <= value) {
index += 32;
}
-
+
if (table[index + 16] <= value) {
index += 16;
}
if (table[index] > value) {
index -= 1;
}
-
+
if (index < 0 || table[index] != value) {
return -1;
}
-
+
return index;
}
private void lookup(int thisNgram)
{
ngramCount += 1;
-
+
if (search(ngramList, thisNgram) >= 0) {
hitCount += 1;
}
-
+
}
-
+
/**
 * Shifts the low eight bits of {@code b} into the rolling n-gram,
 * trims it with {@code N_GRAM_MASK}, and counts the resulting n-gram.
 */
protected void addByte(int b)
{
    final int shifted = ngram << 8;
    final int lowByte = b & 0xFF;
    ngram = (shifted + lowByte) & N_GRAM_MASK;
    lookup(ngram);
}
-
+
private int nextByte(CharsetDetector det)
{
if (byteIndex >= det.fInputLen) {
return -1;
}
-
+
return det.fInputBytes[byteIndex++] & 0xFF;
}
-
+
protected void parseCharacters(CharsetDetector det)
{
int b;
boolean ignoreSpace = false;
-
+
while ((b = nextByte(det)) >= 0) {
byte mb = byteMap[b];
-
+
// TODO: 0x20 might not be a space in all character sets...
if (mb != 0) {
if (!(mb == spaceChar && ignoreSpace)) {
- addByte(mb);
+ addByte(mb);
}
-
+
ignoreSpace = (mb == spaceChar);
}
}
-
+
}
public int parse(CharsetDetector det)
}
public int parse(CharsetDetector det, byte spaceCh)
{
-
+
this.spaceChar = spaceCh;
-
+
parseCharacters(det);
-
+
// TODO: Is this OK? The buffer could have ended in the middle of a word...
addByte(spaceChar);
double rawPercent = (double) hitCount / (double) ngramCount;
-
+
// if (rawPercent <= 2.0) {
// return 0;
// }
-
+
// TODO - This is a bit of a hack to take care of a case
// were we were getting a confidence of 135...
if (rawPercent > 0.33) {
return 98;
}
-
+
return (int) (rawPercent * 300.0);
}
}
-
+
static class NGramParser_IBM420 extends NGramParser
{
private byte alef = 0x00;
-
+
protected static byte[] unshapeMap = {
/* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F */
-/* 0- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
-/* 1- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
-/* 2- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
-/* 3- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
-/* 4- */ (byte) 0x40, (byte) 0x40, (byte) 0x42, (byte) 0x42, (byte) 0x44, (byte) 0x45, (byte) 0x46, (byte) 0x47, (byte) 0x47, (byte) 0x49, (byte) 0x4A, (byte) 0x4B, (byte) 0x4C, (byte) 0x4D, (byte) 0x4E, (byte) 0x4F,
-/* 5- */ (byte) 0x50, (byte) 0x49, (byte) 0x52, (byte) 0x53, (byte) 0x54, (byte) 0x55, (byte) 0x56, (byte) 0x56, (byte) 0x58, (byte) 0x58, (byte) 0x5A, (byte) 0x5B, (byte) 0x5C, (byte) 0x5D, (byte) 0x5E, (byte) 0x5F,
-/* 6- */ (byte) 0x60, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x63, (byte) 0x65, (byte) 0x65, (byte) 0x67, (byte) 0x67, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
-/* 7- */ (byte) 0x69, (byte) 0x71, (byte) 0x71, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, (byte) 0x77, (byte) 0x79, (byte) 0x7A, (byte) 0x7B, (byte) 0x7C, (byte) 0x7D, (byte) 0x7E, (byte) 0x7F,
-/* 8- */ (byte) 0x80, (byte) 0x81, (byte) 0x82, (byte) 0x83, (byte) 0x84, (byte) 0x85, (byte) 0x86, (byte) 0x87, (byte) 0x88, (byte) 0x89, (byte) 0x80, (byte) 0x8B, (byte) 0x8B, (byte) 0x8D, (byte) 0x8D, (byte) 0x8F,
-/* 9- */ (byte) 0x90, (byte) 0x91, (byte) 0x92, (byte) 0x93, (byte) 0x94, (byte) 0x95, (byte) 0x96, (byte) 0x97, (byte) 0x98, (byte) 0x99, (byte) 0x9A, (byte) 0x9A, (byte) 0x9A, (byte) 0x9A, (byte) 0x9E, (byte) 0x9E,
-/* A- */ (byte) 0x9E, (byte) 0xA1, (byte) 0xA2, (byte) 0xA3, (byte) 0xA4, (byte) 0xA5, (byte) 0xA6, (byte) 0xA7, (byte) 0xA8, (byte) 0xA9, (byte) 0x9E, (byte) 0xAB, (byte) 0xAB, (byte) 0xAD, (byte) 0xAD, (byte) 0xAF,
-/* B- */ (byte) 0xAF, (byte) 0xB1, (byte) 0xB2, (byte) 0xB3, (byte) 0xB4, (byte) 0xB5, (byte) 0xB6, (byte) 0xB7, (byte) 0xB8, (byte) 0xB9, (byte) 0xB1, (byte) 0xBB, (byte) 0xBB, (byte) 0xBD, (byte) 0xBD, (byte) 0xBF,
-/* C- */ (byte) 0xC0, (byte) 0xC1, (byte) 0xC2, (byte) 0xC3, (byte) 0xC4, (byte) 0xC5, (byte) 0xC6, (byte) 0xC7, (byte) 0xC8, (byte) 0xC9, (byte) 0xCA, (byte) 0xBF, (byte) 0xCC, (byte) 0xBF, (byte) 0xCE, (byte) 0xCF,
-/* D- */ (byte) 0xD0, (byte) 0xD1, (byte) 0xD2, (byte) 0xD3, (byte) 0xD4, (byte) 0xD5, (byte) 0xD6, (byte) 0xD7, (byte) 0xD8, (byte) 0xD9, (byte) 0xDA, (byte) 0xDA, (byte) 0xDC, (byte) 0xDC, (byte) 0xDC, (byte) 0xDF,
-/* E- */ (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF,
-/* F- */ (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0xF7, (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0xFF,
+/* 0- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
+/* 1- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
+/* 2- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
+/* 3- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
+/* 4- */ (byte) 0x40, (byte) 0x40, (byte) 0x42, (byte) 0x42, (byte) 0x44, (byte) 0x45, (byte) 0x46, (byte) 0x47, (byte) 0x47, (byte) 0x49, (byte) 0x4A, (byte) 0x4B, (byte) 0x4C, (byte) 0x4D, (byte) 0x4E, (byte) 0x4F,
+/* 5- */ (byte) 0x50, (byte) 0x49, (byte) 0x52, (byte) 0x53, (byte) 0x54, (byte) 0x55, (byte) 0x56, (byte) 0x56, (byte) 0x58, (byte) 0x58, (byte) 0x5A, (byte) 0x5B, (byte) 0x5C, (byte) 0x5D, (byte) 0x5E, (byte) 0x5F,
+/* 6- */ (byte) 0x60, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x63, (byte) 0x65, (byte) 0x65, (byte) 0x67, (byte) 0x67, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
+/* 7- */ (byte) 0x69, (byte) 0x71, (byte) 0x71, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, (byte) 0x77, (byte) 0x79, (byte) 0x7A, (byte) 0x7B, (byte) 0x7C, (byte) 0x7D, (byte) 0x7E, (byte) 0x7F,
+/* 8- */ (byte) 0x80, (byte) 0x81, (byte) 0x82, (byte) 0x83, (byte) 0x84, (byte) 0x85, (byte) 0x86, (byte) 0x87, (byte) 0x88, (byte) 0x89, (byte) 0x80, (byte) 0x8B, (byte) 0x8B, (byte) 0x8D, (byte) 0x8D, (byte) 0x8F,
+/* 9- */ (byte) 0x90, (byte) 0x91, (byte) 0x92, (byte) 0x93, (byte) 0x94, (byte) 0x95, (byte) 0x96, (byte) 0x97, (byte) 0x98, (byte) 0x99, (byte) 0x9A, (byte) 0x9A, (byte) 0x9A, (byte) 0x9A, (byte) 0x9E, (byte) 0x9E,
+/* A- */ (byte) 0x9E, (byte) 0xA1, (byte) 0xA2, (byte) 0xA3, (byte) 0xA4, (byte) 0xA5, (byte) 0xA6, (byte) 0xA7, (byte) 0xA8, (byte) 0xA9, (byte) 0x9E, (byte) 0xAB, (byte) 0xAB, (byte) 0xAD, (byte) 0xAD, (byte) 0xAF,
+/* B- */ (byte) 0xAF, (byte) 0xB1, (byte) 0xB2, (byte) 0xB3, (byte) 0xB4, (byte) 0xB5, (byte) 0xB6, (byte) 0xB7, (byte) 0xB8, (byte) 0xB9, (byte) 0xB1, (byte) 0xBB, (byte) 0xBB, (byte) 0xBD, (byte) 0xBD, (byte) 0xBF,
+/* C- */ (byte) 0xC0, (byte) 0xC1, (byte) 0xC2, (byte) 0xC3, (byte) 0xC4, (byte) 0xC5, (byte) 0xC6, (byte) 0xC7, (byte) 0xC8, (byte) 0xC9, (byte) 0xCA, (byte) 0xBF, (byte) 0xCC, (byte) 0xBF, (byte) 0xCE, (byte) 0xCF,
+/* D- */ (byte) 0xD0, (byte) 0xD1, (byte) 0xD2, (byte) 0xD3, (byte) 0xD4, (byte) 0xD5, (byte) 0xD6, (byte) 0xD7, (byte) 0xD8, (byte) 0xD9, (byte) 0xDA, (byte) 0xDA, (byte) 0xDC, (byte) 0xDC, (byte) 0xDC, (byte) 0xDF,
+/* E- */ (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7, (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF,
+/* F- */ (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0xF7, (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0xFF,
};
-
+
public NGramParser_IBM420(int[] theNgramList, byte[] theByteMap)
{
super(theNgramList, theByteMap);
}
-
+
private byte isLamAlef(byte b) {
if(b == (byte)0xb2 || b == (byte)0xb3){
return (byte)0x47;
}else
return (byte)0x00;
}
-
+
/*
* Arabic shaping needs to be done manually. Cannot call ArabicShaping class
* because CharsetDetector is dealing with bytes not Unicode code points. We could
{
if (byteIndex >= det.fInputLen || det.fInputBytes[byteIndex] == 0) {
return -1;
- }
+ }
int next;
-
+
alef = isLamAlef(det.fInputBytes[byteIndex]);
if(alef != (byte)0x00)
next = 0xB1 & 0xFF;
else
next = unshapeMap[det.fInputBytes[byteIndex]& 0xFF] & 0xFF;
-
+
byteIndex++;
-
+
return next;
}
-
+
+ @Override
protected void parseCharacters(CharsetDetector det)
{
- int b;
+ int b;
boolean ignoreSpace = false;
-
+
while ((b = nextByte(det)) >= 0) {
byte mb = byteMap[b];
-
+
// TODO: 0x20 might not be a space in all character sets...
if (mb != 0) {
if (!(mb == spaceChar && ignoreSpace)) {
- addByte(mb);
+ addByte(mb);
}
-
+
ignoreSpace = (mb == spaceChar);
}
if(alef != (byte)0x00){
mb = byteMap[alef & 0xFF];
-
+
// TODO: 0x20 might not be a space in all character sets...
if (mb != 0) {
if (!(mb == spaceChar && ignoreSpace)) {
- addByte(mb);
+ addByte(mb);
}
-
+
ignoreSpace = (mb == spaceChar);
}
-
+
}
}
}
}
-
-
+
+
/**
 * Convenience overload that uses 0x20 as the space character.
 */
int match(CharsetDetector det, int[] ngrams, byte[] byteMap)
{
    final byte defaultSpace = (byte) 0x20;
    return match(det, ngrams, byteMap, defaultSpace);
}
-
+
/**
 * Scores the detector's input with a fresh {@code NGramParser}.
 *
 * @return the confidence value produced by {@code NGramParser.parse}
 */
int match(CharsetDetector det, int[] ngrams, byte[] byteMap, byte spaceChar)
{
    return new NGramParser(ngrams, byteMap).parse(det, spaceChar);
}
-
+
/**
 * Scores the detector's input with a fresh {@code NGramParser_IBM420}.
 *
 * @return the confidence value produced by the parser's {@code parse} method
 */
int matchIBM420(CharsetDetector det, int[] ngrams, byte[] byteMap, byte spaceChar){
    return new NGramParser_IBM420(ngrams, byteMap).parse(det, spaceChar);
}
-
+
static class NGramsPlusLang {
int[] fNGrams;
String fLang;
static class CharsetRecog_8859_1 extends CharsetRecog_sbcs
{
protected static byte[] byteMap = {
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
- (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
- (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
- (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
- (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
- (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
- (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0xAA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xB5, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0xBA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7,
- (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF,
- (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0x20,
- (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0xDF,
- (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7,
- (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF,
- (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0x20,
- (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0xFF,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
+ (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
+ (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
+ (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
+ (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
+ (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
+ (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0xAA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xB5, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0xBA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7,
+ (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF,
+ (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0x20,
+ (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0xDF,
+ (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7,
+ (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF,
+ (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0x20,
+ (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0xFF,
};
-
-
+
+
private static NGramsPlusLang[] ngrams_8859_1 = new NGramsPlusLang[] {
new NGramsPlusLang(
- "da",
+ "da",
new int[] {
- 0x206166, 0x206174, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207369, 0x207374, 0x207469, 0x207669, 0x616620,
- 0x616E20, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646572, 0x646574, 0x652073, 0x656420, 0x656465, 0x656E20, 0x656E64, 0x657220, 0x657265, 0x657320,
- 0x657420, 0x666F72, 0x676520, 0x67656E, 0x676572, 0x696765, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6572, 0x6C6967, 0x6C6C65, 0x6D6564, 0x6E6465, 0x6E6520,
- 0x6E6720, 0x6E6765, 0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722064, 0x722065, 0x722073, 0x726520, 0x737465, 0x742073, 0x746520, 0x746572, 0x74696C, 0x766572,
+ 0x206166, 0x206174, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207369, 0x207374, 0x207469, 0x207669, 0x616620,
+ 0x616E20, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646572, 0x646574, 0x652073, 0x656420, 0x656465, 0x656E20, 0x656E64, 0x657220, 0x657265, 0x657320,
+ 0x657420, 0x666F72, 0x676520, 0x67656E, 0x676572, 0x696765, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6572, 0x6C6967, 0x6C6C65, 0x6D6564, 0x6E6465, 0x6E6520,
+ 0x6E6720, 0x6E6765, 0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722064, 0x722065, 0x722073, 0x726520, 0x737465, 0x742073, 0x746520, 0x746572, 0x74696C, 0x766572,
}),
new NGramsPlusLang(
"de",
new int[] {
- 0x20616E, 0x206175, 0x206265, 0x206461, 0x206465, 0x206469, 0x206569, 0x206765, 0x206861, 0x20696E, 0x206D69, 0x207363, 0x207365, 0x20756E, 0x207665, 0x20766F,
- 0x207765, 0x207A75, 0x626572, 0x636820, 0x636865, 0x636874, 0x646173, 0x64656E, 0x646572, 0x646965, 0x652064, 0x652073, 0x65696E, 0x656974, 0x656E20, 0x657220,
- 0x657320, 0x67656E, 0x68656E, 0x687420, 0x696368, 0x696520, 0x696E20, 0x696E65, 0x697420, 0x6C6963, 0x6C6C65, 0x6E2061, 0x6E2064, 0x6E2073, 0x6E6420, 0x6E6465,
- 0x6E6520, 0x6E6720, 0x6E6765, 0x6E7465, 0x722064, 0x726465, 0x726569, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x756E64, 0x756E67, 0x766572,
+ 0x20616E, 0x206175, 0x206265, 0x206461, 0x206465, 0x206469, 0x206569, 0x206765, 0x206861, 0x20696E, 0x206D69, 0x207363, 0x207365, 0x20756E, 0x207665, 0x20766F,
+ 0x207765, 0x207A75, 0x626572, 0x636820, 0x636865, 0x636874, 0x646173, 0x64656E, 0x646572, 0x646965, 0x652064, 0x652073, 0x65696E, 0x656974, 0x656E20, 0x657220,
+ 0x657320, 0x67656E, 0x68656E, 0x687420, 0x696368, 0x696520, 0x696E20, 0x696E65, 0x697420, 0x6C6963, 0x6C6C65, 0x6E2061, 0x6E2064, 0x6E2073, 0x6E6420, 0x6E6465,
+ 0x6E6520, 0x6E6720, 0x6E6765, 0x6E7465, 0x722064, 0x726465, 0x726569, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x756E64, 0x756E67, 0x766572,
}),
new NGramsPlusLang(
"en",
new int[] {
- 0x206120, 0x20616E, 0x206265, 0x20636F, 0x20666F, 0x206861, 0x206865, 0x20696E, 0x206D61, 0x206F66, 0x207072, 0x207265, 0x207361, 0x207374, 0x207468, 0x20746F,
- 0x207768, 0x616964, 0x616C20, 0x616E20, 0x616E64, 0x617320, 0x617420, 0x617465, 0x617469, 0x642061, 0x642074, 0x652061, 0x652073, 0x652074, 0x656420, 0x656E74,
- 0x657220, 0x657320, 0x666F72, 0x686174, 0x686520, 0x686572, 0x696420, 0x696E20, 0x696E67, 0x696F6E, 0x697320, 0x6E2061, 0x6E2074, 0x6E6420, 0x6E6720, 0x6E7420,
- 0x6F6620, 0x6F6E20, 0x6F7220, 0x726520, 0x727320, 0x732061, 0x732074, 0x736169, 0x737420, 0x742074, 0x746572, 0x746861, 0x746865, 0x74696F, 0x746F20, 0x747320,
+ 0x206120, 0x20616E, 0x206265, 0x20636F, 0x20666F, 0x206861, 0x206865, 0x20696E, 0x206D61, 0x206F66, 0x207072, 0x207265, 0x207361, 0x207374, 0x207468, 0x20746F,
+ 0x207768, 0x616964, 0x616C20, 0x616E20, 0x616E64, 0x617320, 0x617420, 0x617465, 0x617469, 0x642061, 0x642074, 0x652061, 0x652073, 0x652074, 0x656420, 0x656E74,
+ 0x657220, 0x657320, 0x666F72, 0x686174, 0x686520, 0x686572, 0x696420, 0x696E20, 0x696E67, 0x696F6E, 0x697320, 0x6E2061, 0x6E2074, 0x6E6420, 0x6E6720, 0x6E7420,
+ 0x6F6620, 0x6F6E20, 0x6F7220, 0x726520, 0x727320, 0x732061, 0x732074, 0x736169, 0x737420, 0x742074, 0x746572, 0x746861, 0x746865, 0x74696F, 0x746F20, 0x747320,
}),
new NGramsPlusLang(
"es",
new int[] {
- 0x206120, 0x206361, 0x20636F, 0x206465, 0x20656C, 0x20656E, 0x206573, 0x20696E, 0x206C61, 0x206C6F, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365,
- 0x20756E, 0x207920, 0x612063, 0x612064, 0x612065, 0x61206C, 0x612070, 0x616369, 0x61646F, 0x616C20, 0x617220, 0x617320, 0x6369F3, 0x636F6E, 0x646520, 0x64656C,
- 0x646F20, 0x652064, 0x652065, 0x65206C, 0x656C20, 0x656E20, 0x656E74, 0x657320, 0x657374, 0x69656E, 0x69F36E, 0x6C6120, 0x6C6F73, 0x6E2065, 0x6E7465, 0x6F2064,
- 0x6F2065, 0x6F6E20, 0x6F7220, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732064, 0x732065, 0x732070, 0x736520, 0x746520, 0x746F20, 0x756520, 0xF36E20,
+ 0x206120, 0x206361, 0x20636F, 0x206465, 0x20656C, 0x20656E, 0x206573, 0x20696E, 0x206C61, 0x206C6F, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365,
+ 0x20756E, 0x207920, 0x612063, 0x612064, 0x612065, 0x61206C, 0x612070, 0x616369, 0x61646F, 0x616C20, 0x617220, 0x617320, 0x6369F3, 0x636F6E, 0x646520, 0x64656C,
+ 0x646F20, 0x652064, 0x652065, 0x65206C, 0x656C20, 0x656E20, 0x656E74, 0x657320, 0x657374, 0x69656E, 0x69F36E, 0x6C6120, 0x6C6F73, 0x6E2065, 0x6E7465, 0x6F2064,
+ 0x6F2065, 0x6F6E20, 0x6F7220, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732064, 0x732065, 0x732070, 0x736520, 0x746520, 0x746F20, 0x756520, 0xF36E20,
}),
-
+
new NGramsPlusLang(
"fr",
new int[] {
- 0x206175, 0x20636F, 0x206461, 0x206465, 0x206475, 0x20656E, 0x206574, 0x206C61, 0x206C65, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207365, 0x20736F, 0x20756E,
- 0x20E020, 0x616E74, 0x617469, 0x636520, 0x636F6E, 0x646520, 0x646573, 0x647520, 0x652061, 0x652063, 0x652064, 0x652065, 0x65206C, 0x652070, 0x652073, 0x656E20,
- 0x656E74, 0x657220, 0x657320, 0x657420, 0x657572, 0x696F6E, 0x697320, 0x697420, 0x6C6120, 0x6C6520, 0x6C6573, 0x6D656E, 0x6E2064, 0x6E6520, 0x6E7320, 0x6E7420,
+ 0x206175, 0x20636F, 0x206461, 0x206465, 0x206475, 0x20656E, 0x206574, 0x206C61, 0x206C65, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207365, 0x20736F, 0x20756E,
+ 0x20E020, 0x616E74, 0x617469, 0x636520, 0x636F6E, 0x646520, 0x646573, 0x647520, 0x652061, 0x652063, 0x652064, 0x652065, 0x65206C, 0x652070, 0x652073, 0x656E20,
+ 0x656E74, 0x657220, 0x657320, 0x657420, 0x657572, 0x696F6E, 0x697320, 0x697420, 0x6C6120, 0x6C6520, 0x6C6573, 0x6D656E, 0x6E2064, 0x6E6520, 0x6E7320, 0x6E7420,
0x6F6E20, 0x6F6E74, 0x6F7572, 0x717565, 0x72206C, 0x726520, 0x732061, 0x732064, 0x732065, 0x73206C, 0x732070, 0x742064, 0x746520, 0x74696F, 0x756520, 0x757220,
}),
new NGramsPlusLang(
"it",
new int[] {
- 0x20616C, 0x206368, 0x20636F, 0x206465, 0x206469, 0x206520, 0x20696C, 0x20696E, 0x206C61, 0x207065, 0x207072, 0x20756E, 0x612063, 0x612064, 0x612070, 0x612073,
- 0x61746F, 0x636865, 0x636F6E, 0x64656C, 0x646920, 0x652061, 0x652063, 0x652064, 0x652069, 0x65206C, 0x652070, 0x652073, 0x656C20, 0x656C6C, 0x656E74, 0x657220,
- 0x686520, 0x692061, 0x692063, 0x692064, 0x692073, 0x696120, 0x696C20, 0x696E20, 0x696F6E, 0x6C6120, 0x6C6520, 0x6C6920, 0x6C6C61, 0x6E6520, 0x6E6920, 0x6E6F20,
- 0x6E7465, 0x6F2061, 0x6F2064, 0x6F2069, 0x6F2073, 0x6F6E20, 0x6F6E65, 0x706572, 0x726120, 0x726520, 0x736920, 0x746120, 0x746520, 0x746920, 0x746F20, 0x7A696F,
+ 0x20616C, 0x206368, 0x20636F, 0x206465, 0x206469, 0x206520, 0x20696C, 0x20696E, 0x206C61, 0x207065, 0x207072, 0x20756E, 0x612063, 0x612064, 0x612070, 0x612073,
+ 0x61746F, 0x636865, 0x636F6E, 0x64656C, 0x646920, 0x652061, 0x652063, 0x652064, 0x652069, 0x65206C, 0x652070, 0x652073, 0x656C20, 0x656C6C, 0x656E74, 0x657220,
+ 0x686520, 0x692061, 0x692063, 0x692064, 0x692073, 0x696120, 0x696C20, 0x696E20, 0x696F6E, 0x6C6120, 0x6C6520, 0x6C6920, 0x6C6C61, 0x6E6520, 0x6E6920, 0x6E6F20,
+ 0x6E7465, 0x6F2061, 0x6F2064, 0x6F2069, 0x6F2073, 0x6F6E20, 0x6F6E65, 0x706572, 0x726120, 0x726520, 0x736920, 0x746120, 0x746520, 0x746920, 0x746F20, 0x7A696F,
}),
-
+
new NGramsPlusLang(
"nl",
new int[] {
- 0x20616C, 0x206265, 0x206461, 0x206465, 0x206469, 0x206565, 0x20656E, 0x206765, 0x206865, 0x20696E, 0x206D61, 0x206D65, 0x206F70, 0x207465, 0x207661, 0x207665,
- 0x20766F, 0x207765, 0x207A69, 0x61616E, 0x616172, 0x616E20, 0x616E64, 0x617220, 0x617420, 0x636874, 0x646520, 0x64656E, 0x646572, 0x652062, 0x652076, 0x65656E,
- 0x656572, 0x656E20, 0x657220, 0x657273, 0x657420, 0x67656E, 0x686574, 0x696520, 0x696E20, 0x696E67, 0x697320, 0x6E2062, 0x6E2064, 0x6E2065, 0x6E2068, 0x6E206F,
- 0x6E2076, 0x6E6465, 0x6E6720, 0x6F6E64, 0x6F6F72, 0x6F7020, 0x6F7220, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x76616E, 0x766572, 0x766F6F,
+ 0x20616C, 0x206265, 0x206461, 0x206465, 0x206469, 0x206565, 0x20656E, 0x206765, 0x206865, 0x20696E, 0x206D61, 0x206D65, 0x206F70, 0x207465, 0x207661, 0x207665,
+ 0x20766F, 0x207765, 0x207A69, 0x61616E, 0x616172, 0x616E20, 0x616E64, 0x617220, 0x617420, 0x636874, 0x646520, 0x64656E, 0x646572, 0x652062, 0x652076, 0x65656E,
+ 0x656572, 0x656E20, 0x657220, 0x657273, 0x657420, 0x67656E, 0x686574, 0x696520, 0x696E20, 0x696E67, 0x697320, 0x6E2062, 0x6E2064, 0x6E2065, 0x6E2068, 0x6E206F,
+ 0x6E2076, 0x6E6465, 0x6E6720, 0x6F6E64, 0x6F6F72, 0x6F7020, 0x6F7220, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x76616E, 0x766572, 0x766F6F,
}),
-
+
new NGramsPlusLang(
"no",
new int[] {
- 0x206174, 0x206176, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207365, 0x20736B, 0x20736F, 0x207374, 0x207469,
- 0x207669, 0x20E520, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646574, 0x652073, 0x656420, 0x656E20, 0x656E65, 0x657220, 0x657265, 0x657420, 0x657474,
- 0x666F72, 0x67656E, 0x696B6B, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6520, 0x6C6C65, 0x6D6564, 0x6D656E, 0x6E2073, 0x6E6520, 0x6E6720, 0x6E6765, 0x6E6E65,
- 0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722073, 0x726520, 0x736F6D, 0x737465, 0x742073, 0x746520, 0x74656E, 0x746572, 0x74696C, 0x747420, 0x747465, 0x766572,
+ 0x206174, 0x206176, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207365, 0x20736B, 0x20736F, 0x207374, 0x207469,
+ 0x207669, 0x20E520, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646574, 0x652073, 0x656420, 0x656E20, 0x656E65, 0x657220, 0x657265, 0x657420, 0x657474,
+ 0x666F72, 0x67656E, 0x696B6B, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6520, 0x6C6C65, 0x6D6564, 0x6D656E, 0x6E2073, 0x6E6520, 0x6E6720, 0x6E6765, 0x6E6E65,
+ 0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722073, 0x726520, 0x736F6D, 0x737465, 0x742073, 0x746520, 0x74656E, 0x746572, 0x74696C, 0x747420, 0x747465, 0x766572,
}),
-
+
new NGramsPlusLang(
"pt",
new int[] {
- 0x206120, 0x20636F, 0x206461, 0x206465, 0x20646F, 0x206520, 0x206573, 0x206D61, 0x206E6F, 0x206F20, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365,
- 0x20756D, 0x612061, 0x612063, 0x612064, 0x612070, 0x616465, 0x61646F, 0x616C20, 0x617220, 0x617261, 0x617320, 0x636F6D, 0x636F6E, 0x646120, 0x646520, 0x646F20,
- 0x646F73, 0x652061, 0x652064, 0x656D20, 0x656E74, 0x657320, 0x657374, 0x696120, 0x696361, 0x6D656E, 0x6E7465, 0x6E746F, 0x6F2061, 0x6F2063, 0x6F2064, 0x6F2065,
- 0x6F2070, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732061, 0x732064, 0x732065, 0x732070, 0x737461, 0x746520, 0x746F20, 0x756520, 0xE36F20, 0xE7E36F,
+ 0x206120, 0x20636F, 0x206461, 0x206465, 0x20646F, 0x206520, 0x206573, 0x206D61, 0x206E6F, 0x206F20, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365,
+ 0x20756D, 0x612061, 0x612063, 0x612064, 0x612070, 0x616465, 0x61646F, 0x616C20, 0x617220, 0x617261, 0x617320, 0x636F6D, 0x636F6E, 0x646120, 0x646520, 0x646F20,
+ 0x646F73, 0x652061, 0x652064, 0x656D20, 0x656E74, 0x657320, 0x657374, 0x696120, 0x696361, 0x6D656E, 0x6E7465, 0x6E746F, 0x6F2061, 0x6F2063, 0x6F2064, 0x6F2065,
+ 0x6F2070, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732061, 0x732064, 0x732065, 0x732070, 0x737461, 0x746520, 0x746F20, 0x756520, 0xE36F20, 0xE7E36F,
}),
-
+
new NGramsPlusLang(
"sv",
new int[] {
- 0x206174, 0x206176, 0x206465, 0x20656E, 0x2066F6, 0x206861, 0x206920, 0x20696E, 0x206B6F, 0x206D65, 0x206F63, 0x2070E5, 0x20736B, 0x20736F, 0x207374, 0x207469,
- 0x207661, 0x207669, 0x20E472, 0x616465, 0x616E20, 0x616E64, 0x617220, 0x617474, 0x636820, 0x646520, 0x64656E, 0x646572, 0x646574, 0x656420, 0x656E20, 0x657220,
- 0x657420, 0x66F672, 0x67656E, 0x696C6C, 0x696E67, 0x6B6120, 0x6C6C20, 0x6D6564, 0x6E2073, 0x6E6120, 0x6E6465, 0x6E6720, 0x6E6765, 0x6E696E, 0x6F6368, 0x6F6D20,
- 0x6F6E20, 0x70E520, 0x722061, 0x722073, 0x726120, 0x736B61, 0x736F6D, 0x742073, 0x746120, 0x746520, 0x746572, 0x74696C, 0x747420, 0x766172, 0xE47220, 0xF67220,
+ 0x206174, 0x206176, 0x206465, 0x20656E, 0x2066F6, 0x206861, 0x206920, 0x20696E, 0x206B6F, 0x206D65, 0x206F63, 0x2070E5, 0x20736B, 0x20736F, 0x207374, 0x207469,
+ 0x207661, 0x207669, 0x20E472, 0x616465, 0x616E20, 0x616E64, 0x617220, 0x617474, 0x636820, 0x646520, 0x64656E, 0x646572, 0x646574, 0x656420, 0x656E20, 0x657220,
+ 0x657420, 0x66F672, 0x67656E, 0x696C6C, 0x696E67, 0x6B6120, 0x6C6C20, 0x6D6564, 0x6E2073, 0x6E6120, 0x6E6465, 0x6E6720, 0x6E6765, 0x6E696E, 0x6F6368, 0x6F6D20,
+ 0x6F6E20, 0x70E520, 0x722061, 0x722073, 0x726120, 0x736B61, 0x736F6D, 0x742073, 0x746120, 0x746520, 0x746572, 0x74696C, 0x747420, 0x766172, 0xE47220, 0xF67220,
}),
-
+
};
-
+
+ @Override
public CharsetMatch match(CharsetDetector det)
{
String name = det.fC1Bytes ? "windows-1252" : "ISO-8859-1";
return bestConfidenceSoFar <= 0 ? null : new CharsetMatch(det, this, bestConfidenceSoFar, name, lang);
}
-
+
+ @Override
public String getName()
{
return "ISO-8859-1";
}
}
-
+
static class CharsetRecog_8859_2 extends CharsetRecog_sbcs
{
protected static byte[] byteMap = {
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
- (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
- (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
- (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
- (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
- (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
- (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0xB1, (byte) 0x20, (byte) 0xB3, (byte) 0x20, (byte) 0xB5, (byte) 0xB6, (byte) 0x20,
- (byte) 0x20, (byte) 0xB9, (byte) 0xBA, (byte) 0xBB, (byte) 0xBC, (byte) 0x20, (byte) 0xBE, (byte) 0xBF,
- (byte) 0x20, (byte) 0xB1, (byte) 0x20, (byte) 0xB3, (byte) 0x20, (byte) 0xB5, (byte) 0xB6, (byte) 0xB7,
- (byte) 0x20, (byte) 0xB9, (byte) 0xBA, (byte) 0xBB, (byte) 0xBC, (byte) 0x20, (byte) 0xBE, (byte) 0xBF,
- (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7,
- (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF,
- (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0x20,
- (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0xDF,
- (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7,
- (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF,
- (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0x20,
- (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
+ (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
+ (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
+ (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
+ (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
+ (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
+ (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0xB1, (byte) 0x20, (byte) 0xB3, (byte) 0x20, (byte) 0xB5, (byte) 0xB6, (byte) 0x20,
+ (byte) 0x20, (byte) 0xB9, (byte) 0xBA, (byte) 0xBB, (byte) 0xBC, (byte) 0x20, (byte) 0xBE, (byte) 0xBF,
+ (byte) 0x20, (byte) 0xB1, (byte) 0x20, (byte) 0xB3, (byte) 0x20, (byte) 0xB5, (byte) 0xB6, (byte) 0xB7,
+ (byte) 0x20, (byte) 0xB9, (byte) 0xBA, (byte) 0xBB, (byte) 0xBC, (byte) 0x20, (byte) 0xBE, (byte) 0xBF,
+ (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7,
+ (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF,
+ (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0x20,
+ (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0xDF,
+ (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7,
+ (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF,
+ (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0x20,
+ (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0x20,
};
private static NGramsPlusLang[] ngrams_8859_2 = new NGramsPlusLang[] {
new NGramsPlusLang(
- "cs",
+ "cs",
new int[] {
- 0x206120, 0x206279, 0x20646F, 0x206A65, 0x206E61, 0x206E65, 0x206F20, 0x206F64, 0x20706F, 0x207072, 0x2070F8, 0x20726F, 0x207365, 0x20736F, 0x207374, 0x20746F,
- 0x207620, 0x207679, 0x207A61, 0x612070, 0x636520, 0x636820, 0x652070, 0x652073, 0x652076, 0x656D20, 0x656EED, 0x686F20, 0x686F64, 0x697374, 0x6A6520, 0x6B7465,
- 0x6C6520, 0x6C6920, 0x6E6120, 0x6EE920, 0x6EEC20, 0x6EED20, 0x6F2070, 0x6F646E, 0x6F6A69, 0x6F7374, 0x6F7520, 0x6F7661, 0x706F64, 0x706F6A, 0x70726F, 0x70F865,
- 0x736520, 0x736F75, 0x737461, 0x737469, 0x73746E, 0x746572, 0x746EED, 0x746F20, 0x752070, 0xBE6520, 0xE16EED, 0xE9686F, 0xED2070, 0xED2073, 0xED6D20, 0xF86564,
+ 0x206120, 0x206279, 0x20646F, 0x206A65, 0x206E61, 0x206E65, 0x206F20, 0x206F64, 0x20706F, 0x207072, 0x2070F8, 0x20726F, 0x207365, 0x20736F, 0x207374, 0x20746F,
+ 0x207620, 0x207679, 0x207A61, 0x612070, 0x636520, 0x636820, 0x652070, 0x652073, 0x652076, 0x656D20, 0x656EED, 0x686F20, 0x686F64, 0x697374, 0x6A6520, 0x6B7465,
+ 0x6C6520, 0x6C6920, 0x6E6120, 0x6EE920, 0x6EEC20, 0x6EED20, 0x6F2070, 0x6F646E, 0x6F6A69, 0x6F7374, 0x6F7520, 0x6F7661, 0x706F64, 0x706F6A, 0x70726F, 0x70F865,
+ 0x736520, 0x736F75, 0x737461, 0x737469, 0x73746E, 0x746572, 0x746EED, 0x746F20, 0x752070, 0xBE6520, 0xE16EED, 0xE9686F, 0xED2070, 0xED2073, 0xED6D20, 0xF86564,
}),
new NGramsPlusLang(
- "hu",
+ "hu",
new int[] {
- 0x206120, 0x20617A, 0x206265, 0x206567, 0x20656C, 0x206665, 0x206861, 0x20686F, 0x206973, 0x206B65, 0x206B69, 0x206BF6, 0x206C65, 0x206D61, 0x206D65, 0x206D69,
- 0x206E65, 0x20737A, 0x207465, 0x20E973, 0x612061, 0x61206B, 0x61206D, 0x612073, 0x616B20, 0x616E20, 0x617A20, 0x62616E, 0x62656E, 0x656779, 0x656B20, 0x656C20,
- 0x656C65, 0x656D20, 0x656E20, 0x657265, 0x657420, 0x657465, 0x657474, 0x677920, 0x686F67, 0x696E74, 0x697320, 0x6B2061, 0x6BF67A, 0x6D6567, 0x6D696E, 0x6E2061,
- 0x6E616B, 0x6E656B, 0x6E656D, 0x6E7420, 0x6F6779, 0x732061, 0x737A65, 0x737A74, 0x737AE1, 0x73E967, 0x742061, 0x747420, 0x74E173, 0x7A6572, 0xE16E20, 0xE97320,
+ 0x206120, 0x20617A, 0x206265, 0x206567, 0x20656C, 0x206665, 0x206861, 0x20686F, 0x206973, 0x206B65, 0x206B69, 0x206BF6, 0x206C65, 0x206D61, 0x206D65, 0x206D69,
+ 0x206E65, 0x20737A, 0x207465, 0x20E973, 0x612061, 0x61206B, 0x61206D, 0x612073, 0x616B20, 0x616E20, 0x617A20, 0x62616E, 0x62656E, 0x656779, 0x656B20, 0x656C20,
+ 0x656C65, 0x656D20, 0x656E20, 0x657265, 0x657420, 0x657465, 0x657474, 0x677920, 0x686F67, 0x696E74, 0x697320, 0x6B2061, 0x6BF67A, 0x6D6567, 0x6D696E, 0x6E2061,
+ 0x6E616B, 0x6E656B, 0x6E656D, 0x6E7420, 0x6F6779, 0x732061, 0x737A65, 0x737A74, 0x737AE1, 0x73E967, 0x742061, 0x747420, 0x74E173, 0x7A6572, 0xE16E20, 0xE97320,
}),
new NGramsPlusLang(
- "pl",
+ "pl",
new int[] {
- 0x20637A, 0x20646F, 0x206920, 0x206A65, 0x206B6F, 0x206D61, 0x206D69, 0x206E61, 0x206E69, 0x206F64, 0x20706F, 0x207072, 0x207369, 0x207720, 0x207769, 0x207779,
- 0x207A20, 0x207A61, 0x612070, 0x612077, 0x616E69, 0x636820, 0x637A65, 0x637A79, 0x646F20, 0x647A69, 0x652070, 0x652073, 0x652077, 0x65207A, 0x65676F, 0x656A20,
- 0x656D20, 0x656E69, 0x676F20, 0x696120, 0x696520, 0x69656A, 0x6B6120, 0x6B6920, 0x6B6965, 0x6D6965, 0x6E6120, 0x6E6961, 0x6E6965, 0x6F2070, 0x6F7761, 0x6F7769,
- 0x706F6C, 0x707261, 0x70726F, 0x70727A, 0x727A65, 0x727A79, 0x7369EA, 0x736B69, 0x737461, 0x776965, 0x796368, 0x796D20, 0x7A6520, 0x7A6965, 0x7A7920, 0xF37720,
+ 0x20637A, 0x20646F, 0x206920, 0x206A65, 0x206B6F, 0x206D61, 0x206D69, 0x206E61, 0x206E69, 0x206F64, 0x20706F, 0x207072, 0x207369, 0x207720, 0x207769, 0x207779,
+ 0x207A20, 0x207A61, 0x612070, 0x612077, 0x616E69, 0x636820, 0x637A65, 0x637A79, 0x646F20, 0x647A69, 0x652070, 0x652073, 0x652077, 0x65207A, 0x65676F, 0x656A20,
+ 0x656D20, 0x656E69, 0x676F20, 0x696120, 0x696520, 0x69656A, 0x6B6120, 0x6B6920, 0x6B6965, 0x6D6965, 0x6E6120, 0x6E6961, 0x6E6965, 0x6F2070, 0x6F7761, 0x6F7769,
+ 0x706F6C, 0x707261, 0x70726F, 0x70727A, 0x727A65, 0x727A79, 0x7369EA, 0x736B69, 0x737461, 0x776965, 0x796368, 0x796D20, 0x7A6520, 0x7A6965, 0x7A7920, 0xF37720,
}),
new NGramsPlusLang(
- "ro",
+ "ro",
new int[] {
- 0x206120, 0x206163, 0x206361, 0x206365, 0x20636F, 0x206375, 0x206465, 0x206469, 0x206C61, 0x206D61, 0x207065, 0x207072, 0x207365, 0x2073E3, 0x20756E, 0x20BA69,
- 0x20EE6E, 0x612063, 0x612064, 0x617265, 0x617420, 0x617465, 0x617520, 0x636172, 0x636F6E, 0x637520, 0x63E320, 0x646520, 0x652061, 0x652063, 0x652064, 0x652070,
- 0x652073, 0x656120, 0x656920, 0x656C65, 0x656E74, 0x657374, 0x692061, 0x692063, 0x692064, 0x692070, 0x696520, 0x696920, 0x696E20, 0x6C6120, 0x6C6520, 0x6C6F72,
- 0x6C7569, 0x6E6520, 0x6E7472, 0x6F7220, 0x70656E, 0x726520, 0x726561, 0x727520, 0x73E320, 0x746520, 0x747275, 0x74E320, 0x756920, 0x756C20, 0xBA6920, 0xEE6E20,
+ 0x206120, 0x206163, 0x206361, 0x206365, 0x20636F, 0x206375, 0x206465, 0x206469, 0x206C61, 0x206D61, 0x207065, 0x207072, 0x207365, 0x2073E3, 0x20756E, 0x20BA69,
+ 0x20EE6E, 0x612063, 0x612064, 0x617265, 0x617420, 0x617465, 0x617520, 0x636172, 0x636F6E, 0x637520, 0x63E320, 0x646520, 0x652061, 0x652063, 0x652064, 0x652070,
+ 0x652073, 0x656120, 0x656920, 0x656C65, 0x656E74, 0x657374, 0x692061, 0x692063, 0x692064, 0x692070, 0x696520, 0x696920, 0x696E20, 0x6C6120, 0x6C6520, 0x6C6F72,
+ 0x6C7569, 0x6E6520, 0x6E7472, 0x6F7220, 0x70656E, 0x726520, 0x726561, 0x727520, 0x73E320, 0x746520, 0x747275, 0x74E320, 0x756920, 0x756C20, 0xBA6920, 0xEE6E20,
})
};
+ @Override
public CharsetMatch match(CharsetDetector det)
{
String name = det.fC1Bytes ? "windows-1250" : "ISO-8859-2";
return bestConfidenceSoFar <= 0 ? null : new CharsetMatch(det, this, bestConfidenceSoFar, name, lang);
}
+ @Override
public String getName()
{
return "ISO-8859-2";
}
}
-
-
+
+
abstract static class CharsetRecog_8859_5 extends CharsetRecog_sbcs
{
protected static byte[] byteMap = {
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
- (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
- (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
- (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
- (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
- (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
- (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0xF7,
- (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0x20, (byte) 0xFE, (byte) 0xFF,
- (byte) 0xD0, (byte) 0xD1, (byte) 0xD2, (byte) 0xD3, (byte) 0xD4, (byte) 0xD5, (byte) 0xD6, (byte) 0xD7,
- (byte) 0xD8, (byte) 0xD9, (byte) 0xDA, (byte) 0xDB, (byte) 0xDC, (byte) 0xDD, (byte) 0xDE, (byte) 0xDF,
- (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7,
- (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF,
- (byte) 0xD0, (byte) 0xD1, (byte) 0xD2, (byte) 0xD3, (byte) 0xD4, (byte) 0xD5, (byte) 0xD6, (byte) 0xD7,
- (byte) 0xD8, (byte) 0xD9, (byte) 0xDA, (byte) 0xDB, (byte) 0xDC, (byte) 0xDD, (byte) 0xDE, (byte) 0xDF,
- (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7,
- (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF,
- (byte) 0x20, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0xF7,
- (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0x20, (byte) 0xFE, (byte) 0xFF,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
+ (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
+ (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
+ (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
+ (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
+ (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
+ (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0xF7,
+ (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0x20, (byte) 0xFE, (byte) 0xFF,
+ (byte) 0xD0, (byte) 0xD1, (byte) 0xD2, (byte) 0xD3, (byte) 0xD4, (byte) 0xD5, (byte) 0xD6, (byte) 0xD7,
+ (byte) 0xD8, (byte) 0xD9, (byte) 0xDA, (byte) 0xDB, (byte) 0xDC, (byte) 0xDD, (byte) 0xDE, (byte) 0xDF,
+ (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7,
+ (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF,
+ (byte) 0xD0, (byte) 0xD1, (byte) 0xD2, (byte) 0xD3, (byte) 0xD4, (byte) 0xD5, (byte) 0xD6, (byte) 0xD7,
+ (byte) 0xD8, (byte) 0xD9, (byte) 0xDA, (byte) 0xDB, (byte) 0xDC, (byte) 0xDD, (byte) 0xDE, (byte) 0xDF,
+ (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7,
+ (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF,
+ (byte) 0x20, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0xF7,
+ (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0x20, (byte) 0xFE, (byte) 0xFF,
};
+ @Override
public String getName()
{
return "ISO-8859-5";
}
}
-
+
static class CharsetRecog_8859_5_ru extends CharsetRecog_8859_5
{
private static int[] ngrams = {
- 0x20D220, 0x20D2DE, 0x20D4DE, 0x20D7D0, 0x20D820, 0x20DAD0, 0x20DADE, 0x20DDD0, 0x20DDD5, 0x20DED1, 0x20DFDE, 0x20DFE0, 0x20E0D0, 0x20E1DE, 0x20E1E2, 0x20E2DE,
- 0x20E7E2, 0x20EDE2, 0xD0DDD8, 0xD0E2EC, 0xD3DE20, 0xD5DBEC, 0xD5DDD8, 0xD5E1E2, 0xD5E220, 0xD820DF, 0xD8D520, 0xD8D820, 0xD8EF20, 0xDBD5DD, 0xDBD820, 0xDBECDD,
- 0xDDD020, 0xDDD520, 0xDDD8D5, 0xDDD8EF, 0xDDDE20, 0xDDDED2, 0xDE20D2, 0xDE20DF, 0xDE20E1, 0xDED220, 0xDED2D0, 0xDED3DE, 0xDED920, 0xDEDBEC, 0xDEDC20, 0xDEE1E2,
- 0xDFDEDB, 0xDFE0D5, 0xDFE0D8, 0xDFE0DE, 0xE0D0D2, 0xE0D5D4, 0xE1E2D0, 0xE1E2D2, 0xE1E2D8, 0xE1EF20, 0xE2D5DB, 0xE2DE20, 0xE2DEE0, 0xE2EC20, 0xE7E2DE, 0xEBE520,
+ 0x20D220, 0x20D2DE, 0x20D4DE, 0x20D7D0, 0x20D820, 0x20DAD0, 0x20DADE, 0x20DDD0, 0x20DDD5, 0x20DED1, 0x20DFDE, 0x20DFE0, 0x20E0D0, 0x20E1DE, 0x20E1E2, 0x20E2DE,
+ 0x20E7E2, 0x20EDE2, 0xD0DDD8, 0xD0E2EC, 0xD3DE20, 0xD5DBEC, 0xD5DDD8, 0xD5E1E2, 0xD5E220, 0xD820DF, 0xD8D520, 0xD8D820, 0xD8EF20, 0xDBD5DD, 0xDBD820, 0xDBECDD,
+ 0xDDD020, 0xDDD520, 0xDDD8D5, 0xDDD8EF, 0xDDDE20, 0xDDDED2, 0xDE20D2, 0xDE20DF, 0xDE20E1, 0xDED220, 0xDED2D0, 0xDED3DE, 0xDED920, 0xDEDBEC, 0xDEDC20, 0xDEE1E2,
+ 0xDFDEDB, 0xDFE0D5, 0xDFE0D8, 0xDFE0DE, 0xE0D0D2, 0xE0D5D4, 0xE1E2D0, 0xE1E2D2, 0xE1E2D8, 0xE1EF20, 0xE2D5DB, 0xE2DE20, 0xE2DEE0, 0xE2EC20, 0xE7E2DE, 0xEBE520,
};
+ @Override
public String getLanguage()
{
return "ru";
}
-
+
+ @Override
public CharsetMatch match(CharsetDetector det)
{
int confidence = match(det, ngrams, byteMap);
return confidence == 0 ? null : new CharsetMatch(det, this, confidence);
}
}
-
+
abstract static class CharsetRecog_8859_6 extends CharsetRecog_sbcs
{
protected static byte[] byteMap = {
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
- (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
- (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
- (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
- (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
- (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
- (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0xC1, (byte) 0xC2, (byte) 0xC3, (byte) 0xC4, (byte) 0xC5, (byte) 0xC6, (byte) 0xC7,
- (byte) 0xC8, (byte) 0xC9, (byte) 0xCA, (byte) 0xCB, (byte) 0xCC, (byte) 0xCD, (byte) 0xCE, (byte) 0xCF,
- (byte) 0xD0, (byte) 0xD1, (byte) 0xD2, (byte) 0xD3, (byte) 0xD4, (byte) 0xD5, (byte) 0xD6, (byte) 0xD7,
- (byte) 0xD8, (byte) 0xD9, (byte) 0xDA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7,
- (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
+ (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
+ (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
+ (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
+ (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
+ (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
+ (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0xC1, (byte) 0xC2, (byte) 0xC3, (byte) 0xC4, (byte) 0xC5, (byte) 0xC6, (byte) 0xC7,
+ (byte) 0xC8, (byte) 0xC9, (byte) 0xCA, (byte) 0xCB, (byte) 0xCC, (byte) 0xCD, (byte) 0xCE, (byte) 0xCF,
+ (byte) 0xD0, (byte) 0xD1, (byte) 0xD2, (byte) 0xD3, (byte) 0xD4, (byte) 0xD5, (byte) 0xD6, (byte) 0xD7,
+ (byte) 0xD8, (byte) 0xD9, (byte) 0xDA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7,
+ (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
};
+ @Override
public String getName()
{
return "ISO-8859-6";
}
}
-
+
static class CharsetRecog_8859_6_ar extends CharsetRecog_8859_6
{
private static int[] ngrams = {
- 0x20C7E4, 0x20C7E6, 0x20C8C7, 0x20D9E4, 0x20E1EA, 0x20E4E4, 0x20E5E6, 0x20E8C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E420, 0xC7E4C3, 0xC7E4C7, 0xC7E4C8,
- 0xC7E4CA, 0xC7E4CC, 0xC7E4CD, 0xC7E4CF, 0xC7E4D3, 0xC7E4D9, 0xC7E4E2, 0xC7E4E5, 0xC7E4E8, 0xC7E4EA, 0xC7E520, 0xC7E620, 0xC7E6CA, 0xC820C7, 0xC920C7, 0xC920E1,
- 0xC920E4, 0xC920E5, 0xC920E8, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xD920C7, 0xD9E4E9, 0xE1EA20, 0xE420C7, 0xE4C920, 0xE4E920, 0xE4EA20,
- 0xE520C7, 0xE5C720, 0xE5C920, 0xE5E620, 0xE620C7, 0xE720C7, 0xE7C720, 0xE8C7E4, 0xE8E620, 0xE920C7, 0xEA20C7, 0xEA20E5, 0xEA20E8, 0xEAC920, 0xEAD120, 0xEAE620,
+ 0x20C7E4, 0x20C7E6, 0x20C8C7, 0x20D9E4, 0x20E1EA, 0x20E4E4, 0x20E5E6, 0x20E8C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E420, 0xC7E4C3, 0xC7E4C7, 0xC7E4C8,
+ 0xC7E4CA, 0xC7E4CC, 0xC7E4CD, 0xC7E4CF, 0xC7E4D3, 0xC7E4D9, 0xC7E4E2, 0xC7E4E5, 0xC7E4E8, 0xC7E4EA, 0xC7E520, 0xC7E620, 0xC7E6CA, 0xC820C7, 0xC920C7, 0xC920E1,
+ 0xC920E4, 0xC920E5, 0xC920E8, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xD920C7, 0xD9E4E9, 0xE1EA20, 0xE420C7, 0xE4C920, 0xE4E920, 0xE4EA20,
+ 0xE520C7, 0xE5C720, 0xE5C920, 0xE5E620, 0xE620C7, 0xE720C7, 0xE7C720, 0xE8C7E4, 0xE8E620, 0xE920C7, 0xEA20C7, 0xEA20E5, 0xEA20E8, 0xEAC920, 0xEAD120, 0xEAE620,
};
+ @Override
public String getLanguage()
{
return "ar";
}
-
+
+ @Override
public CharsetMatch match(CharsetDetector det)
{
int confidence = match(det, ngrams, byteMap);
return confidence == 0 ? null : new CharsetMatch(det, this, confidence);
}
}
-
+
abstract static class CharsetRecog_8859_7 extends CharsetRecog_sbcs
{
protected static byte[] byteMap = {
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
- (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
- (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
- (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
- (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
- (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
- (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0xA1, (byte) 0xA2, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xDC, (byte) 0x20,
- (byte) 0xDD, (byte) 0xDE, (byte) 0xDF, (byte) 0x20, (byte) 0xFC, (byte) 0x20, (byte) 0xFD, (byte) 0xFE,
- (byte) 0xC0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7,
- (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF,
- (byte) 0xF0, (byte) 0xF1, (byte) 0x20, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0xF7,
- (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xDC, (byte) 0xDD, (byte) 0xDE, (byte) 0xDF,
- (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7,
- (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF,
- (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0xF7,
- (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
+ (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
+ (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
+ (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
+ (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
+ (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
+ (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0xA1, (byte) 0xA2, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xDC, (byte) 0x20,
+ (byte) 0xDD, (byte) 0xDE, (byte) 0xDF, (byte) 0x20, (byte) 0xFC, (byte) 0x20, (byte) 0xFD, (byte) 0xFE,
+ (byte) 0xC0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7,
+ (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF,
+ (byte) 0xF0, (byte) 0xF1, (byte) 0x20, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0xF7,
+ (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xDC, (byte) 0xDD, (byte) 0xDE, (byte) 0xDF,
+ (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7,
+ (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF,
+ (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0xF7,
+ (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0x20,
};
+ @Override
public String getName()
{
return "ISO-8859-7";
}
}
-
+
static class CharsetRecog_8859_7_el extends CharsetRecog_8859_7
{
private static int[] ngrams = {
- 0x20E1ED, 0x20E1F0, 0x20E3E9, 0x20E4E9, 0x20E5F0, 0x20E720, 0x20EAE1, 0x20ECE5, 0x20EDE1, 0x20EF20, 0x20F0E1, 0x20F0EF, 0x20F0F1, 0x20F3F4, 0x20F3F5, 0x20F4E7,
- 0x20F4EF, 0xDFE120, 0xE120E1, 0xE120F4, 0xE1E920, 0xE1ED20, 0xE1F0FC, 0xE1F220, 0xE3E9E1, 0xE5E920, 0xE5F220, 0xE720F4, 0xE7ED20, 0xE7F220, 0xE920F4, 0xE9E120,
- 0xE9EADE, 0xE9F220, 0xEAE1E9, 0xEAE1F4, 0xECE520, 0xED20E1, 0xED20E5, 0xED20F0, 0xEDE120, 0xEFF220, 0xEFF520, 0xF0EFF5, 0xF0F1EF, 0xF0FC20, 0xF220E1, 0xF220E5,
- 0xF220EA, 0xF220F0, 0xF220F4, 0xF3E520, 0xF3E720, 0xF3F4EF, 0xF4E120, 0xF4E1E9, 0xF4E7ED, 0xF4E7F2, 0xF4E9EA, 0xF4EF20, 0xF4EFF5, 0xF4F9ED, 0xF9ED20, 0xFEED20,
+ 0x20E1ED, 0x20E1F0, 0x20E3E9, 0x20E4E9, 0x20E5F0, 0x20E720, 0x20EAE1, 0x20ECE5, 0x20EDE1, 0x20EF20, 0x20F0E1, 0x20F0EF, 0x20F0F1, 0x20F3F4, 0x20F3F5, 0x20F4E7,
+ 0x20F4EF, 0xDFE120, 0xE120E1, 0xE120F4, 0xE1E920, 0xE1ED20, 0xE1F0FC, 0xE1F220, 0xE3E9E1, 0xE5E920, 0xE5F220, 0xE720F4, 0xE7ED20, 0xE7F220, 0xE920F4, 0xE9E120,
+ 0xE9EADE, 0xE9F220, 0xEAE1E9, 0xEAE1F4, 0xECE520, 0xED20E1, 0xED20E5, 0xED20F0, 0xEDE120, 0xEFF220, 0xEFF520, 0xF0EFF5, 0xF0F1EF, 0xF0FC20, 0xF220E1, 0xF220E5,
+ 0xF220EA, 0xF220F0, 0xF220F4, 0xF3E520, 0xF3E720, 0xF3F4EF, 0xF4E120, 0xF4E1E9, 0xF4E7ED, 0xF4E7F2, 0xF4E9EA, 0xF4EF20, 0xF4EFF5, 0xF4F9ED, 0xF9ED20, 0xFEED20,
};
+ @Override
public String getLanguage()
{
return "el";
}
-
+
+ @Override
public CharsetMatch match(CharsetDetector det)
{
String name = det.fC1Bytes ? "windows-1253" : "ISO-8859-7";
return confidence == 0 ? null : new CharsetMatch(det, this, confidence, name, "el");
}
}
-
+
abstract static class CharsetRecog_8859_8 extends CharsetRecog_sbcs
{
protected static byte[] byteMap = {
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
- (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
- (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
- (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
- (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
- (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
- (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xB5, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7,
- (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF,
- (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0xF7,
- (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
+ (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
+ (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
+ (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
+ (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
+ (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
+ (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xB5, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7,
+ (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF,
+ (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0xF7,
+ (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
};
+ @Override
// Returns the charset name string reported by this recognizer family: "ISO-8859-8".
public String getName() {
    return "ISO-8859-8";
}
}
-
+
static class CharsetRecog_8859_8_I_he extends CharsetRecog_8859_8
{
private static int[] ngrams = {
- 0x20E0E5, 0x20E0E7, 0x20E0E9, 0x20E0FA, 0x20E1E9, 0x20E1EE, 0x20E4E0, 0x20E4E5, 0x20E4E9, 0x20E4EE, 0x20E4F2, 0x20E4F9, 0x20E4FA, 0x20ECE0, 0x20ECE4, 0x20EEE0,
- 0x20F2EC, 0x20F9EC, 0xE0FA20, 0xE420E0, 0xE420E1, 0xE420E4, 0xE420EC, 0xE420EE, 0xE420F9, 0xE4E5E0, 0xE5E020, 0xE5ED20, 0xE5EF20, 0xE5F820, 0xE5FA20, 0xE920E4,
- 0xE9E420, 0xE9E5FA, 0xE9E9ED, 0xE9ED20, 0xE9EF20, 0xE9F820, 0xE9FA20, 0xEC20E0, 0xEC20E4, 0xECE020, 0xECE420, 0xED20E0, 0xED20E1, 0xED20E4, 0xED20EC, 0xED20EE,
- 0xED20F9, 0xEEE420, 0xEF20E4, 0xF0E420, 0xF0E920, 0xF0E9ED, 0xF2EC20, 0xF820E4, 0xF8E9ED, 0xF9EC20, 0xFA20E0, 0xFA20E1, 0xFA20E4, 0xFA20EC, 0xFA20EE, 0xFA20F9,
+ 0x20E0E5, 0x20E0E7, 0x20E0E9, 0x20E0FA, 0x20E1E9, 0x20E1EE, 0x20E4E0, 0x20E4E5, 0x20E4E9, 0x20E4EE, 0x20E4F2, 0x20E4F9, 0x20E4FA, 0x20ECE0, 0x20ECE4, 0x20EEE0,
+ 0x20F2EC, 0x20F9EC, 0xE0FA20, 0xE420E0, 0xE420E1, 0xE420E4, 0xE420EC, 0xE420EE, 0xE420F9, 0xE4E5E0, 0xE5E020, 0xE5ED20, 0xE5EF20, 0xE5F820, 0xE5FA20, 0xE920E4,
+ 0xE9E420, 0xE9E5FA, 0xE9E9ED, 0xE9ED20, 0xE9EF20, 0xE9F820, 0xE9FA20, 0xEC20E0, 0xEC20E4, 0xECE020, 0xECE420, 0xED20E0, 0xED20E1, 0xED20E4, 0xED20EC, 0xED20EE,
+ 0xED20F9, 0xEEE420, 0xEF20E4, 0xF0E420, 0xF0E920, 0xF0E9ED, 0xF2EC20, 0xF820E4, 0xF8E9ED, 0xF9EC20, 0xFA20E0, 0xFA20E1, 0xFA20E4, 0xFA20EC, 0xFA20EE, 0xFA20F9,
};
+ @Override
// Returns the charset name string for this recognizer: "ISO-8859-8-I".
public String getName() {
    return "ISO-8859-8-I";
}
+ @Override
// Returns the language string associated with this recognizer: "he".
public String getLanguage() {
    return "he";
}
-
+
+ @Override
// Scores the detector's input against this class's n-gram table and returns a
// CharsetMatch tagged "he", or null when the score is zero.
// The reported charset name is "windows-1255" when det.fC1Bytes is set and
// "ISO-8859-8-I" otherwise.
public CharsetMatch match(CharsetDetector det)
{
    String name = det.fC1Bytes ? "windows-1255" : "ISO-8859-8-I";
    // The score must be computed here before it is tested; this mirrors the
    // windows-1251 / windows-1256 recognizers in this file, which call the same
    // three-argument match overload.
    // NOTE(review): byteMap is assumed to be inherited from CharsetRecog_8859_8 -
    // its declaration is outside the visible region; confirm.
    int confidence = match(det, ngrams, byteMap);
    return confidence == 0 ? null : new CharsetMatch(det, this, confidence, name, "he");
}
}
-
+
static class CharsetRecog_8859_8_he extends CharsetRecog_8859_8
{
private static int[] ngrams = {
- 0x20E0E5, 0x20E0EC, 0x20E4E9, 0x20E4EC, 0x20E4EE, 0x20E4F0, 0x20E9F0, 0x20ECF2, 0x20ECF9, 0x20EDE5, 0x20EDE9, 0x20EFE5, 0x20EFE9, 0x20F8E5, 0x20F8E9, 0x20FAE0,
- 0x20FAE5, 0x20FAE9, 0xE020E4, 0xE020EC, 0xE020ED, 0xE020FA, 0xE0E420, 0xE0E5E4, 0xE0EC20, 0xE0EE20, 0xE120E4, 0xE120ED, 0xE120FA, 0xE420E4, 0xE420E9, 0xE420EC,
- 0xE420ED, 0xE420EF, 0xE420F8, 0xE420FA, 0xE4EC20, 0xE5E020, 0xE5E420, 0xE7E020, 0xE9E020, 0xE9E120, 0xE9E420, 0xEC20E4, 0xEC20ED, 0xEC20FA, 0xECF220, 0xECF920,
- 0xEDE9E9, 0xEDE9F0, 0xEDE9F8, 0xEE20E4, 0xEE20ED, 0xEE20FA, 0xEEE120, 0xEEE420, 0xF2E420, 0xF920E4, 0xF920ED, 0xF920FA, 0xF9E420, 0xFAE020, 0xFAE420, 0xFAE5E9,
+ 0x20E0E5, 0x20E0EC, 0x20E4E9, 0x20E4EC, 0x20E4EE, 0x20E4F0, 0x20E9F0, 0x20ECF2, 0x20ECF9, 0x20EDE5, 0x20EDE9, 0x20EFE5, 0x20EFE9, 0x20F8E5, 0x20F8E9, 0x20FAE0,
+ 0x20FAE5, 0x20FAE9, 0xE020E4, 0xE020EC, 0xE020ED, 0xE020FA, 0xE0E420, 0xE0E5E4, 0xE0EC20, 0xE0EE20, 0xE120E4, 0xE120ED, 0xE120FA, 0xE420E4, 0xE420E9, 0xE420EC,
+ 0xE420ED, 0xE420EF, 0xE420F8, 0xE420FA, 0xE4EC20, 0xE5E020, 0xE5E420, 0xE7E020, 0xE9E020, 0xE9E120, 0xE9E420, 0xEC20E4, 0xEC20ED, 0xEC20FA, 0xECF220, 0xECF920,
+ 0xEDE9E9, 0xEDE9F0, 0xEDE9F8, 0xEE20E4, 0xEE20ED, 0xEE20FA, 0xEEE120, 0xEEE420, 0xF2E420, 0xF920E4, 0xF920ED, 0xF920FA, 0xF9E420, 0xFAE020, 0xFAE420, 0xFAE5E9,
};
+ @Override
// Returns the language string associated with this recognizer: "he".
public String getLanguage() {
    return "he";
}
-
+
+ @Override
// Scores the detector's input against this class's n-gram table and returns a
// CharsetMatch tagged "he", or null when the score is zero.
// The reported charset name is "windows-1255" when det.fC1Bytes is set and
// "ISO-8859-8" otherwise.
public CharsetMatch match(CharsetDetector det)
{
    String name = det.fC1Bytes ? "windows-1255" : "ISO-8859-8";
    // A non-void method needs a result on every path: compute the score and build
    // the match exactly as the sibling ISO-8859-8-I recognizer's return does.
    // NOTE(review): byteMap is assumed to be inherited from CharsetRecog_8859_8 -
    // its declaration is outside the visible region; confirm.
    int confidence = match(det, ngrams, byteMap);
    return confidence == 0 ? null : new CharsetMatch(det, this, confidence, name, "he");
}
}
-
+
abstract static class CharsetRecog_8859_9 extends CharsetRecog_sbcs
{
protected static byte[] byteMap = {
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
- (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
- (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
- (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
- (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
- (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
- (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0xAA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xB5, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0xBA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7,
- (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF,
- (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0x20,
- (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0x69, (byte) 0xFE, (byte) 0xDF,
- (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7,
- (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF,
- (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0x20,
- (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0xFF,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
+ (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
+ (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
+ (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
+ (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
+ (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
+ (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0xAA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xB5, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0xBA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7,
+ (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF,
+ (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0x20,
+ (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0x69, (byte) 0xFE, (byte) 0xDF,
+ (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7,
+ (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF,
+ (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0x20,
+ (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0xFF,
};
+ @Override
// Returns the charset name string reported by this recognizer family: "ISO-8859-9".
public String getName() {
    return "ISO-8859-9";
}
}
-
+
static class CharsetRecog_8859_9_tr extends CharsetRecog_8859_9
{
private static int[] ngrams = {
- 0x206261, 0x206269, 0x206275, 0x206461, 0x206465, 0x206765, 0x206861, 0x20696C, 0x206B61, 0x206B6F, 0x206D61, 0x206F6C, 0x207361, 0x207461, 0x207665, 0x207961,
- 0x612062, 0x616B20, 0x616C61, 0x616D61, 0x616E20, 0x616EFD, 0x617220, 0x617261, 0x6172FD, 0x6173FD, 0x617961, 0x626972, 0x646120, 0x646520, 0x646920, 0x652062,
- 0x65206B, 0x656469, 0x656E20, 0x657220, 0x657269, 0x657369, 0x696C65, 0x696E20, 0x696E69, 0x697220, 0x6C616E, 0x6C6172, 0x6C6520, 0x6C6572, 0x6E2061, 0x6E2062,
- 0x6E206B, 0x6E6461, 0x6E6465, 0x6E6520, 0x6E6920, 0x6E696E, 0x6EFD20, 0x72696E, 0x72FD6E, 0x766520, 0x796120, 0x796F72, 0xFD6E20, 0xFD6E64, 0xFD6EFD, 0xFDF0FD,
+ 0x206261, 0x206269, 0x206275, 0x206461, 0x206465, 0x206765, 0x206861, 0x20696C, 0x206B61, 0x206B6F, 0x206D61, 0x206F6C, 0x207361, 0x207461, 0x207665, 0x207961,
+ 0x612062, 0x616B20, 0x616C61, 0x616D61, 0x616E20, 0x616EFD, 0x617220, 0x617261, 0x6172FD, 0x6173FD, 0x617961, 0x626972, 0x646120, 0x646520, 0x646920, 0x652062,
+ 0x65206B, 0x656469, 0x656E20, 0x657220, 0x657269, 0x657369, 0x696C65, 0x696E20, 0x696E69, 0x697220, 0x6C616E, 0x6C6172, 0x6C6520, 0x6C6572, 0x6E2061, 0x6E2062,
+ 0x6E206B, 0x6E6461, 0x6E6465, 0x6E6520, 0x6E6920, 0x6E696E, 0x6EFD20, 0x72696E, 0x72FD6E, 0x766520, 0x796120, 0x796F72, 0xFD6E20, 0xFD6E64, 0xFD6EFD, 0xFDF0FD,
};
+ @Override
// Returns the language string associated with this recognizer: "tr".
public String getLanguage() {
    return "tr";
}
-
+
+ @Override
// Scores the detector's input against this class's n-gram table and the byteMap
// declared in CharsetRecog_8859_9, returning a CharsetMatch tagged "tr", or null
// when the score is zero.
// The reported charset name is "windows-1254" when det.fC1Bytes is set and
// "ISO-8859-9" otherwise.
public CharsetMatch match(CharsetDetector det)
{
    String name = det.fC1Bytes ? "windows-1254" : "ISO-8859-9";
    // The score must be computed here before it is tested; this mirrors the
    // windows-1251 / windows-1256 recognizers in this file, which call the same
    // three-argument match overload.
    int confidence = match(det, ngrams, byteMap);
    return confidence == 0 ? null : new CharsetMatch(det, this, confidence, name, "tr");
}
}
-
+
static class CharsetRecog_windows_1251 extends CharsetRecog_sbcs
{
private static int[] ngrams = {
- 0x20E220, 0x20E2EE, 0x20E4EE, 0x20E7E0, 0x20E820, 0x20EAE0, 0x20EAEE, 0x20EDE0, 0x20EDE5, 0x20EEE1, 0x20EFEE, 0x20EFF0, 0x20F0E0, 0x20F1EE, 0x20F1F2, 0x20F2EE,
- 0x20F7F2, 0x20FDF2, 0xE0EDE8, 0xE0F2FC, 0xE3EE20, 0xE5EBFC, 0xE5EDE8, 0xE5F1F2, 0xE5F220, 0xE820EF, 0xE8E520, 0xE8E820, 0xE8FF20, 0xEBE5ED, 0xEBE820, 0xEBFCED,
- 0xEDE020, 0xEDE520, 0xEDE8E5, 0xEDE8FF, 0xEDEE20, 0xEDEEE2, 0xEE20E2, 0xEE20EF, 0xEE20F1, 0xEEE220, 0xEEE2E0, 0xEEE3EE, 0xEEE920, 0xEEEBFC, 0xEEEC20, 0xEEF1F2,
- 0xEFEEEB, 0xEFF0E5, 0xEFF0E8, 0xEFF0EE, 0xF0E0E2, 0xF0E5E4, 0xF1F2E0, 0xF1F2E2, 0xF1F2E8, 0xF1FF20, 0xF2E5EB, 0xF2EE20, 0xF2EEF0, 0xF2FC20, 0xF7F2EE, 0xFBF520,
+ 0x20E220, 0x20E2EE, 0x20E4EE, 0x20E7E0, 0x20E820, 0x20EAE0, 0x20EAEE, 0x20EDE0, 0x20EDE5, 0x20EEE1, 0x20EFEE, 0x20EFF0, 0x20F0E0, 0x20F1EE, 0x20F1F2, 0x20F2EE,
+ 0x20F7F2, 0x20FDF2, 0xE0EDE8, 0xE0F2FC, 0xE3EE20, 0xE5EBFC, 0xE5EDE8, 0xE5F1F2, 0xE5F220, 0xE820EF, 0xE8E520, 0xE8E820, 0xE8FF20, 0xEBE5ED, 0xEBE820, 0xEBFCED,
+ 0xEDE020, 0xEDE520, 0xEDE8E5, 0xEDE8FF, 0xEDEE20, 0xEDEEE2, 0xEE20E2, 0xEE20EF, 0xEE20F1, 0xEEE220, 0xEEE2E0, 0xEEE3EE, 0xEEE920, 0xEEEBFC, 0xEEEC20, 0xEEF1F2,
+ 0xEFEEEB, 0xEFF0E5, 0xEFF0E8, 0xEFF0EE, 0xF0E0E2, 0xF0E5E4, 0xF1F2E0, 0xF1F2E2, 0xF1F2E8, 0xF1FF20, 0xF2E5EB, 0xF2EE20, 0xF2EEF0, 0xF2FC20, 0xF7F2EE, 0xFBF520,
};
private static byte[] byteMap = {
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
- (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
- (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
- (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
- (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
- (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
- (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x90, (byte) 0x83, (byte) 0x20, (byte) 0x83, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x9A, (byte) 0x20, (byte) 0x9C, (byte) 0x9D, (byte) 0x9E, (byte) 0x9F,
- (byte) 0x90, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x9A, (byte) 0x20, (byte) 0x9C, (byte) 0x9D, (byte) 0x9E, (byte) 0x9F,
- (byte) 0x20, (byte) 0xA2, (byte) 0xA2, (byte) 0xBC, (byte) 0x20, (byte) 0xB4, (byte) 0x20, (byte) 0x20,
- (byte) 0xB8, (byte) 0x20, (byte) 0xBA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xBF,
- (byte) 0x20, (byte) 0x20, (byte) 0xB3, (byte) 0xB3, (byte) 0xB4, (byte) 0xB5, (byte) 0x20, (byte) 0x20,
- (byte) 0xB8, (byte) 0x20, (byte) 0xBA, (byte) 0x20, (byte) 0xBC, (byte) 0xBE, (byte) 0xBE, (byte) 0xBF,
- (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7,
- (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF,
- (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0xF7,
- (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0xFF,
- (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7,
- (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF,
- (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0xF7,
- (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0xFF,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
+ (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
+ (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
+ (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
+ (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
+ (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
+ (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x90, (byte) 0x83, (byte) 0x20, (byte) 0x83, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x9A, (byte) 0x20, (byte) 0x9C, (byte) 0x9D, (byte) 0x9E, (byte) 0x9F,
+ (byte) 0x90, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x9A, (byte) 0x20, (byte) 0x9C, (byte) 0x9D, (byte) 0x9E, (byte) 0x9F,
+ (byte) 0x20, (byte) 0xA2, (byte) 0xA2, (byte) 0xBC, (byte) 0x20, (byte) 0xB4, (byte) 0x20, (byte) 0x20,
+ (byte) 0xB8, (byte) 0x20, (byte) 0xBA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xBF,
+ (byte) 0x20, (byte) 0x20, (byte) 0xB3, (byte) 0xB3, (byte) 0xB4, (byte) 0xB5, (byte) 0x20, (byte) 0x20,
+ (byte) 0xB8, (byte) 0x20, (byte) 0xBA, (byte) 0x20, (byte) 0xBC, (byte) 0xBE, (byte) 0xBE, (byte) 0xBF,
+ (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7,
+ (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF,
+ (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0xF7,
+ (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0xFF,
+ (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7,
+ (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF,
+ (byte) 0xF0, (byte) 0xF1, (byte) 0xF2, (byte) 0xF3, (byte) 0xF4, (byte) 0xF5, (byte) 0xF6, (byte) 0xF7,
+ (byte) 0xF8, (byte) 0xF9, (byte) 0xFA, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0xFF,
};
+ @Override
// Returns the charset name string for this recognizer: "windows-1251".
public String getName() {
    return "windows-1251";
}
-
+
+ @Override
// Returns the language string associated with this recognizer: "ru".
public String getLanguage() {
    return "ru";
}
-
+
+ @Override
// Runs the three-argument match overload (defined outside this class) with this
// class's ngrams and byteMap tables. A score of zero means "no match" and yields
// null; any other score is wrapped in a CharsetMatch for this recognizer.
public CharsetMatch match(CharsetDetector det) {
    final int score = match(det, ngrams, byteMap);
    if (score == 0) {
        return null;
    }
    return new CharsetMatch(det, this, score);
}
}
-
+
static class CharsetRecog_windows_1256 extends CharsetRecog_sbcs
{
private static int[] ngrams = {
- 0x20C7E1, 0x20C7E4, 0x20C8C7, 0x20DAE1, 0x20DDED, 0x20E1E1, 0x20E3E4, 0x20E6C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E120, 0xC7E1C3, 0xC7E1C7, 0xC7E1C8,
- 0xC7E1CA, 0xC7E1CC, 0xC7E1CD, 0xC7E1CF, 0xC7E1D3, 0xC7E1DA, 0xC7E1DE, 0xC7E1E3, 0xC7E1E6, 0xC7E1ED, 0xC7E320, 0xC7E420, 0xC7E4CA, 0xC820C7, 0xC920C7, 0xC920DD,
- 0xC920E1, 0xC920E3, 0xC920E6, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xDA20C7, 0xDAE1EC, 0xDDED20, 0xE120C7, 0xE1C920, 0xE1EC20, 0xE1ED20,
- 0xE320C7, 0xE3C720, 0xE3C920, 0xE3E420, 0xE420C7, 0xE520C7, 0xE5C720, 0xE6C7E1, 0xE6E420, 0xEC20C7, 0xED20C7, 0xED20E3, 0xED20E6, 0xEDC920, 0xEDD120, 0xEDE420,
+ 0x20C7E1, 0x20C7E4, 0x20C8C7, 0x20DAE1, 0x20DDED, 0x20E1E1, 0x20E3E4, 0x20E6C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E120, 0xC7E1C3, 0xC7E1C7, 0xC7E1C8,
+ 0xC7E1CA, 0xC7E1CC, 0xC7E1CD, 0xC7E1CF, 0xC7E1D3, 0xC7E1DA, 0xC7E1DE, 0xC7E1E3, 0xC7E1E6, 0xC7E1ED, 0xC7E320, 0xC7E420, 0xC7E4CA, 0xC820C7, 0xC920C7, 0xC920DD,
+ 0xC920E1, 0xC920E3, 0xC920E6, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xDA20C7, 0xDAE1EC, 0xDDED20, 0xE120C7, 0xE1C920, 0xE1EC20, 0xE1ED20,
+ 0xE320C7, 0xE3C720, 0xE3C920, 0xE3E420, 0xE420C7, 0xE520C7, 0xE5C720, 0xE6C7E1, 0xE6E420, 0xEC20C7, 0xED20C7, 0xED20E3, 0xED20E6, 0xEDC920, 0xEDD120, 0xEDE420,
};
private static byte[] byteMap = {
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
- (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
- (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
- (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
- (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
- (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
- (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x81, (byte) 0x20, (byte) 0x83, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x88, (byte) 0x20, (byte) 0x8A, (byte) 0x20, (byte) 0x9C, (byte) 0x8D, (byte) 0x8E, (byte) 0x8F,
- (byte) 0x90, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x98, (byte) 0x20, (byte) 0x9A, (byte) 0x20, (byte) 0x9C, (byte) 0x20, (byte) 0x20, (byte) 0x9F,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0xAA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xB5, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0xC0, (byte) 0xC1, (byte) 0xC2, (byte) 0xC3, (byte) 0xC4, (byte) 0xC5, (byte) 0xC6, (byte) 0xC7,
- (byte) 0xC8, (byte) 0xC9, (byte) 0xCA, (byte) 0xCB, (byte) 0xCC, (byte) 0xCD, (byte) 0xCE, (byte) 0xCF,
- (byte) 0xD0, (byte) 0xD1, (byte) 0xD2, (byte) 0xD3, (byte) 0xD4, (byte) 0xD5, (byte) 0xD6, (byte) 0x20,
- (byte) 0xD8, (byte) 0xD9, (byte) 0xDA, (byte) 0xDB, (byte) 0xDC, (byte) 0xDD, (byte) 0xDE, (byte) 0xDF,
- (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7,
- (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xF4, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0xF9, (byte) 0x20, (byte) 0xFB, (byte) 0xFC, (byte) 0x20, (byte) 0x20, (byte) 0xFF,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
+ (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
+ (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
+ (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
+ (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
+ (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
+ (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x81, (byte) 0x20, (byte) 0x83, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x88, (byte) 0x20, (byte) 0x8A, (byte) 0x20, (byte) 0x9C, (byte) 0x8D, (byte) 0x8E, (byte) 0x8F,
+ (byte) 0x90, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x98, (byte) 0x20, (byte) 0x9A, (byte) 0x20, (byte) 0x9C, (byte) 0x20, (byte) 0x20, (byte) 0x9F,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0xAA, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xB5, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0xC0, (byte) 0xC1, (byte) 0xC2, (byte) 0xC3, (byte) 0xC4, (byte) 0xC5, (byte) 0xC6, (byte) 0xC7,
+ (byte) 0xC8, (byte) 0xC9, (byte) 0xCA, (byte) 0xCB, (byte) 0xCC, (byte) 0xCD, (byte) 0xCE, (byte) 0xCF,
+ (byte) 0xD0, (byte) 0xD1, (byte) 0xD2, (byte) 0xD3, (byte) 0xD4, (byte) 0xD5, (byte) 0xD6, (byte) 0x20,
+ (byte) 0xD8, (byte) 0xD9, (byte) 0xDA, (byte) 0xDB, (byte) 0xDC, (byte) 0xDD, (byte) 0xDE, (byte) 0xDF,
+ (byte) 0xE0, (byte) 0xE1, (byte) 0xE2, (byte) 0xE3, (byte) 0xE4, (byte) 0xE5, (byte) 0xE6, (byte) 0xE7,
+ (byte) 0xE8, (byte) 0xE9, (byte) 0xEA, (byte) 0xEB, (byte) 0xEC, (byte) 0xED, (byte) 0xEE, (byte) 0xEF,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xF4, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0xF9, (byte) 0x20, (byte) 0xFB, (byte) 0xFC, (byte) 0x20, (byte) 0x20, (byte) 0xFF,
};
+ @Override
// Returns the charset name string for this recognizer: "windows-1256".
public String getName() {
    return "windows-1256";
}
-
+
+ @Override
// Returns the language string associated with this recognizer: "ar".
public String getLanguage() {
    return "ar";
}
-
+
+ @Override
// Runs the three-argument match overload (defined outside this class) with this
// class's ngrams and byteMap tables. A score of zero means "no match" and yields
// null; any other score is wrapped in a CharsetMatch for this recognizer.
public CharsetMatch match(CharsetDetector det) {
    final int score = match(det, ngrams, byteMap);
    if (score == 0) {
        return null;
    }
    return new CharsetMatch(det, this, score);
}
}
-
+
static class CharsetRecog_KOI8_R extends CharsetRecog_sbcs
{
private static int[] ngrams = {
- 0x20C4CF, 0x20C920, 0x20CBC1, 0x20CBCF, 0x20CEC1, 0x20CEC5, 0x20CFC2, 0x20D0CF, 0x20D0D2, 0x20D2C1, 0x20D3CF, 0x20D3D4, 0x20D4CF, 0x20D720, 0x20D7CF, 0x20DAC1,
- 0x20DCD4, 0x20DED4, 0xC1CEC9, 0xC1D4D8, 0xC5CCD8, 0xC5CEC9, 0xC5D3D4, 0xC5D420, 0xC7CF20, 0xC920D0, 0xC9C520, 0xC9C920, 0xC9D120, 0xCCC5CE, 0xCCC920, 0xCCD8CE,
- 0xCEC120, 0xCEC520, 0xCEC9C5, 0xCEC9D1, 0xCECF20, 0xCECFD7, 0xCF20D0, 0xCF20D3, 0xCF20D7, 0xCFC7CF, 0xCFCA20, 0xCFCCD8, 0xCFCD20, 0xCFD3D4, 0xCFD720, 0xCFD7C1,
- 0xD0CFCC, 0xD0D2C5, 0xD0D2C9, 0xD0D2CF, 0xD2C1D7, 0xD2C5C4, 0xD3D120, 0xD3D4C1, 0xD3D4C9, 0xD3D4D7, 0xD4C5CC, 0xD4CF20, 0xD4CFD2, 0xD4D820, 0xD9C820, 0xDED4CF,
+ 0x20C4CF, 0x20C920, 0x20CBC1, 0x20CBCF, 0x20CEC1, 0x20CEC5, 0x20CFC2, 0x20D0CF, 0x20D0D2, 0x20D2C1, 0x20D3CF, 0x20D3D4, 0x20D4CF, 0x20D720, 0x20D7CF, 0x20DAC1,
+ 0x20DCD4, 0x20DED4, 0xC1CEC9, 0xC1D4D8, 0xC5CCD8, 0xC5CEC9, 0xC5D3D4, 0xC5D420, 0xC7CF20, 0xC920D0, 0xC9C520, 0xC9C920, 0xC9D120, 0xCCC5CE, 0xCCC920, 0xCCD8CE,
+ 0xCEC120, 0xCEC520, 0xCEC9C5, 0xCEC9D1, 0xCECF20, 0xCECFD7, 0xCF20D0, 0xCF20D3, 0xCF20D7, 0xCFC7CF, 0xCFCA20, 0xCFCCD8, 0xCFCD20, 0xCFD3D4, 0xCFD720, 0xCFD7C1,
+ 0xD0CFCC, 0xD0D2C5, 0xD0D2C9, 0xD0D2CF, 0xD2C1D7, 0xD2C5C4, 0xD3D120, 0xD3D4C1, 0xD3D4C9, 0xD3D4D7, 0xD4C5CC, 0xD4CF20, 0xD4CFD2, 0xD4D820, 0xD9C820, 0xDED4CF,
};
private static byte[] byteMap = {
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
- (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
- (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
- (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
- (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
- (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
- (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xA3, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xA3, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
- (byte) 0xC0, (byte) 0xC1, (byte) 0xC2, (byte) 0xC3, (byte) 0xC4, (byte) 0xC5, (byte) 0xC6, (byte) 0xC7,
- (byte) 0xC8, (byte) 0xC9, (byte) 0xCA, (byte) 0xCB, (byte) 0xCC, (byte) 0xCD, (byte) 0xCE, (byte) 0xCF,
- (byte) 0xD0, (byte) 0xD1, (byte) 0xD2, (byte) 0xD3, (byte) 0xD4, (byte) 0xD5, (byte) 0xD6, (byte) 0xD7,
- (byte) 0xD8, (byte) 0xD9, (byte) 0xDA, (byte) 0xDB, (byte) 0xDC, (byte) 0xDD, (byte) 0xDE, (byte) 0xDF,
- (byte) 0xC0, (byte) 0xC1, (byte) 0xC2, (byte) 0xC3, (byte) 0xC4, (byte) 0xC5, (byte) 0xC6, (byte) 0xC7,
- (byte) 0xC8, (byte) 0xC9, (byte) 0xCA, (byte) 0xCB, (byte) 0xCC, (byte) 0xCD, (byte) 0xCE, (byte) 0xCF,
- (byte) 0xD0, (byte) 0xD1, (byte) 0xD2, (byte) 0xD3, (byte) 0xD4, (byte) 0xD5, (byte) 0xD6, (byte) 0xD7,
- (byte) 0xD8, (byte) 0xD9, (byte) 0xDA, (byte) 0xDB, (byte) 0xDC, (byte) 0xDD, (byte) 0xDE, (byte) 0xDF,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x00,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
+ (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
+ (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
+ (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67,
+ (byte) 0x68, (byte) 0x69, (byte) 0x6A, (byte) 0x6B, (byte) 0x6C, (byte) 0x6D, (byte) 0x6E, (byte) 0x6F,
+ (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77,
+ (byte) 0x78, (byte) 0x79, (byte) 0x7A, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xA3, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0xA3, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20, (byte) 0x20,
+ (byte) 0xC0, (byte) 0xC1, (byte) 0xC2, (byte) 0xC3, (byte) 0xC4, (byte) 0xC5, (byte) 0xC6, (byte) 0xC7,
+ (byte) 0xC8, (byte) 0xC9, (byte) 0xCA, (byte) 0xCB, (byte) 0xCC, (byte) 0xCD, (byte) 0xCE, (byte) 0xCF,
+ (byte) 0xD0, (byte) 0xD1, (byte) 0xD2, (byte) 0xD3, (byte) 0xD4, (byte) 0xD5, (byte) 0xD6, (byte) 0xD7,
+ (byte) 0xD8, (byte) 0xD9, (byte) 0xDA, (byte) 0xDB, (byte) 0xDC, (byte) 0xDD, (byte) 0xDE, (byte) 0xDF,
+ (byte) 0xC0, (byte) 0xC1, (byte) 0xC2, (byte) 0xC3, (byte) 0xC4, (byte) 0xC5, (byte) 0xC6, (byte) 0xC7,
+ (byte) 0xC8, (byte) 0xC9, (byte) 0xCA, (byte) 0xCB, (byte) 0xCC, (byte) 0xCD, (byte) 0xCE, (byte) 0xCF,
+ (byte) 0xD0, (byte) 0xD1, (byte) 0xD2, (byte) 0xD3, (byte) 0xD4, (byte) 0xD5, (byte) 0xD6, (byte) 0xD7,
+ (byte) 0xD8, (byte) 0xD9, (byte) 0xDA, (byte) 0xDB, (byte) 0xDC, (byte) 0xDD, (byte) 0xDE, (byte) 0xDF,
};
-
+
/**
* Returns the fixed name string reported by this recognizer.
*
* @return the literal {@code "KOI8-R"}
*/
+ @Override
public String getName()
{
return "KOI8-R";
}
-
+
/**
* Returns the fixed language code reported by this recognizer.
*
* @return the literal {@code "ru"}
*/
+ @Override
public String getLanguage()
{
return "ru";
}
-
+
/**
* Scores the detector's input against the KOI8-R {@code ngrams} and
* {@code byteMap} tables declared in this class.
*
* @param det the detector passed through to the three-argument {@code match}
*            overload (defined outside this excerpt) and to the result
* @return {@code null} when the computed confidence is 0; otherwise a new
*         {@code CharsetMatch} built from {@code det}, this recognizer and
*         the confidence value
*/
+ @Override
public CharsetMatch match(CharsetDetector det)
{
int confidence = match(det, ngrams, byteMap);
return confidence == 0 ? null : new CharsetMatch(det, this, confidence);
}
}
-
+
abstract static class CharsetRecog_IBM424_he extends CharsetRecog_sbcs
{
protected static byte[] byteMap = {
/* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F */
-/* 0- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
-/* 1- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
-/* 2- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
-/* 3- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
-/* 4- */ (byte) 0x40, (byte) 0x41, (byte) 0x42, (byte) 0x43, (byte) 0x44, (byte) 0x45, (byte) 0x46, (byte) 0x47, (byte) 0x48, (byte) 0x49, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
-/* 5- */ (byte) 0x40, (byte) 0x51, (byte) 0x52, (byte) 0x53, (byte) 0x54, (byte) 0x55, (byte) 0x56, (byte) 0x57, (byte) 0x58, (byte) 0x59, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
-/* 6- */ (byte) 0x40, (byte) 0x40, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, (byte) 0x68, (byte) 0x69, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
-/* 7- */ (byte) 0x40, (byte) 0x71, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x00, (byte) 0x40, (byte) 0x40,
-/* 8- */ (byte) 0x40, (byte) 0x81, (byte) 0x82, (byte) 0x83, (byte) 0x84, (byte) 0x85, (byte) 0x86, (byte) 0x87, (byte) 0x88, (byte) 0x89, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
-/* 9- */ (byte) 0x40, (byte) 0x91, (byte) 0x92, (byte) 0x93, (byte) 0x94, (byte) 0x95, (byte) 0x96, (byte) 0x97, (byte) 0x98, (byte) 0x99, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
-/* A- */ (byte) 0xA0, (byte) 0x40, (byte) 0xA2, (byte) 0xA3, (byte) 0xA4, (byte) 0xA5, (byte) 0xA6, (byte) 0xA7, (byte) 0xA8, (byte) 0xA9, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
-/* B- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
-/* C- */ (byte) 0x40, (byte) 0x81, (byte) 0x82, (byte) 0x83, (byte) 0x84, (byte) 0x85, (byte) 0x86, (byte) 0x87, (byte) 0x88, (byte) 0x89, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
-/* D- */ (byte) 0x40, (byte) 0x91, (byte) 0x92, (byte) 0x93, (byte) 0x94, (byte) 0x95, (byte) 0x96, (byte) 0x97, (byte) 0x98, (byte) 0x99, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
-/* E- */ (byte) 0x40, (byte) 0x40, (byte) 0xA2, (byte) 0xA3, (byte) 0xA4, (byte) 0xA5, (byte) 0xA6, (byte) 0xA7, (byte) 0xA8, (byte) 0xA9, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
-/* F- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
+/* 0- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
+/* 1- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
+/* 2- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
+/* 3- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
+/* 4- */ (byte) 0x40, (byte) 0x41, (byte) 0x42, (byte) 0x43, (byte) 0x44, (byte) 0x45, (byte) 0x46, (byte) 0x47, (byte) 0x48, (byte) 0x49, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
+/* 5- */ (byte) 0x40, (byte) 0x51, (byte) 0x52, (byte) 0x53, (byte) 0x54, (byte) 0x55, (byte) 0x56, (byte) 0x57, (byte) 0x58, (byte) 0x59, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
+/* 6- */ (byte) 0x40, (byte) 0x40, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, (byte) 0x68, (byte) 0x69, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
+/* 7- */ (byte) 0x40, (byte) 0x71, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x00, (byte) 0x40, (byte) 0x40,
+/* 8- */ (byte) 0x40, (byte) 0x81, (byte) 0x82, (byte) 0x83, (byte) 0x84, (byte) 0x85, (byte) 0x86, (byte) 0x87, (byte) 0x88, (byte) 0x89, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
+/* 9- */ (byte) 0x40, (byte) 0x91, (byte) 0x92, (byte) 0x93, (byte) 0x94, (byte) 0x95, (byte) 0x96, (byte) 0x97, (byte) 0x98, (byte) 0x99, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
+/* A- */ (byte) 0xA0, (byte) 0x40, (byte) 0xA2, (byte) 0xA3, (byte) 0xA4, (byte) 0xA5, (byte) 0xA6, (byte) 0xA7, (byte) 0xA8, (byte) 0xA9, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
+/* B- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
+/* C- */ (byte) 0x40, (byte) 0x81, (byte) 0x82, (byte) 0x83, (byte) 0x84, (byte) 0x85, (byte) 0x86, (byte) 0x87, (byte) 0x88, (byte) 0x89, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
+/* D- */ (byte) 0x40, (byte) 0x91, (byte) 0x92, (byte) 0x93, (byte) 0x94, (byte) 0x95, (byte) 0x96, (byte) 0x97, (byte) 0x98, (byte) 0x99, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
+/* E- */ (byte) 0x40, (byte) 0x40, (byte) 0xA2, (byte) 0xA3, (byte) 0xA4, (byte) 0xA5, (byte) 0xA6, (byte) 0xA7, (byte) 0xA8, (byte) 0xA9, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
+/* F- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
};
/**
* Returns the fixed language code shared by the IBM424 recognizers that
* extend this class.
*
* @return the literal {@code "he"}
*/
+ @Override
public String getLanguage()
{
return "he";
}
}
- static class CharsetRecog_IBM424_he_rtl extends CharsetRecog_IBM424_he
+ static class CharsetRecog_IBM424_he_rtl extends CharsetRecog_IBM424_he
{
/**
* Returns the fixed name string reported by this recognizer.
*
* @return the literal {@code "IBM424_rtl"}
*/
+ @Override
public String getName()
{
return "IBM424_rtl";
}
private static int[] ngrams = {
- 0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404546, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405641,
- 0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454056, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514045,
- 0x514540, 0x514671, 0x515155, 0x515540, 0x515740, 0x516840, 0x517140, 0x544041, 0x544045, 0x544140, 0x544540, 0x554041, 0x554042, 0x554045, 0x554054, 0x554056,
- 0x554069, 0x564540, 0x574045, 0x584540, 0x585140, 0x585155, 0x625440, 0x684045, 0x685155, 0x695440, 0x714041, 0x714042, 0x714045, 0x714054, 0x714056, 0x714069,
+ 0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404546, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405641,
+ 0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454056, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514045,
+ 0x514540, 0x514671, 0x515155, 0x515540, 0x515740, 0x516840, 0x517140, 0x544041, 0x544045, 0x544140, 0x544540, 0x554041, 0x554042, 0x554045, 0x554054, 0x554056,
+ 0x554069, 0x564540, 0x574045, 0x584540, 0x585140, 0x585155, 0x625440, 0x684045, 0x685155, 0x695440, 0x714041, 0x714042, 0x714045, 0x714054, 0x714056, 0x714069,
};
/**
* Scores the detector's input against this class's {@code ngrams} and the
* inherited {@code byteMap}, using the four-argument {@code match} overload
* (defined outside this excerpt).
*
* NOTE(review): the extra {@code (byte)0x40} argument equals the value most
* {@code byteMap} entries collapse to; presumably it is the space byte for
* this code page. Confirm against the overload's definition.
*
* @param det the detector passed through to the overload and to the result
* @return {@code null} when the computed confidence is 0; otherwise a new
*         {@code CharsetMatch} built from {@code det}, this recognizer and
*         the confidence value
*/
+ @Override
public CharsetMatch match(CharsetDetector det)
{
int confidence = match(det, ngrams, byteMap, (byte)0x40);
return confidence == 0 ? null : new CharsetMatch(det, this, confidence);
}
}
- static class CharsetRecog_IBM424_he_ltr extends CharsetRecog_IBM424_he
+ static class CharsetRecog_IBM424_he_ltr extends CharsetRecog_IBM424_he
{
+ @Override
public String getName()
{
return "IBM424_ltr";
0x555151, 0x555158, 0x555168, 0x564045, 0x564055, 0x564071, 0x564240, 0x564540, 0x624540, 0x694045, 0x694055, 0x694071, 0x694540, 0x714140, 0x714540, 0x714651
};
/**
* Scores the detector's input against this class's {@code ngrams} and the
* inherited {@code byteMap}, using the four-argument {@code match} overload
* (defined outside this excerpt).
*
* NOTE(review): the extra {@code (byte)0x40} argument is presumably the space
* byte for this code page. Confirm against the overload's definition.
*
* @param det the detector passed through to the overload and to the result
* @return {@code null} when the computed confidence is 0; otherwise a new
*         {@code CharsetMatch} built from {@code det}, this recognizer and
*         the confidence value
*/
+ @Override
public CharsetMatch match(CharsetDetector det)
{
int confidence = match(det, ngrams, byteMap, (byte)0x40);
return confidence == 0 ? null : new CharsetMatch(det, this, confidence);
}
}
-
+
abstract static class CharsetRecog_IBM420_ar extends CharsetRecog_sbcs
{
protected static byte[] byteMap = {
/* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F */
-/* 0- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
-/* 1- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
-/* 2- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
-/* 3- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
-/* 4- */ (byte) 0x40, (byte) 0x40, (byte) 0x42, (byte) 0x43, (byte) 0x44, (byte) 0x45, (byte) 0x46, (byte) 0x47, (byte) 0x48, (byte) 0x49, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
-/* 5- */ (byte) 0x40, (byte) 0x51, (byte) 0x52, (byte) 0x40, (byte) 0x40, (byte) 0x55, (byte) 0x56, (byte) 0x57, (byte) 0x58, (byte) 0x59, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
-/* 6- */ (byte) 0x40, (byte) 0x40, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, (byte) 0x68, (byte) 0x69, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
-/* 7- */ (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, (byte) 0x78, (byte) 0x79, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
-/* 8- */ (byte) 0x80, (byte) 0x81, (byte) 0x82, (byte) 0x83, (byte) 0x84, (byte) 0x85, (byte) 0x86, (byte) 0x87, (byte) 0x88, (byte) 0x89, (byte) 0x8A, (byte) 0x8B, (byte) 0x8C, (byte) 0x8D, (byte) 0x8E, (byte) 0x8F,
-/* 9- */ (byte) 0x90, (byte) 0x91, (byte) 0x92, (byte) 0x93, (byte) 0x94, (byte) 0x95, (byte) 0x96, (byte) 0x97, (byte) 0x98, (byte) 0x99, (byte) 0x9A, (byte) 0x9B, (byte) 0x9C, (byte) 0x9D, (byte) 0x9E, (byte) 0x9F,
-/* A- */ (byte) 0xA0, (byte) 0x40, (byte) 0xA2, (byte) 0xA3, (byte) 0xA4, (byte) 0xA5, (byte) 0xA6, (byte) 0xA7, (byte) 0xA8, (byte) 0xA9, (byte) 0xAA, (byte) 0xAB, (byte) 0xAC, (byte) 0xAD, (byte) 0xAE, (byte) 0xAF,
-/* B- */ (byte) 0xB0, (byte) 0xB1, (byte) 0xB2, (byte) 0xB3, (byte) 0xB4, (byte) 0xB5, (byte) 0x40, (byte) 0x40, (byte) 0xB8, (byte) 0xB9, (byte) 0xBA, (byte) 0xBB, (byte) 0xBC, (byte) 0xBD, (byte) 0xBE, (byte) 0xBF,
-/* C- */ (byte) 0x40, (byte) 0x81, (byte) 0x82, (byte) 0x83, (byte) 0x84, (byte) 0x85, (byte) 0x86, (byte) 0x87, (byte) 0x88, (byte) 0x89, (byte) 0x40, (byte) 0xCB, (byte) 0x40, (byte) 0xCD, (byte) 0x40, (byte) 0xCF,
-/* D- */ (byte) 0x40, (byte) 0x91, (byte) 0x92, (byte) 0x93, (byte) 0x94, (byte) 0x95, (byte) 0x96, (byte) 0x97, (byte) 0x98, (byte) 0x99, (byte) 0xDA, (byte) 0xDB, (byte) 0xDC, (byte) 0xDD, (byte) 0xDE, (byte) 0xDF,
-/* E- */ (byte) 0x40, (byte) 0x40, (byte) 0xA2, (byte) 0xA3, (byte) 0xA4, (byte) 0xA5, (byte) 0xA6, (byte) 0xA7, (byte) 0xA8, (byte) 0xA9, (byte) 0xEA, (byte) 0xEB, (byte) 0x40, (byte) 0xED, (byte) 0xEE, (byte) 0xEF,
-/* F- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0x40,
+/* 0- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
+/* 1- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
+/* 2- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
+/* 3- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
+/* 4- */ (byte) 0x40, (byte) 0x40, (byte) 0x42, (byte) 0x43, (byte) 0x44, (byte) 0x45, (byte) 0x46, (byte) 0x47, (byte) 0x48, (byte) 0x49, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
+/* 5- */ (byte) 0x40, (byte) 0x51, (byte) 0x52, (byte) 0x40, (byte) 0x40, (byte) 0x55, (byte) 0x56, (byte) 0x57, (byte) 0x58, (byte) 0x59, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
+/* 6- */ (byte) 0x40, (byte) 0x40, (byte) 0x62, (byte) 0x63, (byte) 0x64, (byte) 0x65, (byte) 0x66, (byte) 0x67, (byte) 0x68, (byte) 0x69, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
+/* 7- */ (byte) 0x70, (byte) 0x71, (byte) 0x72, (byte) 0x73, (byte) 0x74, (byte) 0x75, (byte) 0x76, (byte) 0x77, (byte) 0x78, (byte) 0x79, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40,
+/* 8- */ (byte) 0x80, (byte) 0x81, (byte) 0x82, (byte) 0x83, (byte) 0x84, (byte) 0x85, (byte) 0x86, (byte) 0x87, (byte) 0x88, (byte) 0x89, (byte) 0x8A, (byte) 0x8B, (byte) 0x8C, (byte) 0x8D, (byte) 0x8E, (byte) 0x8F,
+/* 9- */ (byte) 0x90, (byte) 0x91, (byte) 0x92, (byte) 0x93, (byte) 0x94, (byte) 0x95, (byte) 0x96, (byte) 0x97, (byte) 0x98, (byte) 0x99, (byte) 0x9A, (byte) 0x9B, (byte) 0x9C, (byte) 0x9D, (byte) 0x9E, (byte) 0x9F,
+/* A- */ (byte) 0xA0, (byte) 0x40, (byte) 0xA2, (byte) 0xA3, (byte) 0xA4, (byte) 0xA5, (byte) 0xA6, (byte) 0xA7, (byte) 0xA8, (byte) 0xA9, (byte) 0xAA, (byte) 0xAB, (byte) 0xAC, (byte) 0xAD, (byte) 0xAE, (byte) 0xAF,
+/* B- */ (byte) 0xB0, (byte) 0xB1, (byte) 0xB2, (byte) 0xB3, (byte) 0xB4, (byte) 0xB5, (byte) 0x40, (byte) 0x40, (byte) 0xB8, (byte) 0xB9, (byte) 0xBA, (byte) 0xBB, (byte) 0xBC, (byte) 0xBD, (byte) 0xBE, (byte) 0xBF,
+/* C- */ (byte) 0x40, (byte) 0x81, (byte) 0x82, (byte) 0x83, (byte) 0x84, (byte) 0x85, (byte) 0x86, (byte) 0x87, (byte) 0x88, (byte) 0x89, (byte) 0x40, (byte) 0xCB, (byte) 0x40, (byte) 0xCD, (byte) 0x40, (byte) 0xCF,
+/* D- */ (byte) 0x40, (byte) 0x91, (byte) 0x92, (byte) 0x93, (byte) 0x94, (byte) 0x95, (byte) 0x96, (byte) 0x97, (byte) 0x98, (byte) 0x99, (byte) 0xDA, (byte) 0xDB, (byte) 0xDC, (byte) 0xDD, (byte) 0xDE, (byte) 0xDF,
+/* E- */ (byte) 0x40, (byte) 0x40, (byte) 0xA2, (byte) 0xA3, (byte) 0xA4, (byte) 0xA5, (byte) 0xA6, (byte) 0xA7, (byte) 0xA8, (byte) 0xA9, (byte) 0xEA, (byte) 0xEB, (byte) 0x40, (byte) 0xED, (byte) 0xEE, (byte) 0xEF,
+/* F- */ (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0x40, (byte) 0xFB, (byte) 0xFC, (byte) 0xFD, (byte) 0xFE, (byte) 0x40,
};
-
+
/**
* Returns the fixed language code shared by the IBM420 recognizers that
* extend this class.
*
* @return the literal {@code "ar"}
*/
+ @Override
public String getLanguage()
{
return "ar";
}
-
+
}
- static class CharsetRecog_IBM420_ar_rtl extends CharsetRecog_IBM420_ar
+ static class CharsetRecog_IBM420_ar_rtl extends CharsetRecog_IBM420_ar
{
private static int[] ngrams = {
0x4056B1, 0x4056BD, 0x405856, 0x409AB1, 0x40ABDC, 0x40B1B1, 0x40BBBD, 0x40CF56, 0x564056, 0x564640, 0x566340, 0x567540, 0x56B140, 0x56B149, 0x56B156, 0x56B158,
0xBB4056, 0xBB5640, 0xBB6240, 0xBBBD40, 0xBD4056, 0xBF4056, 0xBF5640, 0xCF56B1, 0xCFBD40, 0xDA4056, 0xDC4056, 0xDC40BB, 0xDC40CF, 0xDC6240, 0xDC7540, 0xDCBD40,
};
/**
* Returns the fixed name string reported by this recognizer.
*
* @return the literal {@code "IBM420_rtl"}
*/
+ @Override
public String getName()
{
return "IBM420_rtl";
}
/**
* Scores the detector's input via {@code matchIBM420} (defined outside this
* excerpt), passing this class's {@code ngrams}, the inherited
* {@code byteMap} and the byte {@code 0x40}.
*
* NOTE(review): {@code 0x40} is presumably the space byte for this code page.
* Confirm against {@code matchIBM420}.
*
* @param det the detector passed through to {@code matchIBM420} and to the result
* @return {@code null} when the computed confidence is 0; otherwise a new
*         {@code CharsetMatch} built from {@code det}, this recognizer and
*         the confidence value
*/
+ @Override
public CharsetMatch match(CharsetDetector det)
{
int confidence = matchIBM420(det, ngrams, byteMap, (byte)0x40);
return confidence == 0 ? null : new CharsetMatch(det, this, confidence);
}
-
+
}
- static class CharsetRecog_IBM420_ar_ltr extends CharsetRecog_IBM420_ar
+ static class CharsetRecog_IBM420_ar_ltr extends CharsetRecog_IBM420_ar
{
private static int[] ngrams = {
- 0x404656, 0x4056BB, 0x4056BF, 0x406273, 0x406275, 0x4062B1, 0x4062BB, 0x4062DC, 0x406356, 0x407556, 0x4075DC, 0x40B156, 0x40BB56, 0x40BD56, 0x40BDBB, 0x40BDCF,
+ 0x404656, 0x4056BB, 0x4056BF, 0x406273, 0x406275, 0x4062B1, 0x4062BB, 0x4062DC, 0x406356, 0x407556, 0x4075DC, 0x40B156, 0x40BB56, 0x40BD56, 0x40BDBB, 0x40BDCF,
0x40BDDC, 0x40DAB1, 0x40DCAB, 0x40DCB1, 0x49B156, 0x564056, 0x564058, 0x564062, 0x564063, 0x564073, 0x564075, 0x564078, 0x56409A, 0x5640B1, 0x5640BB, 0x5640BD,
0x5640BF, 0x5640DA, 0x5640DC, 0x565840, 0x56B156, 0x56CF40, 0x58B156, 0x63B156, 0x63BD56, 0x67B156, 0x69B156, 0x73B156, 0x78B156, 0x9AB156, 0xAB4062, 0xADB156,
0xB14062, 0xB15640, 0xB156CF, 0xB19A40, 0xB1B140, 0xBB4062, 0xBB40DC, 0xBBB156, 0xBD5640, 0xBDBB40, 0xCF4062, 0xCF40DC, 0xCFB156, 0xDAB19A, 0xDCAB40, 0xDCB156
};
/**
* Returns the fixed name string reported by this recognizer.
*
* @return the literal {@code "IBM420_ltr"}
*/
+ @Override
public String getName()
{
return "IBM420_ltr";
}
/**
* Scores the detector's input via {@code matchIBM420} (defined outside this
* excerpt), passing this class's {@code ngrams}, the inherited
* {@code byteMap} and the byte {@code 0x40}.
*
* NOTE(review): {@code 0x40} is presumably the space byte for this code page.
* Confirm against {@code matchIBM420}.
*
* @param det the detector passed through to {@code matchIBM420} and to the result
* @return {@code null} when the computed confidence is 0; otherwise a new
*         {@code CharsetMatch} built from {@code det}, this recognizer and
*         the confidence value
*/
+ @Override
public CharsetMatch match(CharsetDetector det)
{
int confidence = matchIBM420(det, ngrams, byteMap, (byte)0x40);
return confidence == 0 ? null : new CharsetMatch(det, this, confidence);
}
-
+
}
}
public class ChineseDateFormat extends SimpleDateFormat {
// Generated by serialver from JDK 1.4.1_01
static final long serialVersionUID = -4610300753104099899L;
-
+
// TODO Finish the constructors
/**
*/
@Deprecated
public ChineseDateFormat(String pattern, String override, ULocale locale) {
// Everything is delegated to the superclass constructor: the pattern, a
// ChineseDateFormatSymbols built for the locale, a ChineseCalendar in the
// JVM default time zone for the same locale, the locale itself, and the
// override string.
- super(pattern, new ChineseDateFormatSymbols(locale),
+ super(pattern, new ChineseDateFormatSymbols(locale),
new ChineseCalendar(TimeZone.getDefault(), locale), locale, true, override);
}
// default:
// return super.subFormat(ch, count, beginOffset, pos, formatData, cal);
// }
-// }
+// }
/**
* {@inheritDoc}
* @internal
* @deprecated This API is ICU internal only.
*/
+ @Override
@Deprecated
protected void subFormat(StringBuffer buf,
char ch, int count, int beginOffset,
/**
* {@inheritDoc}
- *
+ *
* @deprecated ICU 50
*/
@Deprecated
+ @Override
protected int subParse(String text, int start, char ch, int count, boolean obeyCount, boolean allowNegative,
boolean[] ambiguousYear, Calendar cal) {
// Logic to handle numeric 'G' eras for chinese calendar, and to skip special 2-digit year
/**
* {@inheritDoc}
- *
+ *
* @deprecated ICU 50
*/
+ @Override
@Deprecated
protected DateFormat.Field patternCharToDateFormatField(char ch) {
// no longer any field corresponding to pattern char 'l'
* the <code>ChineseCalendar</code> field which this attribute represents.
* Use -1 for <code>calendarField</code> if this field does not have a
* corresponding <code>ChineseCalendar</code> field.
- *
+ *
* @param name Name of the attribute
* @param calendarField <code>Calendar</code> field constant
- *
+ *
* @deprecated ICU 50
*/
@Deprecated
* Returns the <code>Field</code> constant that corresponds to the <code>
* ChineseCalendar</code> field <code>calendarField</code>. If no
* corresponding <code>Field</code> is available, null is returned.
- *
+ *
* @param calendarField <code>ChineseCalendar</code> field constant
* @return <code>Field</code> associated with the <code>calendarField</code>,
* or null if no associated <code>Field</code> is available.
* @throws IllegalArgumentException if <code>calendarField</code> is not
* a valid <code>Calendar</code> field constant.
- *
+ *
* @deprecated ICU 50
*/
@Deprecated
/**
* {@inheritDoc}
- *
+ *
* @deprecated ICU 50
*/
+ @Override
@Deprecated
///CLOVER:OFF
protected Object readResolve() throws InvalidObjectException {
* @see ChineseDateFormat
* @see com.ibm.icu.util.ChineseCalendar
* @author Alan Liu
- * @deprecated ICU 50
+ * @deprecated ICU 50
*/
@Deprecated
public class ChineseDateFormatSymbols extends DateFormatSymbols {
// Generated by serialver from JDK 1.4.1_01
static final long serialVersionUID = 6827816119783952890L;
-
+
/*
* Package-private array that ChineseDateFormat needs to be able to
* read.
initializeIsLeapMonth();
}
+ @Override
void initializeData(DateFormatSymbols dfs) {
super.initializeData(dfs);
if (dfs instanceof ChineseDateFormatSymbols) {
fHanWordSet.applyPattern("[:Han:]");
fKatakanaWordSet.applyPattern("[[:Katakana:]\\uff9e\\uff9f]");
fHiraganaWordSet.applyPattern("[:Hiragana:]");
-
+
// freeze them all
fHangulWordSet.freeze();
fHanWordSet.freeze();
}
private DictionaryMatcher fDictionary = null;
-
+
public CjkBreakEngine(boolean korean) throws IOException {
super(BreakIterator.KIND_WORD);
fDictionary = DictionaryData.loadDictionaryFor("Hira");
}
}
+ @Override
public boolean equals(Object obj) {
if (obj instanceof CjkBreakEngine) {
CjkBreakEngine other = (CjkBreakEngine)obj;
return false;
}
/**
* Returns a hash code derived only from the runtime class, so every instance
* of the same class hashes to the same value.
*
* @return {@code getClass().hashCode()}
*/
+ @Override
public int hashCode() {
return getClass().hashCode();
}
-
+
private static final int kMaxKatakanaLength = 8;
private static final int kMaxKatakanaGroupLength = 20;
private static final int maxSnlp = 255;
int katakanaCost[] = new int[] { 8192, 984, 408, 240, 204, 252, 300, 372, 480 };
return (wordlength > kMaxKatakanaLength) ? 8192 : katakanaCost[wordlength];
}
-
+
/**
 * Tells whether a code point falls in one of the two ranges this engine
 * treats as Katakana.
 *
 * @param value the code point to classify
 * @return {@code true} for U+30A1..U+30FE except U+30FB, and for
 *         U+FF66..U+FF9F; {@code false} for everything else
 */
private static boolean isKatakana(int value) {
    // First range: U+30A1..U+30FE, with U+30FB carved out.
    if (value >= 0x30A1 && value <= 0x30FE) {
        return value != 0x30FB;
    }
    // Second range: U+FF66..U+FF9F.
    return value >= 0xFF66 && value <= 0xFF9F;
}
-
+
+ @Override
public int divideUpDictionaryRange(CharacterIterator inText, int startPos, int endPos,
DequeI foundBreaks) {
if (startPos >= endPos) {
charPositions[numChars] = index;
}
}
-
+
// From here on out, do the algorithm. Note that our indices
// refer to indices within the normalized string.
int[] bestSnlp = new int[numChars + 1];
for (int i = 0; i <= numChars; i++) {
prev[i] = -1;
}
-
+
final int maxWordSize = 20;
int values[] = new int[numChars];
int lengths[] = new int[numChars];
if (bestSnlp[i] == kint32max) {
continue;
}
-
+
int maxSearchLength = (i + maxWordSize < numChars) ? maxWordSize : (numChars - i);
int[] count_ = new int[1];
fDictionary.matches(text, maxSearchLength, lengths, count_, maxSearchLength, values);
int count = count_[0];
-
- // if there are no single character matches found in the dictionary
+
+ // if there are no single character matches found in the dictionary
// starting with this character, treat character as a 1-character word
// with the highest value possible (i.e. the least likely to occur).
- // Exclude Korean characters from this treatment, as they should be
+ // Exclude Korean characters from this treatment, as they should be
// left together by default.
text.setIndex(i); // fDictionary.matches() advances the text position; undo that.
if ((count == 0 || lengths[0] != 1) && current32(text) != DONE32 && !fHangulWordSet.contains(current32(text))) {
prev[lengths[j] + i] = i;
}
}
-
+
// In Japanese, single-character Katakana words are pretty rare.
// So we apply the following heuristic to Katakana: any continuous
// run of Katakana characters is considered a candidate word with
- // a default cost specified in the katakanaCost table according
+ // a default cost specified in the katakanaCost table according
// to its length.
boolean is_katakana = isKatakana(current32(text));
if (!is_prev_katakana && is_katakana) {
next32(text);
++j;
}
-
+
if ((j - i) < kMaxKatakanaGroupLength) {
int newSnlp = bestSnlp[i] + getKatakanaCost(j - i);
if (newSnlp < bestSnlp[j]) {
foundBreaks.pop();
correctedNumBreaks--;
}
- if (!foundBreaks.isEmpty())
+ if (!foundBreaks.isEmpty())
inText.setIndex(foundBreaks.peek());
return correctedNumBreaks;
}
* Overrides clone.
* @stable ICU 2.0
*/
+ @Override
public Object clone()
{
try {
* Generates a hash code for the DateFormatSymbols object.
* @stable ICU 2.0
*/
+ @Override
public int hashCode() {
// Is this sufficient?
return requestedLocale.toString().hashCode();
* Overrides equals.
* @stable ICU 2.0
*/
+ @Override
public boolean equals(Object obj)
{
if (this == obj) return true;
}
UResourceBundle contextTransformsBundle = null;
try {
- contextTransformsBundle = (UResourceBundle)rb.getWithFallback("contextTransforms");
+ contextTransformsBundle = rb.getWithFallback("contextTransforms");
}
catch (MissingResourceException e) {
contextTransformsBundle = null; // probably redundant
import com.ibm.icu.impl.CharacterIteration;
abstract class DictionaryBreakEngine implements LanguageBreakEngine {
-
+
/* Helper class for improving readability of the Thai/Lao/Khmer word break
* algorithm.
*/
mark = current;
}
}
-
+
/**
* A deque-like structure holding raw ints.
* Partial, limited implementation, only what is needed by the dictionary implementation.
private int[] data = new int[50];
private int lastIdx = 4; // or base of stack. Index of element.
private int firstIdx = 4; // or Top of Stack. Index of element + 1.
-
+
/** Returns how many elements are currently stored: the gap between the two end indices. */
int size() {
    final int count = firstIdx - lastIdx;
    return count;
}
-
+
/** Reports whether {@link #size()} is exactly zero. */
boolean isEmpty() {
    final boolean hasElements = size() != 0;
    return !hasElements;
}
-
+
/**
 * Doubles the length of the backing array. Existing contents keep their
 * indices; the newly added tail slots are zero-filled.
 */
private void grow() {
    data = java.util.Arrays.copyOf(data, data.length * 2);
}
-
+
/**
 * Stores {@code v} in the slot just below {@code lastIdx} and moves
 * {@code lastIdx} down to it. The backing array is never grown on this side.
 */
void offer(int v) {
    // The real callers of offer() add at most one element, so no attempt
    // is made to cope with more than the few free slots below lastIdx.
    assert lastIdx > 0;
    lastIdx -= 1;
    data[lastIdx] = v;
}
-
+
/**
 * Stores {@code v} at {@code firstIdx} and advances {@code firstIdx},
 * enlarging the backing array first when it is full on that side.
 */
void push(int v) {
    if (data.length <= firstIdx) {
        grow();
    }
    // Advance the index before the store, matching a post-increment subscript.
    final int slot = firstIdx++;
    data[slot] = v;
}
-
+
/** Removes and returns the element most recently added with {@code push}. */
int pop() {
    assert size() > 0;
    firstIdx -= 1;
    return data[firstIdx];
}
-
+
/** Returns, without removing it, the element stored just below {@code firstIdx}. */
int peek() {
    assert size() > 0;
    final int top = firstIdx - 1;
    return data[top];
}
-
+
/** Returns, without removing it, the element stored at {@code lastIdx}. */
int peekLast() {
    assert size() > 0;
    final int base = lastIdx;
    return data[base];
}
-
+
/** Removes and returns the element stored at {@code lastIdx}, moving {@code lastIdx} up by one. */
int pollLast() {
    assert size() > 0;
    final int base = lastIdx;
    lastIdx = base + 1;
    return data[base];
}
-
+
boolean contains(int v) {
for (int i=lastIdx; i< firstIdx; i++) {
if (data[i] == v) {
return false;
}
}
-
+
UnicodeSet fSet = new UnicodeSet();
private BitSet fTypes = new BitSet(32);
/**
* @param breakTypes The types of break iterators that can use this engine.
- * For example, BreakIterator.KIND_LINE
+ * For example, BreakIterator.KIND_LINE
*/
public DictionaryBreakEngine(Integer... breakTypes) {
for (Integer type: breakTypes) {
}
}
+ @Override
public boolean handles(int c, int breakType) {
    // The break type must be one this engine was registered for ...
    if (!fTypes.get(breakType)) {
        return false;
    }
    // ... and the character must belong to the engine's set.
    return fSet.contains(c);
}
- public int findBreaks(CharacterIterator text, int startPos, int endPos,
+ @Override
+ public int findBreaks(CharacterIterator text, int startPos, int endPos,
boolean reverse, int breakType, DequeI foundBreaks) {
int result = 0;
-
+
// Find the span of characters included in the set.
// The span to break begins at the current position in the text, and
// extends towards the start or end of the text, depending on 'reverse'.
return result;
}
-
+
void setCharacters(UnicodeSet set) {
fSet = new UnicodeSet(set);
fSet.compact();
public static DurationFormat getInstance(ULocale locale) {
    // Creation is delegated to BasicDurationFormat.
    final DurationFormat formatter = BasicDurationFormat.getInstance(locale);
    return formatter;
}
-
+
/**
* Subclass interface
@Deprecated
protected DurationFormat() {
}
-
+
/**
* Subclass interface
* @internal
* @deprecated ICU 56
*/
@Deprecated
+ @Override
public abstract StringBuffer format(Object object, StringBuffer toAppend,
FieldPosition pos);
* DurationFormat cannot parse, by default. This method will throw an UnsupportedOperationException.
* @deprecated ICU 56
*/
+ @Override
@Deprecated
public Object parseObject(String source, ParsePosition pos) {
throw new UnsupportedOperationException();
* formatDurationFrom(long, long) using now
* as the reference date, and the difference between now and
* <code>targetDate.getTime()</code> as the duration.
- *
+ *
* @param targetDate the ending date
* @return the formatted time
* @deprecated ICU 56
* <p>
* This is a convenience method that calls formatDurationFrom
* using the current system time as the reference date.
- *
+ *
* @param duration the duration in milliseconds
* @return the formatted time
* @deprecated ICU 56
* The duration is expressed as the number of milliseconds in the
* past (negative values) or future (positive values) with respect
* to a reference date (expressed as milliseconds in epoch).
- *
+ *
* @param duration the duration in milliseconds
* @param referenceDate the date from which to compute the duration
* @return the formatted time
if (FACTORY_DISPLAYCONTEXT != null) {
try {
result = (LocaleDisplayNames) FACTORY_DISPLAYCONTEXT.invoke(null,
- locale, (Object[])contexts);
+ locale, contexts);
} catch (InvocationTargetException e) {
// fall through
} catch (IllegalAccessException e) {
this.collator = collator;
this.useSelf = useSelf;
}
+ @Override
public int compare(UiListItem o1, UiListItem o2) {
int result = useSelf ? collator.compare(o1.nameInSelf, o2.nameInSelf)
: collator.compare(o1.nameInDisplayLocale, o2.nameInDisplayLocale);
* System.out.println(msgFmt.format(args));
* args.put("num_files", 3);
* System.out.println(msgFmt.format(args));
- *
+ *
* <em>output</em>:
* There are no files on disk "MyDisk".
* There are 3 files on "MyDisk".
public ULocale getULocale() {
    // Plain accessor for the ulocale field.
    return this.ulocale;
}
-
+
/**
* Sets the pattern used by this message format.
* Parses the pattern and caches Format objects for simple argument types.
* an array of Object and this format uses named arguments
* @stable ICU 3.0
*/
+ @Override
public final StringBuffer format(Object arguments, StringBuffer result,
FieldPosition pos)
{
* expected by the corresponding argument or custom Format object.
* @stable ICU 3.8
*/
+ @Override
public AttributedCharacterIterator formatToCharacterIterator(Object arguments) {
if (arguments == null) {
throw new NullPointerException(
"This method is not available in MessageFormat objects " +
"that use named argument.");
}
-
+
// Count how many slots we need in the array.
int maxArgId = -1;
for (int partIndex = 0; (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) {
return resultArray;
}
-
+
/**
* {@icu} Parses the string, returning the results in a Map.
* This is similar to the version that returns an array
if (pos.getIndex() == backupStartPos) {
return null;
}
- return result;
+ return result;
}
-
+
/**
* Parses text from the beginning of the given string to produce an object
* array.
// We do not support parsing Plural formats. (No REPLACE_NUMBER here.)
assert type==Part.Type.ARG_START : "Unexpected Part "+part+" in parsed message.";
int argLimit=msgPattern.getLimitPartIndex(i);
-
+
ArgType argType=part.getArgType();
part=msgPattern.getPart(++i);
// Compute the argId, so we can use it as a key.
* @throws NullPointerException if <code>pos</code> is null.
* @stable ICU 3.0
*/
+ @Override
public Object parseObject(String source, ParsePosition pos) {
if (!msgPattern.hasNamedArguments()) {
return parse(source, pos);
} else {
other.customFormatArgStarts = null;
}
-
+
if (cachedFormatters != null) {
other.cachedFormatters = new HashMap<Integer, Format>();
Iterator<Map.Entry<Integer, Format>> it = cachedFormatters.entrySet().iterator();
} else {
other.cachedFormatters = null;
}
-
+
other.msgPattern = msgPattern == null ? null : (MessagePattern)msgPattern.clone();
other.stockDateFormatter =
stockDateFormatter == null ? null : (DateFormat) stockDateFormatter.clone();
*
* @stable ICU 3.8
*/
+ @Override
protected Object readResolve() throws InvalidObjectException {
if (this.getClass() != MessageFormat.Field.class) {
throw new InvalidObjectException(
* as soon as it finds an argument, or it reaches the end of the string.
* @param from Index in the pattern string to start from.
* @return A substring from the pattern string representing the longest possible
- * substring with no arguments.
+ * substring with no arguments.
*/
private String getLiteralStringUntilNextArgument(int from) {
StringBuilder b = new StringBuilder();
msgFormat = mf;
this.type = type;
}
+ @Override
public String select(Object ctx, double number) {
if(rules == null) {
rules = PluralRules.forLocale(msgFormat.ulocale, type);
* @return a copy of this object.
* @stable ICU 4.8
*/
+ @Override
@SuppressWarnings("unchecked")
public MessagePattern cloneAsThawed() {
MessagePattern newMsg;
/**
* Freezes this object, making it immutable and thread-safe.
- * @return this
+ * @return this
* @stable ICU 4.8
*/
+ @Override
public MessagePattern freeze() {
frozen=true;
return this;
* @return true if this object is frozen.
* @stable ICU 4.8
*/
+ @Override
public boolean isFrozen() {
    // Plain accessor for the frozen flag.
    return this.frozen;
}
char c=msg.charAt(index++);
if(c=='\'') {
if(index==msg.length()) {
- // The apostrophe is the last character in the pattern.
+ // The apostrophe is the last character in the pattern.
// Add a Part for auto-quoting.
addPart(Part.Type.INSERT_CHAR, index, 0, '\''); // value=char to be inserted
needsAutoQuoting=true;
* </pre>
*
* <p>To a user of your program, however, both of these sequences should be
- * treated as the same "user-level" character "A with acute accent". When you
- * are searching or comparing text, you must ensure that these two sequences are
+ * treated as the same "user-level" character "A with acute accent". When you
+ * are searching or comparing text, you must ensure that these two sequences are
* treated equivalently. In addition, you must handle characters with more than
* one accent. Sometimes the order of a character's combining accents is
* significant, while in other cases accent sequences in different orders are
* into the corresponding semantic characters. When sorting and searching, you
* will often want to use these mappings.
*
- * <p><code>normalize</code> helps solve these problems by transforming text into
- * the canonical composed and decomposed forms as shown in the first example
- * above. In addition, you can have it perform compatibility decompositions so
+ * <p><code>normalize</code> helps solve these problems by transforming text into
+ * the canonical composed and decomposed forms as shown in the first example
+ * above. In addition, you can have it perform compatibility decompositions so
* that you can treat compatibility characters the same as their equivalents.
* Finally, <code>normalize</code> rearranges accents into the proper canonical
* order, so that you do not have to worry about accent rearrangement on your
*
* <p>Form FCD, "Fast C or D", is also designed for collation.
* It allows working with strings that are not necessarily normalized
- * with an algorithm (like in collation) that works under "canonical closure",
- * i.e., it treats precomposed characters and their decomposed equivalents the
+ * with an algorithm (like in collation) that works under "canonical closure",
+ * i.e., it treats precomposed characters and their decomposed equivalents the
* same.
*
- * <p>It is not a normalization form because it does not provide for uniqueness of
- * representation. Multiple strings may be canonically equivalent (their NFDs
+ * <p>It is not a normalization form because it does not provide for uniqueness of
+ * representation. Multiple strings may be canonically equivalent (their NFDs
* are identical) and may all conform to FCD without being identical themselves.
*
- * <p>The form is defined such that the "raw decomposition", the recursive
- * canonical decomposition of each character, results in a string that is
- * canonically ordered. This means that precomposed characters are allowed for
+ * <p>The form is defined such that the "raw decomposition", the recursive
+ * canonical decomposition of each character, results in a string that is
+ * canonically ordered. This means that precomposed characters are allowed for
* as long as their decompositions do not need canonical reordering.
*
* <p>Its advantage for a process like collation is that all NFD and most NFC texts
- * - and many unnormalized texts - already conform to FCD and do not need to be
- * normalized (NFD) for such a process. The FCD quick check will return YES for
+ * - and many unnormalized texts - already conform to FCD and do not need to be
+ * normalized (NFD) for such a process. The FCD quick check will return YES for
* most strings in practice.
*
* <p>normalize(FCD) may be implemented with NFD.
* <p>For more details on FCD see Unicode Technical Note #5 (Canonical Equivalence in Applications):
* http://www.unicode.org/notes/tn5/#FCD
*
- * <p>ICU collation performs either NFD or FCD normalization automatically if
- * normalization is turned on for the collator object. Beyond collation and
- * string search, normalized strings may be useful for string equivalence
+ * <p>ICU collation performs either NFD or FCD normalization automatically if
+ * normalization is turned on for the collator object. Beyond collation and
+ * string search, normalized strings may be useful for string equivalence
* comparisons, transliteration/transcription, unique representations, etc.
*
* <p>The W3C generally recommends to exchange texts in NFC.
}
/**
 * Mode subclass instantiated for {@code NONE}: returns
 * {@code Norm2AllModes.NOOP_NORMALIZER2} regardless of {@code options}.
 */
private static final class NONEMode extends Mode {
+ @Override
protected Normalizer2 getNormalizer2(int options) { return Norm2AllModes.NOOP_NORMALIZER2; }
}
/**
 * Mode subclass instantiated for {@code NFD}: returns the NFD32ModeImpl normalizer
 * when the UNICODE_3_2 bit is set in {@code options}, otherwise the NFDModeImpl one.
 */
private static final class NFDMode extends Mode {
+ @Override
protected Normalizer2 getNormalizer2(int options) {
return (options&UNICODE_3_2) != 0 ?
NFD32ModeImpl.INSTANCE.normalizer2 : NFDModeImpl.INSTANCE.normalizer2;
}
}
/**
 * Mode subclass instantiated for {@code NFKD}: returns the NFKD32ModeImpl normalizer
 * when the UNICODE_3_2 bit is set in {@code options}, otherwise the NFKDModeImpl one.
 */
private static final class NFKDMode extends Mode {
+ @Override
protected Normalizer2 getNormalizer2(int options) {
return (options&UNICODE_3_2) != 0 ?
NFKD32ModeImpl.INSTANCE.normalizer2 : NFKDModeImpl.INSTANCE.normalizer2;
}
}
/**
 * Mode subclass instantiated for {@code NFC}: returns the NFC32ModeImpl normalizer
 * when the UNICODE_3_2 bit is set in {@code options}, otherwise the NFCModeImpl one.
 */
private static final class NFCMode extends Mode {
+ @Override
protected Normalizer2 getNormalizer2(int options) {
return (options&UNICODE_3_2) != 0 ?
NFC32ModeImpl.INSTANCE.normalizer2 : NFCModeImpl.INSTANCE.normalizer2;
}
}
/**
 * Mode subclass instantiated for {@code NFKC}: returns the NFKC32ModeImpl normalizer
 * when the UNICODE_3_2 bit is set in {@code options}, otherwise the NFKCModeImpl one.
 */
private static final class NFKCMode extends Mode {
+ @Override
protected Normalizer2 getNormalizer2(int options) {
return (options&UNICODE_3_2) != 0 ?
NFKC32ModeImpl.INSTANCE.normalizer2 : NFKCModeImpl.INSTANCE.normalizer2;
}
}
/**
 * Mode subclass for the FCD form: returns the FCD32ModeImpl normalizer when the
 * UNICODE_3_2 bit is set in {@code options}, otherwise the FCDModeImpl one.
 */
private static final class FCDMode extends Mode {
+ @Override
protected Normalizer2 getNormalizer2(int options) {
return (options&UNICODE_3_2) != 0 ?
FCD32ModeImpl.INSTANCE.normalizer2 : FCDModeImpl.INSTANCE.normalizer2;
}
}
- /**
+ /**
* No decomposition/composition.
*
* @deprecated ICU 56 Use {@link Normalizer2} instead.
@Deprecated
public static final Mode NONE = new NONEMode();
- /**
+ /**
* Canonical decomposition.
*
* @deprecated ICU 56 Use {@link Normalizer2} instead.
@Deprecated
public static final Mode NFD = new NFDMode();
- /**
+ /**
* Compatibility decomposition.
*
* @deprecated ICU 56 Use {@link Normalizer2} instead.
@Deprecated
public static final Mode NFKD = new NFKDMode();
- /**
+ /**
* Canonical decomposition followed by canonical composition.
*
* @deprecated ICU 56 Use {@link Normalizer2} instead.
@Deprecated
public static final Mode NFC = new NFCMode();
- /**
+ /**
* Default normalization.
*
* @deprecated ICU 56 Use {@link Normalizer2} instead.
*/
@Deprecated
- public static final Mode DEFAULT = NFC;
+ public static final Mode DEFAULT = NFC;
- /**
+ /**
* Compatibility decomposition followed by canonical composition.
*
* @deprecated ICU 56 Use {@link Normalizer2} instead.
@Deprecated
public static final Mode NFKC =new NFKCMode();
- /**
+ /**
* "Fast C or D" form.
*
* @deprecated ICU 56 Use {@link Normalizer2} instead.
/**
* Canonical decomposition followed by canonical composition. Used with the
- * {@link com.ibm.icu.text.Normalizer constructors} and the static
- * {@link #normalize normalize} method to determine the operation to be
+ * {@link com.ibm.icu.text.Normalizer constructors} and the static
+ * {@link #normalize normalize} method to determine the operation to be
* performed.
* <p>
* If all optional features (<i>e.g.</i> {@link #IGNORE_HANGUL}) are turned
* off, this operation produces output that is in
- * <a href=http://www.unicode.org/unicode/reports/tr15/>Unicode Canonical
+ * <a href=http://www.unicode.org/unicode/reports/tr15/>Unicode Canonical
* Form</a>
* <b>C</b>.
* <p>
/**
* Compatibility decomposition followed by canonical composition.
* Used with the {@link com.ibm.icu.text.Normalizer constructors} and the static
- * {@link #normalize normalize} method to determine the operation to be
+ * {@link #normalize normalize} method to determine the operation to be
* performed.
* <p>
* If all optional features (<i>e.g.</i> {@link #IGNORE_HANGUL}) are turned
* off, this operation produces output that is in
- * <a href=http://www.unicode.org/unicode/reports/tr15/>Unicode Canonical
+ * <a href=http://www.unicode.org/unicode/reports/tr15/>Unicode Canonical
* Form</a>
* <b>KC</b>.
* <p>
* <p>
* If all optional features (<i>e.g.</i> {@link #IGNORE_HANGUL}) are turned
* off, this operation produces output that is in
- * <a href=http://www.unicode.org/unicode/reports/tr15/>Unicode Canonical
+ * <a href=http://www.unicode.org/unicode/reports/tr15/>Unicode Canonical
* Form</a>
* <b>D</b>.
* <p>
/**
* Compatibility decomposition. This value is passed to the
- * {@link com.ibm.icu.text.Normalizer constructors} and the static
+ * {@link com.ibm.icu.text.Normalizer constructors} and the static
* {@link #normalize normalize}
* method to determine the operation to be performed.
* <p>
* If all optional features (<i>e.g.</i> {@link #IGNORE_HANGUL}) are turned
* off, this operation produces output that is in
- * <a href=http://www.unicode.org/unicode/reports/tr15/>Unicode Canonical
+ * <a href=http://www.unicode.org/unicode/reports/tr15/>Unicode Canonical
* Form</a>
* <b>KD</b>.
* <p>
*/
@Deprecated
public static final int IGNORE_HANGUL = 0x0001;
-
+
/**
* Result values for quickCheck().
* For details see Unicode Technical Report 15.
//resultValue=value;
}
}
- /**
+ /**
* Indicates that string is not in the normalized format
* @stable ICU 2.8
*/
public static final QuickCheckResult NO = new QuickCheckResult(0);
-
- /**
+
+ /**
* Indicates that string is in the normalized format
* @stable ICU 2.8
*/
public static final QuickCheckResult YES = new QuickCheckResult(1);
- /**
- * Indicates it cannot be determined if string is in the normalized
+ /**
+ * Indicates it cannot be determined if string is in the normalized
* format without further thorough checks.
* @stable ICU 2.8
*/
public static final QuickCheckResult MAYBE = new QuickCheckResult(2);
-
+
/**
* Option bit for compare:
* Case sensitively compare the strings
* @stable ICU 2.8
*/
public static final int FOLD_CASE_DEFAULT = UCharacter.FOLD_CASE_DEFAULT;
-
+
/**
* Option bit for compare:
* Both input strings are assumed to fulfill FCD conditions.
* @stable ICU 2.8
*/
public static final int INPUT_IS_FCD = 0x20000;
-
+
/**
* Option bit for compare:
* Perform case-insensitive comparison.
* @stable ICU 2.8
*/
public static final int COMPARE_IGNORE_CASE = 0x10000;
-
+
/**
* Option bit for compare:
* Compare strings in code point order instead of code unit order.
*/
public static final int COMPARE_CODE_POINT_ORDER = 0x8000;
- /**
+ /**
* Option value for case folding:
* Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
* and dotless i appropriately for Turkic languages (tr, az).
*/
@Deprecated
public static final int COMPARE_NORM_OPTIONS_SHIFT = 20;
-
+
//-------------------------------------------------------------------------
// Iterator constructors
//-------------------------------------------------------------------------
@Deprecated
public Normalizer(String str, Mode mode, int opt) {
this.text = UCharacterIterator.getInstance(str);
- this.mode = mode;
+ this.mode = mode;
this.options=opt;
norm2 = mode.getNormalizer2(opt);
buffer = new StringBuilder();
* Compose a string.
* The string will be composed according to the specified mode.
* @param str The string to compose.
- * @param compat If true the string will be composed according to
- * NFKC rules and if false will be composed according to
+ * @param compat If true the string will be composed according to
+ * NFKC rules and if false will be composed according to
* NFC rules.
* @return String The composed string
* @deprecated ICU 56 Use {@link Normalizer2} instead.
*/
@Deprecated
public static String compose(String str, boolean compat) {
- return compose(str,compat,0);
+ return compose(str,compat,0);
}
-
+
/**
* Compose a string.
* The string will be composed according to the specified mode.
* @param str The string to compose.
- * @param compat If true the string will be composed according to
- * NFKC rules and if false will be composed according to
+ * @param compat If true the string will be composed according to
+ * NFKC rules and if false will be composed according to
* NFC rules.
* @param options The only recognized option is UNICODE_3_2
* @return String The composed string
public static String compose(String str, boolean compat, int options) {
    // Obtain the composing normalizer for this compat/options pair and apply it to the input.
    final String composed = getComposeNormalizer2(compat, options).normalize(str);
    return composed;
}
-
+
/**
* Compose a string.
* The string will be composed according to the specified mode.
* @param source The char array to compose.
* @param target A char buffer to receive the normalized text.
- * @param compat If true the char array will be composed according to
- * NFKC rules and if false will be composed according to
+ * @param compat If true the char array will be composed according to
+ * NFKC rules and if false will be composed according to
* NFC rules.
* @param options The normalization options, ORed together (0 for no options).
- * @return int The total buffer size needed;if greater than length of
+ * @return int The total buffer size needed;if greater than length of
* result, the output was truncated.
- * @exception IndexOutOfBoundsException if target.length is less than the
+ * @exception IndexOutOfBoundsException if target.length is less than the
* required length
* @deprecated ICU 56 Use {@link Normalizer2} instead.
*/
public static int compose(char[] source,char[] target, boolean compat, int options) {
    // Whole-array convenience form: forward to the ranged overload using
    // the full extent of both the source and the target arrays.
    final int srcLimit = source.length;
    final int destLimit = target.length;
    return compose(source, 0, srcLimit, target, 0, destLimit, compat, options);
}
-
+
/**
* Compose a string.
* The string will be composed according to the specified mode.
* @param srcStart Start index of the source
* @param srcLimit Limit index of the source
* @param dest The char buffer to fill in
- * @param destStart Start index of the destination buffer
+ * @param destStart Start index of the destination buffer
* @param destLimit End index of the destination buffer
- * @param compat If true the char array will be composed according to
- * NFKC rules and if false will be composed according to
+ * @param compat If true the char array will be composed according to
+ * NFKC rules and if false will be composed according to
* NFC rules.
* @param options The normalization options, ORed together (0 for no options).
- * @return int The total buffer size needed;if greater than length of
+ * @return int The total buffer size needed;if greater than length of
* result, the output was truncated.
- * @exception IndexOutOfBoundsException if target.length is less than the
+ * @exception IndexOutOfBoundsException if target.length is less than the
* required length
* @deprecated ICU 56 Use {@link Normalizer2} instead.
*/
* Decompose a string.
* The string will be decomposed according to the specified mode.
* @param str The string to decompose.
- * @param compat If true the string will be decomposed according to NFKD
- * rules and if false will be decomposed according to NFD
+ * @param compat If true the string will be decomposed according to NFKD
+ * rules and if false will be decomposed according to NFD
* rules.
* @return String The decomposed string
* @deprecated ICU 56 Use {@link Normalizer2} instead.
*/
@Deprecated
public static String decompose(String str, boolean compat) {
- return decompose(str,compat,0);
+ return decompose(str,compat,0);
}
-
+
/**
* Decompose a string.
* The string will be decomposed according to the specified mode.
* @param str The string to decompose.
- * @param compat If true the string will be decomposed according to NFKD
- * rules and if false will be decomposed according to NFD
+ * @param compat If true the string will be decomposed according to NFKD
+ * rules and if false will be decomposed according to NFD
* rules.
* @param options The normalization options, ORed together (0 for no options).
* @return String The decomposed string
* The string will be decomposed according to the specified mode.
* @param source The char array to decompose.
* @param target A char buffer to receive the normalized text.
- * @param compat If true the char array will be decomposed according to NFKD
- * rules and if false will be decomposed according to
+ * @param compat If true the char array will be decomposed according to NFKD
+ * rules and if false will be decomposed according to
* NFD rules.
- * @return int The total buffer size needed;if greater than length of
+ * @return int The total buffer size needed;if greater than length of
* result,the output was truncated.
* @param options The normalization options, ORed together (0 for no options).
* @exception IndexOutOfBoundsException if the target capacity is less than
public static int decompose(char[] source,char[] target, boolean compat, int options) {
    // Whole-array convenience form: forward to the ranged overload using
    // the full extent of both the source and the target arrays.
    final int srcLimit = source.length;
    final int destLimit = target.length;
    return decompose(source, 0, srcLimit, target, 0, destLimit, compat, options);
}
-
+
/**
* Decompose a string.
* The string will be decomposed according to the specified mode.
* @param srcStart Start index of the source
* @param srcLimit Limit index of the source
* @param dest The char buffer to fill in
- * @param destStart Start index of the destination buffer
+ * @param destStart Start index of the destination buffer
* @param destLimit End index of the destination buffer
- * @param compat If true the char array will be decomposed according to NFKD
- * rules and if false will be decomposed according to
+ * @param compat If true the char array will be decomposed according to NFKD
+ * rules and if false will be decomposed according to
* NFD rules.
* @param options The normalization options, ORed together (0 for no options).
- * @return int The total buffer size needed;if greater than length of
+ * @return int The total buffer size needed;if greater than length of
* result,the output was truncated.
* @exception IndexOutOfBoundsException if the target capacity is less than
* the required length
public static String normalize(String str, Mode mode, int options) {
    // Resolve the Normalizer2 for this mode/options pair, then run it over the input.
    final Normalizer2 normalizer = mode.getNormalizer2(options);
    return normalizer.normalize(str);
}
-
+
/**
* Normalize a string.
- * The string will be normalized according to the specified normalization
+ * The string will be normalized according to the specified normalization
* mode and options.
* @param src The string to normalize.
- * @param mode The normalization mode; one of Normalizer.NONE,
- * Normalizer.NFD, Normalizer.NFC, Normalizer.NFKC,
+ * @param mode The normalization mode; one of Normalizer.NONE,
+ * Normalizer.NFD, Normalizer.NFC, Normalizer.NFKC,
* Normalizer.NFKD, Normalizer.DEFAULT
* @return the normalized string
* @deprecated ICU 56 Use {@link Normalizer2} instead.
*/
@Deprecated
public static String normalize(String src,Mode mode) {
- return normalize(src, mode, 0);
+ return normalize(src, mode, 0);
}
/**
* Normalize a string.
- * The string will be normalized according to the specified normalization
+ * The string will be normalized according to the specified normalization
* mode and options.
* @param source The char array to normalize.
* @param target A char buffer to receive the normalized text.
- * @param mode The normalization mode; one of Normalizer.NONE,
- * Normalizer.NFD, Normalizer.NFC, Normalizer.NFKC,
+ * @param mode The normalization mode; one of Normalizer.NONE,
+ * Normalizer.NFD, Normalizer.NFC, Normalizer.NFKC,
* Normalizer.NFKD, Normalizer.DEFAULT
* @param options The normalization options, ORed together (0 for no options).
- * @return int The total buffer size needed;if greater than length of
+ * @return int The total buffer size needed;if greater than length of
* result, the output was truncated.
- * @exception IndexOutOfBoundsException if the target capacity is less
+ * @exception IndexOutOfBoundsException if the target capacity is less
* than the required length
* @deprecated ICU 56 Use {@link Normalizer2} instead.
*/
* @param srcStart Start index of the source
* @param srcLimit Limit index of the source
* @param dest The char buffer to fill in
- * @param destStart Start index of the destination buffer
+ * @param destStart Start index of the destination buffer
* @param destLimit End index of the destination buffer
- * @param mode The normalization mode; one of Normalizer.NONE,
- * Normalizer.NFD, Normalizer.NFC, Normalizer.NFKC,
+ * @param mode The normalization mode; one of Normalizer.NONE,
+ * Normalizer.NFD, Normalizer.NFC, Normalizer.NFKC,
* Normalizer.NFKD, Normalizer.DEFAULT
- * @param options The normalization options, ORed together (0 for no options).
- * @return int The total buffer size needed;if greater than length of
+ * @param options The normalization options, ORed together (0 for no options).
+ * @return int The total buffer size needed;if greater than length of
* result, the output was truncated.
- * @exception IndexOutOfBoundsException if the target capacity is
+ * @exception IndexOutOfBoundsException if the target capacity is
* less than the required length
* @deprecated ICU 56 Use {@link Normalizer2} instead.
*/
@Deprecated
- public static int normalize(char[] src,int srcStart, int srcLimit,
+ public static int normalize(char[] src,int srcStart, int srcLimit,
char[] dest,int destStart, int destLimit,
Mode mode, int options) {
CharBuffer srcBuffer = CharBuffer.wrap(src, srcStart, srcLimit - srcStart);
* Convenience method.
*
* @param source string for determining if it is in a normalized format
- * @param mode normalization format (Normalizer.NFC,Normalizer.NFD,
+ * @param mode normalization format (Normalizer.NFC,Normalizer.NFD,
* Normalizer.NFKC,Normalizer.NFKD)
- * @return Return code to specify if the text is normalized or not
+ * @return Return code to specify if the text is normalized or not
* (Normalizer.YES, Normalizer.NO or Normalizer.MAYBE)
* @deprecated ICU 56 Use {@link Normalizer2} instead.
*/
}
/**
- * Performing quick check on a string, to quickly determine if the string is
+ * Performing quick check on a string, to quickly determine if the string is
* in a particular normalization format.
* Three types of result can be returned Normalizer.YES, Normalizer.NO or
* Normalizer.MAYBE. Result Normalizer.YES indicates that the argument
* string is in the desired normalized format, Normalizer.NO determines that
- * argument string is not in the desired normalized format. A
- * Normalizer.MAYBE result indicates that a more thorough check is required,
- * the user may have to put the string in its normalized form and compare
+ * argument string is not in the desired normalized format. A
+ * Normalizer.MAYBE result indicates that a more thorough check is required,
+ * the user may have to put the string in its normalized form and compare
* the results.
*
* @param source string for determining if it is in a normalized format
- * @param mode normalization format (Normalizer.NFC,Normalizer.NFD,
+ * @param mode normalization format (Normalizer.NFC,Normalizer.NFD,
* Normalizer.NFKC,Normalizer.NFKD)
* @param options Options for use with exclusion set and tailored Normalization
- * The only option that is currently recognized is UNICODE_3_2
- * @return Return code to specify if the text is normalized or not
+ * The only option that is currently recognized is UNICODE_3_2
+ * @return Return code to specify if the text is normalized or not
* (Normalizer.YES, Normalizer.NO or Normalizer.MAYBE)
* @deprecated ICU 56 Use {@link Normalizer2} instead.
*/
/**
* Convenience method.
*
- * @param source Array of characters for determining if it is in a
+ * @param source Array of characters for determining if it is in a
* normalized format
- * @param mode normalization format (Normalizer.NFC,Normalizer.NFD,
+ * @param mode normalization format (Normalizer.NFC,Normalizer.NFD,
* Normalizer.NFKC,Normalizer.NFKD)
* @param options Options for use with exclusion set and tailored Normalization
* The only option that is currently recognized is UNICODE_3_2
- * @return Return code to specify if the text is normalized or not
+ * @return Return code to specify if the text is normalized or not
* (Normalizer.YES, Normalizer.NO or Normalizer.MAYBE)
* @deprecated ICU 56 Use {@link Normalizer2} instead.
*/
}
/**
- * Performing quick check on a string, to quickly determine if the string is
+ * Performing quick check on a string, to quickly determine if the string is
* in a particular normalization format.
* Three types of result can be returned Normalizer.YES, Normalizer.NO or
* Normalizer.MAYBE. Result Normalizer.YES indicates that the argument
* string is in the desired normalized format, Normalizer.NO determines that
- * argument string is not in the desired normalized format. A
- * Normalizer.MAYBE result indicates that a more thorough check is required,
- * the user may have to put the string in its normalized form and compare
+ * argument string is not in the desired normalized format. A
+ * Normalizer.MAYBE result indicates that a more thorough check is required,
+ * the user may have to put the string in its normalized form and compare
* the results.
*
* @param source string for determining if it is in a normalized format
* @param start the start index of the source
* @param limit the limit index of the source it is equal to the length
- * @param mode normalization format (Normalizer.NFC,Normalizer.NFD,
+ * @param mode normalization format (Normalizer.NFC,Normalizer.NFD,
* Normalizer.NFKC,Normalizer.NFKD)
* @param options Options for use with exclusion set and tailored Normalization
- * The only option that is currently recognized is UNICODE_3_2
- * @return Return code to specify if the text is normalized or not
+ * The only option that is currently recognized is UNICODE_3_2
+ * @return Return code to specify if the text is normalized or not
* (Normalizer.YES, Normalizer.NO or
* Normalizer.MAYBE)
* @deprecated ICU 56 Use {@link Normalizer2} instead.
*/
@Deprecated
- public static QuickCheckResult quickCheck(char[] source,int start,
- int limit, Mode mode,int options) {
+ public static QuickCheckResult quickCheck(char[] source,int start,
+ int limit, Mode mode,int options) {
CharBuffer srcBuffer = CharBuffer.wrap(source, start, limit - start);
return mode.getNormalizer2(options).quickCheck(srcBuffer);
}
* For NFD, NFKD, and FCD, both functions work exactly the same.
* For NFC and NFKC where quickCheck may return "maybe", this function will
* perform further tests to arrive at a true/false result.
- * @param src The input array of characters to be checked to see if
+ * @param src The input array of characters to be checked to see if
* it is normalized
* @param start The strart index in the source
* @param limit The limit index in the source
* @param mode the normalization mode
* @param options Options for use with exclusion set and tailored Normalization
- * The only option that is currently recognized is UNICODE_3_2
+ * The only option that is currently recognized is UNICODE_3_2
* @return Boolean value indicating whether the source string is in the
* "mode" normalization form
* @deprecated ICU 56 Use {@link Normalizer2} instead.
*/
@Deprecated
public static boolean isNormalized(char[] src,int start,
- int limit, Mode mode,
+ int limit, Mode mode,
int options) {
CharBuffer srcBuffer = CharBuffer.wrap(src, start, limit - start);
return mode.getNormalizer2(options).isNormalized(srcBuffer);
* For NFD, NFKD, and FCD, both functions work exactly the same.
* For NFC and NFKC where quickCheck may return "maybe", this function will
* perform further tests to arrive at a true/false result.
- * @param str the input string to be checked to see if it is
+ * @param str the input string to be checked to see if it is
* normalized
* @param mode the normalization mode
* @param options Options for use with exclusion set and tailored Normalization
- * The only option that is currently recognized is UNICODE_3_2
+ * The only option that is currently recognized is UNICODE_3_2
* @see #isNormalized
* @deprecated ICU 56 Use {@link Normalizer2} instead.
*/
/**
* Convenience Method
- * @param char32 the input code point to be checked to see if it is
+ * @param char32 the input code point to be checked to see if it is
* normalized
* @param mode the normalization mode
* @param options Options for use with exclusion set and tailored Normalization
- * The only option that is currently recognized is UNICODE_3_2
+ * The only option that is currently recognized is UNICODE_3_2
*
* @see #isNormalized
* @deprecated ICU 56 Use {@link Normalizer2} instead.
* (and optionally case-folding) both strings entirely,
* improving performance significantly.
*
- * Bulk normalization is only necessary if the strings do not fulfill the
- * FCD conditions. Only in this case, and only if the strings are relatively
+ * Bulk normalization is only necessary if the strings do not fulfill the
+ * FCD conditions. Only in this case, and only if the strings are relatively
* long, is memory allocated temporarily.
* For FCD strings and short non-FCD strings there is no memory allocation.
*
* @param s2 Second source character array.
* @param s2Start start index of the source
* @param s2Limit limit of the source
- *
+ *
* @param options A bit set of options:
* - FOLD_CASE_DEFAULT or 0 is used for default options:
* Case-sensitive comparison in code unit order, and the input strings
* are quick-checked for FCD.
*
* - INPUT_IS_FCD
- * Set if the caller knows that both s1 and s2 fulfill the FCD
+ * Set if the caller knows that both s1 and s2 fulfill the FCD
* conditions.If not set, the function will quickCheck for FCD
* and normalize if necessary.
*
public static int compare(char[] s1, int s1Start, int s1Limit,
char[] s2, int s2Start, int s2Limit,
int options) {
- if( s1==null || s1Start<0 || s1Limit<0 ||
+ if( s1==null || s1Start<0 || s1Limit<0 ||
s2==null || s2Start<0 || s2Limit<0 ||
s1Limit<s1Start || s2Limit<s2Start
) {
throw new IllegalArgumentException();
}
- return internalCompare(CharBuffer.wrap(s1, s1Start, s1Limit-s1Start),
- CharBuffer.wrap(s2, s2Start, s2Limit-s2Start),
+ return internalCompare(CharBuffer.wrap(s1, s1Start, s1Limit-s1Start),
+ CharBuffer.wrap(s2, s2Start, s2Limit-s2Start),
options);
- }
+ }
/**
* Compare two strings for canonical equivalence.
* (and optionally case-folding) both strings entirely,
* improving performance significantly.
*
- * Bulk normalization is only necessary if the strings do not fulfill the
- * FCD conditions. Only in this case, and only if the strings are relatively
+ * Bulk normalization is only necessary if the strings do not fulfill the
+ * FCD conditions. Only in this case, and only if the strings are relatively
* long, is memory allocated temporarily.
* For FCD strings and short non-FCD strings there is no memory allocation.
*
* are quick-checked for FCD.
*
* - INPUT_IS_FCD
- * Set if the caller knows that both s1 and s2 fulfill the FCD
+ * Set if the caller knows that both s1 and s2 fulfill the FCD
* conditions. If not set, the function will quickCheck for FCD
* and normalize if necessary.
*
* are quick-checked for FCD.
*
* - INPUT_IS_FCD
- * Set if the caller knows that both s1 and s2 fulfill the FCD
+ * Set if the caller knows that both s1 and s2 fulfill the FCD
* conditions. If not set, the function will quickCheck for FCD
* and normalize if necessary.
*
* @param right Right source array.
* @param rightStart start in the right array.
* @param rightLimit limit in the right array (==length)
- * @param dest The output buffer; can be null if destStart==destLimit==0
+ * @param dest The output buffer; can be null if destStart==destLimit==0
* for pure preflighting.
* @param destStart start in the destination array
* @param destLimit limit in the destination array (==length)
* @param mode The normalization mode.
* @param options The normalization options, ORed together (0 for no options).
- * @return Length of output (number of chars) when successful or
+ * @return Length of output (number of chars) when successful or
* IndexOutOfBoundsException
- * @exception IndexOutOfBoundsException whose message has the string
- * representation of destination capacity required.
+ * @exception IndexOutOfBoundsException whose message has the string
+ * representation of destination capacity required.
* @see #normalize
* @see #next
* @see #previous
*/
@Deprecated
public static int concatenate(char[] left, int leftStart, int leftLimit,
- char[] right, int rightStart, int rightLimit,
+ char[] right, int rightStart, int rightLimit,
char[] dest, int destStart, int destLimit,
Normalizer.Mode mode, int options) {
if(dest == null) {
throw new IllegalArgumentException();
}
-
+
/* check for overlapping right and destination */
if (right == dest && rightStart < destLimit && destStart < rightLimit) {
throw new IllegalArgumentException("overlapping right and dst ranges");
}
-
+
/* allow left==dest */
StringBuilder destBuilder=new StringBuilder(leftLimit-leftStart+rightLimit-rightStart+16);
destBuilder.append(left, leftStart, leftLimit-leftStart);
* dest=normalize(left+right, mode)
* </code>
*
- * For details see concatenate
+ * For details see concatenate
*
* @param left Left source string.
* @param right Right source string.
return DONE;
}
}
-
-
+
+
/**
* Return the previous character in the normalized text and decrement
* the iteration position by one. If the beginning
return DONE;
}
}
-
+
/**
* Reset the index to the beginning of the text.
* This is equivalent to setIndexOnly(startIndex)).
currentIndex=nextIndex=0;
clearBuffer();
}
-
+
/**
* Set the iteration position in the input text that is being normalized,
* without any immediate normalization.
currentIndex=nextIndex=index;
clearBuffer();
}
-
+
/**
* Set the iteration position in the input text that is being normalized
* and return the first normalized character at that position.
}
///CLOVER:ON
/**
- * Retrieve the index of the start of the input text. This is the begin
- * index of the <tt>CharacterIterator</tt> or the start (i.e. 0) of the
+ * Retrieve the index of the start of the input text. This is the begin
+ * index of the <tt>CharacterIterator</tt> or the start (i.e. 0) of the
* <tt>String</tt> over which this <tt>Normalizer</tt> is iterating
* @deprecated ICU 2.2. Use startIndex() instead.
* @return The codepoint as an int
reset();
return next();
}
-
+
/**
* Return the last character in the normalized text. This resets
* the <tt>Normalizer's</tt> position to be just before the
}
/**
- * Retrieve the index of the start of the input text. This is the begin
- * index of the <tt>CharacterIterator</tt> or the start (i.e. 0) of the
+ * Retrieve the index of the start of the input text. This is the begin
+ * index of the <tt>CharacterIterator</tt> or the start (i.e. 0) of the
* <tt>String</tt> over which this <tt>Normalizer</tt> is iterating
* @return The current iteration position
* @deprecated ICU 56
return 0;
}
}
-
+
/**
* Gets the underlying text storage
* @param fillIn the char buffer to fill the UTF-16 units.
public int getText(char[] fillIn) {
return text.getText(fillIn);
}
-
+
/**
* Gets the length of underlying text storage
* @return the length
public int getLength() {
return text.getLength();
}
-
+
/**
* Returns the text under iteration as a string
* @return a copy of the text under iteration.
public String getText() {
return text.getText();
}
-
+
/**
* Set the input text over which this <tt>Normalizer</tt> will iterate.
* The iteration position is set to the beginning of the input text.
UCharacterIterator newIter = UCharacterIterator.getInstance(newText);
if (newIter == null) {
throw new IllegalStateException("Could not create a new UCharacterIterator");
- }
+ }
text = newIter;
reset();
}
UCharacterIterator newIter = UCharacterIterator.getInstance(newText);
if (newIter == null) {
throw new IllegalStateException("Could not create a new UCharacterIterator");
- }
+ }
text = newIter;
reset();
}
UCharacterIterator newIter = UCharacterIterator.getInstance(newText);
if (newIter == null) {
throw new IllegalStateException("Could not create a new UCharacterIterator");
- }
+ }
text = newIter;
reset();
}
UCharacterIterator newIter = UCharacterIterator.getInstance(newText);
if (newIter == null) {
throw new IllegalStateException("Could not create a new UCharacterIterator");
- }
+ }
text = newIter;
reset();
}
* @deprecated ICU 56
*/
@Deprecated
- public void setText(UCharacterIterator newText) {
+ public void setText(UCharacterIterator newText) {
try{
UCharacterIterator newIter = (UCharacterIterator)newText.clone();
if (newIter == null) {
}
return cmpEquivFold(s1, s2, options);
- }
+ }
/*
* Compare two strings for canonical equivalence.
throw new IndexOutOfBoundsException(Integer.toString(len));
}
}
+ @Override
public Appendable append(char c) {
if(offset<limit) {
chars[offset]=c;
++offset;
return this;
}
+ @Override
public Appendable append(CharSequence s) {
return append(s, 0, s.length());
}
+ @Override
public Appendable append(CharSequence s, int sStart, int sLimit) {
int len=sLimit-sStart;
if(len<=(limit-offset)) {
* need not worry about the plural cases of a language and
* does not have to define the plural cases; they can simply
* use the predefined keywords. The whole plural formatting of messages can
- * be done using localized patterns from resource bundles. For predefined plural
+ * be done using localized patterns from resource bundles. For predefined plural
* rules, see the CLDR <i>Language Plural Rules</i> page at
- * http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html
+ * http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html
* </ul>
*
* <h4>Usage of <code>PluralFormat</code></h4>
* between the {curly braces} and their sub-message,
* and between the '=' and the number of an explicitValue.
* <p>
- * There are 6 predefined case keywords in CLDR/ICU - 'zero', 'one', 'two', 'few', 'many' and
- * 'other'. You always have to define a message text for the default plural case
+ * There are 6 predefined case keywords in CLDR/ICU - 'zero', 'one', 'two', 'few', 'many' and
+ * 'other'. You always have to define a message text for the default plural case
* "<code>other</code>" which is contained in every rule set.
* If you do not specify a message text for a particular plural case, the
* message text of the plural case "<code>other</code>" gets assigned to this
* The MessagePattern which contains the parsed structure of the pattern string.
*/
transient private MessagePattern msgPattern;
-
+
/**
* Obsolete with use of MessagePattern since ICU 4.8. Used to be:
* The format messages for each plural case. It is a mapping:
// We could avoid this adapter class if we made PluralSelector public
// (or at least publicly visible) and had PluralRules implement PluralSelector.
private final class PluralSelectorAdapter implements PluralSelector {
+ @Override
public String select(Object context, double number) {
FixedDecimal dec = (FixedDecimal) context;
assert dec.source == (dec.isNegative ? -number : number);
* @throws IllegalArgumentException if number is not an instance of Number
* @stable ICU 3.8
*/
+ @Override
public StringBuffer format(Object number, StringBuffer toAppendTo,
FieldPosition pos) {
if (!(number instanceof Number)) {
* @throws UnsupportedOperationException will always be thrown by this method.
* @stable ICU 3.8
*/
+ @Override
public Object parseObject(String source, ParsePosition pos) {
throw new UnsupportedOperationException();
}
/**
* Utility class for returning the plural category for a range of numbers, such as 1–5, so that appropriate messages can
* be chosen. The rules for determining this value vary widely across locales.
- *
+ *
* @author markdavis
* @internal
* @deprecated This API is ICU internal only.
/**
* Constructor
- *
+ *
* @internal
* @deprecated This API is ICU internal only.
*/
return 0 == compareTo((Matrix) other);
}
+ @Override
public int compareTo(Matrix o) {
for (int i = 0; i < data.length; ++i) {
int diff = data[i] - o.data[i];
result.data = data.clone();
return result;
}
-
+
@Override
public String toString() {
StringBuilder result = new StringBuilder();
/**
* Internal method for building. If the start or end are null, it means everything of that type.
- *
+ *
* @param rangeStart
* plural category for the start of the range
* @param rangeEnd
/**
* Returns the appropriate plural category for a range from start to end. If there is no available data, then
* 'end' is returned as an implicit value. (Such an implicit value can be tested for with {@link #isExplicit}.)
- *
+ *
* @param start
* plural category for the start of the range
* @param end
/**
* Returns whether the appropriate plural category for a range from start to end
* is explicitly in the data (vs given an implicit value). See also {@link #get}.
- *
+ *
* @param start
* plural category for the start of the range
* @param end
/**
* Internal method to determines whether the StandardPluralCategories was explicitly used in any add statement.
- *
+ *
* @param count
* plural category to test
* @return true if set
* @internal
* @deprecated This API is ICU internal only.
*/
+ @Override
@Deprecated
public int compareTo(PluralRanges that) {
return matrix.compareTo(that.matrix);
* @internal
* @deprecated This API is ICU internal only.
*/
+ @Override
@Deprecated
public boolean isFrozen() {
return isFrozen;
* @internal
* @deprecated This API is ICU internal only.
*/
+ @Override
@Deprecated
public PluralRanges freeze() {
isFrozen = true;
* @internal
* @deprecated This API is ICU internal only.
*/
+ @Override
@Deprecated
public PluralRanges cloneAsThawed() {
PluralRanges result = new PluralRanges();
import java.text.ParseException;
/**
- * Exception that signals an error has occurred while parsing the
- * input to StringPrep or IDNA.
+ * Exception that signals an error has occurred while parsing the
+ * input to StringPrep or IDNA.
*
* @author Ram Viswanadha
* @stable ICU 2.8
public class StringPrepParseException extends ParseException {
// Generated by serialver from JDK 1.4.1_01
static final long serialVersionUID = 7160264827701651255L;
-
+
/**
* @stable ICU 2.8
*/
* @stable ICU 2.8
*/
public static final int BUFFER_OVERFLOW_ERROR = 9;
-
+
/**
* @stable ICU 2.8
*/
public static final int ZERO_LENGTH_LABEL = 10;
-
+
/**
* @stable ICU 3.8
*/
public static final int DOMAIN_NAME_TOO_LONG_ERROR = 11;
-
+
/**
* Construct a ParseException object with the given message
* and error code
- *
+ *
* @param message A string describing the type of error that occurred
* @param error The error that has occurred
* @stable ICU 2.8
this.error = error;
this.line = 0;
}
-
+
/**
* Construct a ParseException object with the given message and
* error code
- *
+ *
* @param message A string describing the type of error that occurred
* @param error The error that has occurred
- * @param rules The input rules string
+ * @param rules The input rules string
* @param pos The position of error in the rules string
* @stable ICU 2.8
*/
public StringPrepParseException(String message,int error, String rules, int pos){
super(message, -1);
this.error = error;
- setContext(rules,pos);
+ setContext(rules,pos);
this.line = 0;
}
/**
* Construct a ParseException object with the given message and error code
- *
+ *
* @param message A string describing the type of error that occurred
* @param error The error that has occurred
- * @param rules The input rules string
+ * @param rules The input rules string
* @param pos The position of error in the rules string
- * @param lineNumber The line number at which the error has occurred.
+ * @param lineNumber The line number at which the error has occurred.
* If the parse engine is not using this field, it should set it to zero. Otherwise
* it should be a positive integer. The default value of this field
* is -1. It will be set to 0 if the code populating this struct is not
public StringPrepParseException(String message, int error, String rules, int pos, int lineNumber){
super(message, -1);
this.error = error;
- setContext(rules,pos);
+ setContext(rules,pos);
this.line = lineNumber;
}
/**
* Compare this ParseException to another and evaluate if they are equal.
* The comparison works only on the type of error and does not compare
* the rules strings, if any, for equality.
- *
+ *
* @param other The exception that this object should be compared to
* @return true if the objects are equal, false if unequal
* @stable ICU 2.8
*/
+ @Override
public boolean equals(Object other){
if(!(other instanceof StringPrepParseException)){
return false;
}
return ((StringPrepParseException)other).error == this.error;
-
+
}
-
+
/**
* Mock implementation of hashCode(). This implementation always returns a constant
* value. When Java assertion is enabled, this method triggers an assertion failure.
* @internal
* @deprecated This API is ICU internal only.
*/
+ @Override
@Deprecated
public int hashCode() {
assert false : "hashCode not designed";
return 42;
}
-
+
/**
* Returns the position of error in the rules string
- *
+ *
* @return String
* @stable ICU 2.8
*/
+ @Override
public String toString(){
StringBuilder buf = new StringBuilder();
buf.append(super.getMessage());
}
private int error;
-
+
/**
* The line on which the error occurred. If the parse engine
* is not using this field, it should set it to zero. Otherwise
* May be the empty string if not implemented by parser.
*/
private StringBuffer postContext = new StringBuffer();
-
+
private static final int PARSE_CONTEXT_LEN = 16;
-
+
private void setPreContext(String str, int pos){
setPreContext(str.toCharArray(),pos);
}
-
+
private void setPreContext(char[] str, int pos){
int start = (pos <= PARSE_CONTEXT_LEN)? 0 : (pos - (PARSE_CONTEXT_LEN-1));
int len = (start <= PARSE_CONTEXT_LEN)? start : PARSE_CONTEXT_LEN;
preContext.append(str,start,len);
-
+
}
-
+
private void setPostContext(String str, int pos){
setPostContext(str.toCharArray(),pos);
}
-
+
private void setPostContext(char[] str, int pos){
int start = pos;
- int len = str.length - start;
+ int len = str.length - start;
postContext.append(str,start,len);
}
-
+
private void setContext(String str,int pos){
setPreContext(str,pos);
setPostContext(str,pos);
}
-
+
/**
- * Returns the error code of this exception.
+ * Returns the error code of this exception.
* This method is only used for testing to verify the error.
* @return The error code
* @stable ICU 3.8
* @return result
* @stable ICU 3.8
*/
+ @Override
public String transform(String source);
}
\ No newline at end of file
* Unicode Locale Data Markup Language (LDML)</a>. {@link TimeZoneNames} represents the
* time zone display name data model and this class implements the algorithm for actual
* formatting and parsing.
- *
+ *
* @see SimpleDateFormat
* @see TimeZoneNames
* @stable ICU 49
/**
* Time zone display format style enum used by format/parse APIs in <code>TimeZoneFormat</code>.
- *
+ *
* @see TimeZoneFormat#format(Style, TimeZone, long)
* @see TimeZoneFormat#format(Style, TimeZone, long, Output)
* @see TimeZoneFormat#parse(Style, String, ParsePosition, Output)
EXEMPLAR_LOCATION (0x0800);
final int flag;
-
+
private Style(int flag) {
this.flag = flag;
}
/**
* Offset pattern type enum.
- *
+ *
* @see TimeZoneFormat#getGMTOffsetPattern(GMTOffsetPatternType)
* @see TimeZoneFormat#setGMTOffsetPattern(GMTOffsetPatternType, String)
* @stable ICU 49
/**
* Time type enum used for receiving time type (standard time, daylight time or unknown)
* in <code>TimeZoneFormat</code> APIs.
- *
+ *
* @stable ICU 49
*/
public enum TimeType {
* When parsing a time zone display name in {@link Style#SPECIFIC_SHORT},
* look for the IANA tz database compatible zone abbreviations in addition
* to the localized names coming from the {@link TimeZoneNames} currently
- * used by the {@link TimeZoneFormat}.
+ * used by the {@link TimeZoneFormat}.
* @stable ICU 54
*/
TZ_DATABASE_ABBREVIATIONS;
private static final String UNKNOWN_LOCATION = "Unknown";
// Order of GMT offset pattern parsing, *_HMS must be evaluated first
- // because *_HM is most likely a substring of *_HMS
+ // because *_HM is most likely a substring of *_HMS
private static final GMTOffsetPatternType[] PARSE_GMT_OFFSET_TYPES = {
GMTOffsetPatternType.POSITIVE_HMS, GMTOffsetPatternType.NEGATIVE_HMS,
GMTOffsetPatternType.POSITIVE_HM, GMTOffsetPatternType.NEGATIVE_HM,
* <p><b>Note</b>: The instance returned by this method is frozen. If you want to
* customize a TimeZoneFormat, you must use {@link #cloneAsThawed()} to get a
* thawed copy first.
- *
+ *
* @param locale the locale.
* @return a frozen instance of <code>TimeZoneFormat</code> for the given locale.
* @stable ICU 49
* <p><b>Note</b>: The instance returned by this method is frozen. If you want to
* customize a TimeZoneFormat, you must use {@link #cloneAsThawed()} to get a
* thawed copy first.
- *
+ *
* @param locale the {@link Locale}.
* @return a frozen instance of <code>TimeZoneFormat</code> for the given locale.
* @stable ICU 54
/**
* Returns the time zone display name data used by this instance.
- *
+ *
* @return the time zone display name data.
* @see #setTimeZoneNames(TimeZoneNames)
* @stable ICU 49
/**
* Sets the time zone display name data to this instance.
- *
+ *
* @param tznames the time zone display name data.
* @return this object.
* @throws UnsupportedOperationException when this object is frozen.
/**
* Returns the localized GMT format pattern.
- *
+ *
* @return the localized GMT format pattern.
* @see #setGMTPattern(String)
* @stable ICU 49
/**
* Sets the localized GMT format pattern. The pattern must contain
* a single argument {0}, for example "GMT {0}".
- *
+ *
* @param pattern the localized GMT format pattern string
* @return this object.
* @throws IllegalArgumentException when the pattern string does not contain "{0}"
/**
* Returns the offset pattern used for localized GMT format.
- *
+ *
* @param type the offset pattern enum
* @see #setGMTOffsetPattern(GMTOffsetPatternType, String)
* @stable ICU 49
/**
* Sets the offset pattern for the given offset type.
- *
+ *
* @param type the offset pattern.
* @param pattern the pattern string.
* @return this object.
/**
* Returns the decimal digit characters used for localized GMT format in a single string
* containing from 0 to 9 in the ascending order.
- *
+ *
* @return the decimal digits for localized GMT format.
* @see #setGMTOffsetDigits(String)
* @stable ICU 49
/**
* Sets the decimal digit characters used for localized GMT format.
- *
+ *
* @param digits a string contains the decimal digit characters from 0 to 9 n the ascending order.
* @return this object.
* @throws IllegalArgumentException when the string did not contain ten characters.
/**
* Returns the localized GMT format string for GMT(UTC) itself (GMT offset is 0).
- *
+ *
* @return the localized GMT string string for GMT(UTC) itself.
* @see #setGMTZeroFormat(String)
* @stable ICU 49
/**
* Sets the localized GMT format string for GMT(UTC) itself (GMT offset is 0).
- *
+ *
* @param gmtZeroFormat the localized GMT format string for GMT(UTC).
* @return this object.
* @throws UnsupportedOperationException when this object is frozen.
* <p>
* <b>Note:</b> By default, an instance of <code>TimeZoneFormat</code>
* created by {#link {@link #getInstance(ULocale)} has no parse options set.
- *
+ *
* @param options the default parse options.
* @return this object.
* @see ParseOption
/**
* Returns the ISO 8601 basic time zone string for the given offset.
* For example, "-08", "-0830" and "Z"
- *
+ *
* @param offset the offset from GMT(UTC) in milliseconds.
* @param useUtcIndicator true if ISO 8601 UTC indicator "Z" is used when the offset is 0.
* @param isShort true if shortest form is used.
/**
* Returns the ISO 8601 extended time zone string for the given offset.
* For example, "-08:00", "-08:30" and "Z"
- *
+ *
* @param offset the offset from GMT(UTC) in milliseconds.
* @param useUtcIndicator true if ISO 8601 UTC indicator "Z" is used when the offset is 0.
* @param isShort true if shortest form is used.
/**
* Returns the display name of the time zone at the given date for
* the style.
- *
+ *
* <p><b>Note</b>: A style may have fallback styles defined. For example,
* when <code>GENERIC_LONG</code> is requested, but there is no display name
* data available for <code>GENERIC_LONG</code> style, the implementation
* See UTS#35 UNICODE LOCALE DATA MARKUP LANGUAGE (LDML)
* <a href="http://www.unicode.org/reports/tr35/#Time_Zone_Fallback">Appendix J: Time Zone Display Name</a>
* for the details.
- *
+ *
* @param style the style enum (e.g. <code>GENERIC_LONG</code>, <code>LOCALIZED_GMT</code>...)
* @param tz the time zone.
* @param date the date.
* in addition to the argument list of {@link #format(Style, TimeZone, long)}.
* The argument is used for receiving the time type (standard time
* or daylight saving time, or unknown) actually used for the display name.
- *
+ *
* @param style the style enum (e.g. <code>GENERIC_LONG</code>, <code>LOCALIZED_GMT</code>...)
* @param tz the time zone.
* @param date the date.
* basic or extended time zone string. When the given string is not an ISO 8601 time
* zone string, this method sets the current position as the error index
* to <code>ParsePosition pos</code> and returns 0.
- *
+ *
* @param text the text contains ISO 8601 style time zone string (e.g. "-08", "-0800", "-08:00", and "Z")
* at the position.
* @param pos the position.
* offset format string. When the given string cannot be parsed, this method
* sets the current position as the error index to <code>ParsePosition pos</code>
* and returns 0.
- *
+ *
* @param text the text contains a localized GMT offset string at the position.
* @param pos the position.
* @return the offset from GMT(UTC) in milliseconds for the given localized GMT
* offset format string. When the given string cannot be parsed, this method
* sets the current position as the error index to <code>ParsePosition pos</code>
* and returns 0.
- *
+ *
* @param text the text contains a short localized GMT offset string at the position.
* @param pos the position.
* @return the offset from GMT(UTC) in milliseconds for the given short localized GMT
/**
* Returns a <code>TimeZone</code> by parsing the time zone string according to
* the parse position, the style and the parse options.
- *
+ *
* @param text the text contains a time zone string at the position.
* @param style the format style.
* @param pos the position.
* <p>
* <b>Note</b>: This method is equivalent to {@link #parse(Style, String, ParsePosition, EnumSet, Output)
* parse(style, text, pos, null, timeType)}.
- *
+ *
* @param text the text contains a time zone string at the position.
* @param style the format style
* @param pos the position.
* <p>
* <b>Note</b>: This method is equivalent to {@link #parse(Style, String, ParsePosition, EnumSet, Output)
* parse(Style.GENERIC_LOCATION, text, pos, EnumSet.of(ParseOption.ALL_STYLES), timeType)}.
- *
+ *
* @param text the text contains a time zone string at the position.
* @param pos the position.
* @return A <code>TimeZone</code>, or null if the input could not be parsed.
/**
* {@inheritDoc}
- *
+ *
* @stable ICU 49
*/
@Override
/**
* {@inheritDoc}
- *
+ *
* @stable ICU 49
*/
@Override
/**
* {@inheritDoc}
- *
+ *
* @stable ICU 49
*/
@Override
/**
* Private method returning the time zone's specific format string.
- *
+ *
* @param tz the time zone
* @param stdType the name type used for standard time
* @param dstType the name type used for daylight time
/**
* Private method returning the time zone's exemplar location string.
* This method will never return null.
- *
+ *
* @param tz the time zone
* @return the time zone's exemplar location name.
*/
* Private method returns a time zone ID. If tzID is not null, the value of tzID is returned.
* If tzID is null, then this method look up a time zone ID for the current region. This is a
* small helper method used by the parse implementation method
- *
+ *
* @param tzID
* the time zone ID or null
* @param mzID
* the locale of this instance. When a generic name is coming from
* a meta zone, this region is used for checking if the time zone
* is a reference zone of the meta zone.
- *
+ *
* @return the target region
*/
private synchronized String getTargetRegion() {
* Parses the localized GMT pattern string and initialize
* localized gmt pattern fields including {{@link #_gmtPatternTokens}.
* This method must be also called at deserialization time.
- *
+ *
* @param gmtPattern the localized GMT pattern string such as "GMT {0}"
* @throws IllegalArgumentException when the pattern string does not contain "{0}"
*/
/**
* Unquotes the message format style pattern.
- *
+ *
* @param s the pattern
* @return the unquoted pattern string
*/
* Initialize localized GMT format offset hour/min/sec patterns.
* This method parses patterns into optimized run-time format.
* This method must be called at deserialization time.
- *
+ *
* @param gmtOffsetPatterns patterns, String[4]
* @throws IllegalArgumentException when patterns are not valid
*/
/**
* Parse the GMT offset pattern into runtime optimized format
- *
+ *
* @param pattern the offset pattern string
* @param letters the required pattern letters such as "Hm"
* @return An array of Object. Each array entry is either String (representing
/**
* Appends seconds field to the offset pattern with hour/minute
- *
+ *
* @param offsetHM the offset pattern including hours and minutes fields
* @return the offset pattern including hours, minutes and seconds fields
*/
/**
* Truncates minutes field from the offset pattern with hour/minute
- *
+ *
* @param offsetHM the offset pattern including hours and minutes fields
* @return the offset pattern including only hours field
*/
* Appends localized digits to the buffer.
* <p>
* Note: This code assumes that the input number is 0 - 59
- *
+ *
* @param buf the target buffer
* @param n the integer number
* @param minDigits the minimum digits width
* offset format string. When the given string cannot be parsed, this method
* sets the current position as the error index to <code>ParsePosition pos</code>
* and returns 0.
- *
+ *
* @param text the text contains a localized GMT offset string at the position.
* @param pos the position.
* @param isShort true if this parser to try the short format first
break;
}
idx += len;
-
+
// Offset part
int[] offsetLen = new int[1];
offset = parseOffsetFields(text, idx, false, offsetLen);
break;
}
idx += offsetLen[0];
-
+
// Suffix part
len = _gmtPatternSuffix.length();
if (len > 0 && !text.regionMatches(true, idx, _gmtPatternSuffix, 0, len)) {
/**
* Parses localized GMT offset fields into offset.
- *
+ *
* @param text the input text
* @param start the start index
* @param isShort true if this is a short format - currently not used
/**
* Parses localized GMT offset fields with the given pattern
- *
+ *
* @param text the input text
* @param start the start index
* @param patternItems the pattern (already itemized)
* Reads an offset field value. This method will stop parsing when
* 1) number of digits reaches <code>maxDigits</code>
* 2) just before already parsed number exceeds <code>maxVal</code>
- *
+ *
* @param text the text
* @param start the start offset
* @param minDigits the minimum number of required digits
* Break input String into String[]. Each array element represents
* a code point. This method is used for parsing localized digit
* characters and support characters in Unicode supplemental planes.
- *
+ *
* @param str the string
* @return the array of code points in String[]
*/
* (basic format, extended format, or UTC indicator). When the given string is not an ISO 8601 time
* zone string, this method sets the current position as the error index
* to <code>ParsePosition pos</code> and returns 0.
- *
+ *
* @param text the text contains ISO 8601 style time zone string (e.g. "-08", "-08:00", "Z")
* at the position.
* @param pos the position.
* <p>
* Note: This method expects the input position is already at the start of
* ASCII digits and does not parse sign (+/-).
- *
+ *
* @param text The text contains a sequence of ASCII digits
* @param pos The parse position
* @param minFields The minimum Fields to be parsed
* <p>
* Note: This method expects the input position is already at the start of
* ASCII digits and does not parse sign (+/-).
- *
+ *
* @param text The text
* @param pos The parse position
* @param sep The separator character
};
/**
- *
+ *
* @param oos the object output stream
* @throws IOException
*/
}
/**
- *
+ *
* @param ois the object input stream
* @throws ClassNotFoundException
* @throws IOException
* {@inheritDoc}
* @stable ICU 49
*/
+ @Override
public boolean isFrozen() {
return _frozen;
}
* {@inheritDoc}
* @stable ICU 49
*/
+ @Override
public TimeZoneFormat freeze() {
_frozen = true;
return this;
* {@inheritDoc}
* @stable ICU 49
*/
+ @Override
public TimeZoneFormat cloneAsThawed() {
TimeZoneFormat copy = (TimeZoneFormat)super.clone();
copy._frozen = false;
*/
package com.ibm.icu.text;
-
import java.text.CharacterIterator;
import com.ibm.icu.impl.CharacterIteratorWrapper;
import com.ibm.icu.impl.UCharArrayIterator;
import com.ibm.icu.impl.UCharacterIteratorWrapper;
-
/**
- * Abstract class that defines an API for iteration on text objects.This is an
- * interface for forward and backward iteration and random access into a text
- * object. Forward iteration is done with post-increment and backward iteration
- * is done with pre-decrement semantics, while the
- * <code>java.text.CharacterIterator</code> interface methods provided forward
- * iteration with "pre-increment" and backward iteration with pre-decrement
- * semantics. This API is more efficient for forward iteration over code points.
- * The other major difference is that this API can do both code unit and code point
- * iteration, <code>java.text.CharacterIterator</code> can only iterate over
- * code units and is limited to BMP (0 - 0xFFFF)
+ * Abstract class that defines an API for iteration on text objects. This is an interface for forward and backward
+ * iteration and random access into a text object. Forward iteration is done with post-increment and backward iteration
+ * is done with pre-decrement semantics, while the <code>java.text.CharacterIterator</code> interface methods provided
+ * forward iteration with "pre-increment" and backward iteration with pre-decrement semantics. This API is more
+ * efficient for forward iteration over code points. The other major difference is that this API can do both code unit
+ * and code point iteration, <code>java.text.CharacterIterator</code> can only iterate over code units and is limited to
+ * BMP (0 - 0xFFFF)
+ *
* @author Ram
* @stable ICU 2.4
*/
-public abstract class UCharacterIterator
- implements Cloneable,UForwardCharacterIterator {
+public abstract class UCharacterIterator implements Cloneable, UForwardCharacterIterator {
/**
* Protected default constructor for the subclasses
+ *
* @stable ICU 2.4
*/
- protected UCharacterIterator(){
+ protected UCharacterIterator() {
}
-
+
// static final methods ----------------------------------------------------
-
+
/**
- * Returns a <code>UCharacterIterator</code> object given a
- * <code>Replaceable</code> object.
- * @param source a valid source as a <code>Replaceable</code> object
+ * Returns a <code>UCharacterIterator</code> object given a <code>Replaceable</code> object.
+ *
+ * @param source
+ * a valid source as a <code>Replaceable</code> object
* @return UCharacterIterator object
- * @exception IllegalArgumentException if the argument is null
+ * @exception IllegalArgumentException
+ * if the argument is null
* @stable ICU 2.4
*/
- public static final UCharacterIterator getInstance(Replaceable source){
+ public static final UCharacterIterator getInstance(Replaceable source) {
return new ReplaceableUCharacterIterator(source);
}
-
+
/**
- * Returns a <code>UCharacterIterator</code> object given a
- * source string.
- * @param source a string
+ * Returns a <code>UCharacterIterator</code> object given a source string.
+ *
+ * @param source
+ * a string
* @return UCharacterIterator object
- * @exception IllegalArgumentException if the argument is null
+ * @exception IllegalArgumentException
+ * if the argument is null
* @stable ICU 2.4
*/
- public static final UCharacterIterator getInstance(String source){
+ public static final UCharacterIterator getInstance(String source) {
return new ReplaceableUCharacterIterator(source);
}
-
+
/**
- * Returns a <code>UCharacterIterator</code> object given a
- * source character array.
- * @param source an array of UTF-16 code units
+ * Returns a <code>UCharacterIterator</code> object given a source character array.
+ *
+ * @param source
+ * an array of UTF-16 code units
* @return UCharacterIterator object
- * @exception IllegalArgumentException if the argument is null
+ * @exception IllegalArgumentException
+ * if the argument is null
* @stable ICU 2.4
*/
- public static final UCharacterIterator getInstance(char[] source){
- return getInstance(source,0,source.length);
+ public static final UCharacterIterator getInstance(char[] source) {
+ return getInstance(source, 0, source.length);
}
-
+
/**
- * Returns a <code>UCharacterIterator</code> object given a
- * source character array.
- * @param source an array of UTF-16 code units
+ * Returns a <code>UCharacterIterator</code> object given a source character array.
+ *
+ * @param source
+ * an array of UTF-16 code units
* @return UCharacterIterator object
- * @exception IllegalArgumentException if the argument is null
+ * @exception IllegalArgumentException
+ * if the argument is null
* @stable ICU 2.4
*/
- public static final UCharacterIterator getInstance(char[] source, int start, int limit){
- return new UCharArrayIterator(source,start,limit);
+ public static final UCharacterIterator getInstance(char[] source, int start, int limit) {
+ return new UCharArrayIterator(source, start, limit);
}
+
/**
- * Returns a <code>UCharacterIterator</code> object given a
- * source StringBuffer.
- * @param source an string buffer of UTF-16 code units
+ * Returns a <code>UCharacterIterator</code> object given a source StringBuffer.
+ *
+ * @param source
+ * a string buffer of UTF-16 code units
* @return UCharacterIterator object
- * @exception IllegalArgumentException if the argument is null
+ * @exception IllegalArgumentException
+ * if the argument is null
* @stable ICU 2.4
*/
- public static final UCharacterIterator getInstance(StringBuffer source){
+ public static final UCharacterIterator getInstance(StringBuffer source) {
return new ReplaceableUCharacterIterator(source);
}
/**
- * Returns a <code>UCharacterIterator</code> object given a
- * CharacterIterator.
- * @param source a valid CharacterIterator object.
+ * Returns a <code>UCharacterIterator</code> object given a CharacterIterator.
+ *
+ * @param source
+ * a valid CharacterIterator object.
* @return UCharacterIterator object
- * @exception IllegalArgumentException if the argument is null
+ * @exception IllegalArgumentException
+ * if the argument is null
* @stable ICU 2.4
- */
- public static final UCharacterIterator getInstance(CharacterIterator source){
+ */
+ public static final UCharacterIterator getInstance(CharacterIterator source) {
return new CharacterIteratorWrapper(source);
}
-
+
// public methods ----------------------------------------------------------
/**
- * Returns a <code>java.text.CharacterIterator</code> object for
- * the underlying text of this iterator. The returned iterator is
- * independent of this iterator.
+ * Returns a <code>java.text.CharacterIterator</code> object for the underlying text of this iterator. The returned
+ * iterator is independent of this iterator.
+ *
* @return java.text.CharacterIterator object
- * @stable ICU 2.4
+ * @stable ICU 2.4
*/
- public CharacterIterator getCharacterIterator(){
+ public CharacterIterator getCharacterIterator() {
return new UCharacterIteratorWrapper(this);
- }
-
+ }
+
/**
- * Returns the code unit at the current index. If index is out
- * of range, returns DONE. Index is not changed.
+ * Returns the code unit at the current index. If index is out of range, returns DONE. Index is not changed.
+ *
* @return current code unit
* @stable ICU 2.4
*/
public abstract int current();
-
+
/**
- * Returns the codepoint at the current index.
- * If the current index is invalid, DONE is returned.
- * If the current index points to a lead surrogate, and there is a following
- * trail surrogate, then the code point is returned. Otherwise, the code
- * unit at index is returned. Index is not changed.
+ * Returns the codepoint at the current index. If the current index is invalid, DONE is returned. If the current
+ * index points to a lead surrogate, and there is a following trail surrogate, then the code point is returned.
+ * Otherwise, the code unit at index is returned. Index is not changed.
+ *
* @return current codepoint
* @stable ICU 2.4
*/
- public int currentCodePoint(){
+ public int currentCodePoint() {
int ch = current();
- if(UTF16.isLeadSurrogate((char)ch)){
+ if (UTF16.isLeadSurrogate((char) ch)) {
// advance the index to get the
// next code point
next();
// current should never change
// the current index so back off
previous();
-
- if(UTF16.isTrailSurrogate((char)ch2)){
- // we found a surrogate pair
+
+ if (UTF16.isTrailSurrogate((char) ch2)) {
+ // we found a surrogate pair
// return the codepoint
- return Character.toCodePoint((char)ch, (char)ch2);
+ return Character.toCodePoint((char) ch, (char) ch2);
}
}
return ch;
}
-
+
/**
* Returns the length of the text
+ *
* @return length of the text
* @stable ICU 2.4
*/
public abstract int getLength();
-
/**
* Gets the current index in text.
+ *
* @return current index in text.
* @stable ICU 2.4
*/
public abstract int getIndex();
-
/**
- * Returns the UTF16 code unit at index, and increments to the next
- * code unit (post-increment semantics). If index is out of
- * range, DONE is returned, and the iterator is reset to the limit
- * of the text.
- * @return the next UTF16 code unit, or DONE if the index is at the limit
- * of the text.
- * @stable ICU 2.4
+ * Returns the UTF16 code unit at index, and increments to the next code unit (post-increment semantics). If index
+ * is out of range, DONE is returned, and the iterator is reset to the limit of the text.
+ *
+ * @return the next UTF16 code unit, or DONE if the index is at the limit of the text.
+ * @stable ICU 2.4
*/
+ @Override
public abstract int next();
/**
- * Returns the code point at index, and increments to the next code
- * point (post-increment semantics). If index does not point to a
- * valid surrogate pair, the behavior is the same as
- * <code>next()</code>. Otherwise the iterator is incremented past
- * the surrogate pair, and the code point represented by the pair
- * is returned.
- * @return the next codepoint in text, or DONE if the index is at
- * the limit of the text.
- * @stable ICU 2.4
+ * Returns the code point at index, and increments to the next code point (post-increment semantics). If index does
+ * not point to a valid surrogate pair, the behavior is the same as <code>next()</code>. Otherwise the iterator is
+ * incremented past the surrogate pair, and the code point represented by the pair is returned.
+ *
+ * @return the next codepoint in text, or DONE if the index is at the limit of the text.
+ * @stable ICU 2.4
*/
- public int nextCodePoint(){
+ @Override
+ public int nextCodePoint() {
int ch1 = next();
- if(UTF16.isLeadSurrogate((char)ch1)){
+ if (UTF16.isLeadSurrogate((char) ch1)) {
int ch2 = next();
- if(UTF16.isTrailSurrogate((char)ch2)){
- return Character.toCodePoint((char)ch1, (char)ch2);
- }else if (ch2 != DONE) {
+ if (UTF16.isTrailSurrogate((char) ch2)) {
+ return Character.toCodePoint((char) ch1, (char) ch2);
+ } else if (ch2 != DONE) {
// unmatched surrogate so back out
previous();
}
}
/**
- * Decrement to the position of the previous code unit in the
- * text, and return it (pre-decrement semantics). If the
- * resulting index is less than 0, the index is reset to 0 and
- * DONE is returned.
- * @return the previous code unit in the text, or DONE if the new
- * index is before the start of the text.
- * @stable ICU 2.4
+ * Decrement to the position of the previous code unit in the text, and return it (pre-decrement semantics). If the
+ * resulting index is less than 0, the index is reset to 0 and DONE is returned.
+ *
+ * @return the previous code unit in the text, or DONE if the new index is before the start of the text.
+ * @stable ICU 2.4
*/
public abstract int previous();
-
/**
- * Retreat to the start of the previous code point in the text,
- * and return it (pre-decrement semantics). If the index is not
- * preceeded by a valid surrogate pair, the behavior is the same
- * as <code>previous()</code>. Otherwise the iterator is
- * decremented to the start of the surrogate pair, and the code
- * point represented by the pair is returned.
- * @return the previous code point in the text, or DONE if the new
- * index is before the start of the text.
- * @stable ICU 2.4
+ * Retreat to the start of the previous code point in the text, and return it (pre-decrement semantics). If the
+ * index is not preceded by a valid surrogate pair, the behavior is the same as <code>previous()</code>. Otherwise
+ * the iterator is decremented to the start of the surrogate pair, and the code point represented by the pair is
+ * returned.
+ *
+ * @return the previous code point in the text, or DONE if the new index is before the start of the text.
+ * @stable ICU 2.4
*/
- public int previousCodePoint(){
+ public int previousCodePoint() {
int ch1 = previous();
- if(UTF16.isTrailSurrogate((char)ch1)){
+ if (UTF16.isTrailSurrogate((char) ch1)) {
int ch2 = previous();
- if(UTF16.isLeadSurrogate((char)ch2)){
- return Character.toCodePoint((char)ch2, (char)ch1);
- }else if (ch2 != DONE) {
- //unmatched trail surrogate so back out
+ if (UTF16.isLeadSurrogate((char) ch2)) {
+ return Character.toCodePoint((char) ch2, (char) ch1);
+ } else if (ch2 != DONE) {
+ // unmatched trail surrogate so back out
next();
- }
+ }
}
return ch1;
}
/**
* Sets the index to the specified index in the text.
- * @param index the index within the text.
- * @exception IndexOutOfBoundsException is thrown if an invalid index is
- * supplied
+ *
+ * @param index
+ * the index within the text.
+ * @exception IndexOutOfBoundsException
+ * is thrown if an invalid index is supplied
* @stable ICU 2.4
*/
public abstract void setIndex(int index);
/**
* Sets the current index to the limit.
+ *
* @stable ICU 2.4
*/
public void setToLimit() {
setIndex(getLength());
}
-
+
/**
* Sets the current index to the start.
+ *
* @stable ICU 2.4
*/
public void setToStart() {
}
/**
- * Fills the buffer with the underlying text storage of the iterator
- * If the buffer capacity is not enough a exception is thrown. The capacity
- * of the fill in buffer should at least be equal to length of text in the
- * iterator obtained by calling <code>getLength()</code>).
- * <b>Usage:</b>
- *
+ * Fills the buffer with the underlying text storage of the iterator. If the buffer capacity is not enough, an
+ * exception is thrown. The capacity of the fill-in buffer should at least be equal to the length of text in the
+ * iterator obtained by calling <code>getLength()</code>. <b>Usage:</b>
+ *
* <pre>
* UChacterIterator iter = new UCharacterIterator.getInstance(text);
* char[] buf = new char[iter.getLength()];
* iter.getText(buf);
- *
+ *
* OR
* char[] buf= new char[1];
* int len = 0;
* }
* </pre>
*
- * @param fillIn an array of chars to fill with the underlying UTF-16 code
- * units.
- * @param offset the position within the array to start putting the data.
+ * @param fillIn
+ * an array of chars to fill with the underlying UTF-16 code units.
+ * @param offset
+ * the position within the array to start putting the data.
* @return the number of code units added to fillIn, as a convenience
- * @exception IndexOutOfBoundsException exception if there is not enough
- * room after offset in the array, or if offset < 0.
- * @stable ICU 2.4
+ * @exception IndexOutOfBoundsException
+ * exception if there is not enough room after offset in the array, or if offset < 0.
+ * @stable ICU 2.4
*/
- public abstract int getText(char[] fillIn, int offset);
+ public abstract int getText(char[] fillIn, int offset);
/**
- * Convenience override for <code>getText(char[], int)</code> that provides
- * an offset of 0.
- * @param fillIn an array of chars to fill with the underlying UTF-16 code
- * units.
+ * Convenience override for <code>getText(char[], int)</code> that provides an offset of 0.
+ *
+ * @param fillIn
+ * an array of chars to fill with the underlying UTF-16 code units.
* @return the number of code units added to fillIn, as a convenience
- * @exception IndexOutOfBoundsException exception if there is not enough
- * room in the array.
- * @stable ICU 2.4
+ * @exception IndexOutOfBoundsException
+ * exception if there is not enough room in the array.
+ * @stable ICU 2.4
*/
public final int getText(char[] fillIn) {
return getText(fillIn, 0);
}
-
+
/**
* Convenience method for returning the underlying text storage as as string
+ *
* @return the underlying text storage in the iterator as a string
* @stable ICU 2.4
*/
getText(text);
return new String(text);
}
-
+
/**
- * Moves the current position by the number of code units
- * specified, either forward or backward depending on the sign
- * of delta (positive or negative respectively). If the resulting
- * index would be less than zero, the index is set to zero, and if
- * the resulting index would be greater than limit, the index is
- * set to limit.
+ * Moves the current position by the number of code units specified, either forward or backward depending on the
+ * sign of delta (positive or negative respectively). If the resulting index would be less than zero, the index is
+ * set to zero, and if the resulting index would be greater than limit, the index is set to limit.
*
- * @param delta the number of code units to move the current
- * index.
+ * @param delta
+ * the number of code units to move the current index.
* @return the new index.
- * @exception IndexOutOfBoundsException is thrown if an invalid index is
- * supplied
- * @stable ICU 2.4
- *
+ * @exception IndexOutOfBoundsException
+ * is thrown if an invalid index is supplied
+ * @stable ICU 2.4
+ *
*/
public int moveIndex(int delta) {
int x = Math.max(0, Math.min(getIndex() + delta, getLength()));
}
/**
- * Moves the current position by the number of code points
- * specified, either forward or backward depending on the sign of
- * delta (positive or negative respectively). If the current index
- * is at a trail surrogate then the first adjustment is by code
- * unit, and the remaining adjustments are by code points. If the
- * resulting index would be less than zero, the index is set to
- * zero, and if the resulting index would be greater than limit,
- * the index is set to limit.
- * @param delta the number of code units to move the current index.
- * @return the new index
- * @exception IndexOutOfBoundsException is thrown if an invalid delta is
- * supplied
+ * Moves the current position by the number of code points specified, either forward or backward depending on the
+ * sign of delta (positive or negative respectively). If the current index is at a trail surrogate then the first
+ * adjustment is by code unit, and the remaining adjustments are by code points. If the resulting index would be
+ * less than zero, the index is set to zero, and if the resulting index would be greater than limit, the index is
+ * set to limit.
+ *
+ * @param delta
+ * the number of code points to move the current index.
+ * @return the new index
+ * @exception IndexOutOfBoundsException
+ * is thrown if an invalid delta is supplied
* @stable ICU 2.4
*/
- public int moveCodePointIndex(int delta){
- if(delta>0){
- while(delta>0 && nextCodePoint() != DONE){delta--;}
- }else{
- while(delta<0 && previousCodePoint() != DONE){delta++;}
+ public int moveCodePointIndex(int delta) {
+ if (delta > 0) {
+ while (delta > 0 && nextCodePoint() != DONE) {
+ delta--;
+ }
+ } else {
+ while (delta < 0 && previousCodePoint() != DONE) {
+ delta++;
+ }
}
- if(delta!=0){
+ if (delta != 0) {
throw new IndexOutOfBoundsException();
}
-
+
return getIndex();
}
/**
- * Creates a copy of this iterator, independent from other iterators.
- * If it is not possible to clone the iterator, returns null.
+ * Creates a copy of this iterator, independent from other iterators. If it is not possible to clone the iterator,
+ * returns null.
+ *
* @return copy of this iterator
* @stable ICU 2.4
*/
- public Object clone() throws CloneNotSupportedException{
+ @Override
+ public Object clone() throws CloneNotSupportedException {
return super.clone();
- }
-
-}
+ }
+}
* filters. Matches a single 16-bit code unit at offset.
* @stable ICU 2.0
*/
+ @Override
public int matches(Replaceable text,
int[] offset,
int limit,
* Return a new set that is equivalent to this one.
* @stable ICU 2.0
*/
+ @Override
public Object clone() {
if (isFrozen()) {
return this;
* will produce another set that is equal to this one.
* @stable ICU 2.0
*/
+ @Override
public String toPattern(boolean escapeUnprintable) {
if (pat != null && !escapeUnprintable) {
return pat;
* indexing.
* @stable ICU 2.0
*/
+ @Override
public boolean matchesIndexValue(int v) {
/* The index value v, in the range [0,255], is contained in this set if
* it is contained in any pair of this set. Pairs either have the high
* longest possible multichar string.
* @stable ICU 2.0
*/
+ @Override
public int matches(Replaceable text,
int[] offset,
int limit,
// about them here. If we ever allow zero-length strings
// we much check for them here.
if (contains(UnicodeMatcher.ETHER)) {
- return incremental ? U_PARTIAL_MATCH : U_MATCH;
+ return incremental ? U_PARTIAL_MATCH : U_MATCH;
} else {
return U_MISMATCH;
}
// Strings are sorted, so we can optimize in the
// forward direction.
if (forward && c > firstChar) break;
- if (c != firstChar) continue;
+ if (c != firstChar) continue;
int length = matchRest(text, offset[0], limit, trial);
}
/**
- * Tests whether the text matches at the offset. If so, returns the end of the longest substring that it matches. If not, returns -1.
+ * Tests whether the text matches at the offset. If so, returns the end of the longest substring that it matches. If not, returns -1.
* @internal
* @deprecated This API is ICU internal only.
*/
* @param toUnionTo the set into which to union the source characters
* @stable ICU 2.2
*/
+ @Override
public void addMatchSetTo(UnicodeSet toUnionTo) {
toUnionTo.addAll(this);
}
// c is before start of next range
list[i] = c;
// if we touched the HIGH mark, then add a new one
- if (c == MAX_VALUE) {
+ if (c == MAX_VALUE) {
ensureCapacity(len+1);
list[len++] = HIGH;
}
if (s.length() == 1) return s.charAt(0);
// at this point, len = 2
- int cp = UTF16.charAt(s, 0);
+ int cp = UTF16.charAt(s, 0);
if (cp > 0xFFFF) { // is surrogate pair
return cp;
}
*/
public final UnicodeSet retain(CharSequence cs) {
- int cp = getSingleCP(cs);
+ int cp = getSingleCP(cs);
if (cp < 0) {
String s = cs.toString();
boolean isIn = strings.contains(s);
* @return true if the test condition is met
* @stable ICU 2.0
*/
+ @Override
public boolean contains(int c) {
if (c < MIN_VALUE || c > MAX_VALUE) {
throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(c, 6));
* @return <tt>true</tt> if the specified Object is equal to this set.
* @stable ICU 2.0
*/
+ @Override
public boolean equals(Object o) {
if (o == null) {
return false;
* @see java.lang.Object#hashCode()
* @stable ICU 2.0
*/
+ @Override
public int hashCode() {
int result = len;
for (int i = 0; i < len; ++i) {
* Return a programmer-readable string representation of this object.
* @stable ICU 2.0
*/
+ @Override
public String toString() {
return toPattern(true);
}
// Add constants to make the applyPattern() code easier to follow.
- private static final int LAST0_START = 0,
- LAST1_RANGE = 1,
+ private static final int LAST0_START = 0,
+ LAST1_RANGE = 1,
LAST2_SET = 2;
- private static final int MODE0_NONE = 0,
- MODE1_INBRACKET = 1,
+ private static final int MODE0_NONE = 0,
+ MODE1_INBRACKET = 1,
MODE2_OUTBRACKET = 2;
- private static final int SETMODE0_NONE = 0,
- SETMODE1_UNICODESET = 1,
- SETMODE2_PROPERTYPAT = 2,
+ private static final int SETMODE0_NONE = 0,
+ SETMODE1_UNICODESET = 1,
+ SETMODE2_PROPERTYPAT = 2,
SETMODE3_PREPARSED = 3;
/**
}
/**
- * Add the contents of the collection (as strings) into this UnicodeSet.
+ * Add the contents of the collection (as strings) into this UnicodeSet.
* The collection must not contain null.
* @param source the collection to add
* @return a reference to this object
private void ensureCapacity(int newLen) {
if (newLen <= list.length) return;
- int[] temp = new int[newLen + GROW_EXTRA];
+ int[] temp = new int[newLen + GROW_EXTRA];
System.arraycopy(list, 0, temp, 0, len);
list = temp;
}
private static class NumericValueFilter implements Filter {
double value;
NumericValueFilter(double value) { this.value = value; }
+ @Override
public boolean contains(int ch) {
return UCharacter.getUnicodeNumericValue(ch) == value;
}
private static class GeneralCategoryMaskFilter implements Filter {
int mask;
GeneralCategoryMaskFilter(int mask) { this.mask = mask; }
+ @Override
public boolean contains(int ch) {
return ((1 << UCharacter.getType(ch)) & mask) != 0;
}
this.prop = prop;
this.value = value;
}
+ @Override
public boolean contains(int ch) {
return UCharacter.getIntPropertyValue(ch, prop) == value;
}
private static class ScriptExtensionsFilter implements Filter {
int script;
ScriptExtensionsFilter(int script) { this.script = script; }
+ @Override
public boolean contains(int c) {
return UScript.hasScript(c, script);
}
private static class VersionFilter implements Filter {
VersionInfo version;
VersionFilter(VersionInfo version) { this.version = version; }
+ @Override
public boolean contains(int ch) {
VersionInfo v = UCharacter.getAge(ch);
// Reference comparison ok; VersionInfo caches and reuses
* @draft ICU3.8 (retain)
* @provisional This API might change or be removed in a future release.
*/
+ @Override
public UnicodeMatcher lookupMatcher(int i) {
return null;
}
* <p>
* This routine will be called whenever the parsing of a UnicodeSet pattern finds such a
* propertyName+propertyValue combination.
- *
+ *
* @param propertyName
* the name of the property
* @param propertyValue
* @draft ICU3.8 (retain)
* @provisional This API might change or be removed in a future release.
*/
+ @Override
public char[] lookup(String s) {
return null;
}
* @draft ICU3.8 (retain)
* @provisional This API might change or be removed in a future release.
*/
+ @Override
public String parseReference(String text, ParsePosition pos, int limit) {
return null;
}
/**
* Is this frozen, according to the Freezable interface?
- *
+ *
* @return value
* @stable ICU 3.8
*/
+ @Override
public boolean isFrozen() {
return (bmpSet != null || stringSpan != null);
}
/**
* Freeze this class, according to the Freezable interface.
- *
+ *
* @return this
* @stable ICU 4.4
*/
+ @Override
public UnicodeSet freeze() {
if (!isFrozen()) {
// Do most of what compact() does before freezing because
* @return the clone, not frozen
* @stable ICU 4.4
*/
+ @Override
public UnicodeSet cloneAsThawed() {
UnicodeSet result = new UnicodeSet(this);
assert !result.isFrozen();
/**
* A struct-like class used for iteration through ranges, for faster iteration than by String.
* Read about the restrictions on usage in {@link UnicodeSet#ranges()}.
- *
+ *
* @stable ICU 54
*/
public static class EntryRange {
/**
* The starting code point of the range.
- *
+ *
* @stable ICU 54
*/
public int codepoint;
/**
* The ending code point of the range
- *
+ *
* @stable ICU 54
*/
public int codepointEnd;
/**
* {@inheritDoc}
- *
+ *
* @stable ICU 54
*/
@Override
public String toString() {
StringBuilder b = new StringBuilder();
- return (
+ return (
codepoint == codepointEnd ? _appendToPat(b, codepoint, false)
: _appendToPat(_appendToPat(b, codepoint, false).append('-'), codepointEnd, false))
.toString();
*
* <p><b>Warning: </b>To iterate over the full contents, you have to also iterate over the strings.
*
- * <p><b>Warning: </b>For speed, UnicodeSet iteration does not check for concurrent modification.
+ * <p><b>Warning: </b>For speed, UnicodeSet iteration does not check for concurrent modification.
* Do not alter the UnicodeSet while iterating.
- *
+ *
* <pre>
* // Sample code
* for (EntryRange range : us1.ranges()) {
* // do something with each string;
* }
* </pre>
- *
+ *
* @stable ICU 54
*/
public Iterable<EntryRange> ranges() {
}
private class EntryRangeIterable implements Iterable<EntryRange> {
+ @Override
public Iterator<EntryRange> iterator() {
return new EntryRangeIterator();
}
int pos;
EntryRange result = new EntryRange();
+ @Override
public boolean hasNext() {
return pos < len-1;
}
+ @Override
public EntryRange next() {
if (pos < len-1) {
result.codepoint = list[pos++];
}
return result;
}
+ @Override
public void remove() {
throw new UnsupportedOperationException();
}
/**
* Returns a string iterator. Uses the same order of iteration as {@link UnicodeSetIterator}.
- * <p><b>Warning: </b>For speed, UnicodeSet iteration does not check for concurrent modification.
+ * <p><b>Warning: </b>For speed, UnicodeSet iteration does not check for concurrent modification.
* Do not alter the UnicodeSet while iterating.
* @see java.util.Set#iterator()
* @stable ICU 4.4
*/
+ @Override
public Iterator<String> iterator() {
return new UnicodeSetIterator2(this);
}
- // Cover for string iteration.
+ // Cover for string iteration.
private static class UnicodeSetIterator2 implements Iterator<String> {
// Invariants:
// sourceList != null then sourceList[item] is a valid character
/* (non-Javadoc)
* @see java.util.Iterator#hasNext()
*/
+ @Override
public boolean hasNext() {
return sourceList != null || stringIterator.hasNext();
}
/* (non-Javadoc)
* @see java.util.Iterator#next()
*/
+ @Override
public String next() {
if (sourceList == null) {
return stringIterator.next();
/* (non-Javadoc)
* @see java.util.Iterator#remove()
*/
+ @Override
public void remove() {
throw new UnsupportedOperationException();
- }
+ }
}
/**
* @see java.lang.Comparable#compareTo(java.lang.Object)
* @stable ICU 4.4
*/
+ @Override
public int compareTo(UnicodeSet o) {
return compareTo(o, ComparisonStyle.SHORTER_FIRST);
}
/**
* Utility to compare a string to a code point.
* Same results as turning the code point into a string (with the [ugly] new StringBuilder().appendCodePoint(codepoint).toString())
- * and comparing, but much faster (no object creation).
+ * and comparing, but much faster (no object creation).
* Actually, there is one difference; a null compares as less.
* Note that this (=String) order is UTF-16 order -- *not* code point order.
* @stable ICU 4.4
/**
* Utility to compare a string to a code point.
- * Same results as turning the code point into a string and comparing, but much faster (no object creation).
+ * Same results as turning the code point into a string and comparing, but much faster (no object creation).
* Actually, there is one difference; a null compares as less.
* Note that this (=String) order is UTF-16 order -- *not* code point order.
* @stable ICU 4.4
}
/**
- * Simplify the ranges in a Unicode set by merging any ranges that are only separated by characters in the dontCare set.
- * For example, the ranges: \\u2E80-\\u2E99\\u2E9B-\\u2EF3\\u2F00-\\u2FD5\\u2FF0-\\u2FFB\\u3000-\\u303E change to \\u2E80-\\u303E
+ * Simplify the ranges in a Unicode set by merging any ranges that are only separated by characters in the dontCare set.
+ * For example, the ranges: \\u2E80-\\u2E99\\u2E9B-\\u2EF3\\u2F00-\\u2FD5\\u2FF0-\\u2FFB\\u3000-\\u303E change to \\u2E80-\\u303E
* if the dontCare set includes unassigned characters (for a particular version of Unicode).
* @param dontCare Set with the don't-care characters for spanning
* @return the input set, modified
* Unassigned characters (gc=Cn) are different than in ICU, you MUST call
* {@code UnicodeProperty.ResetCacheProperties} afterwards. If you then call {@code UnicodeSet.setDefaultXSymbolTable}
* with null to clear the value, you MUST also call {@code UnicodeProperty.ResetCacheProperties}.
- *
+ *
* @param xSymbolTable the new default symbol table.
* @internal
* @deprecated This API is ICU internal only.
*/
@Deprecated
public static void setDefaultXSymbolTable(XSymbolTable xSymbolTable) {
- INCLUSIONS = null; // If the properties override inclusions, these have to be regenerated.
+ INCLUSIONS = null; // If the properties override inclusions, these have to be regenerated.
XSYMBOL_TABLE = xSymbolTable;
}
}
* <code>AnnualTimeZoneRule</code> is a class used for representing a time zone
* rule which takes effect annually. Years used in this class are
* all Gregorian calendar years.
- *
+ *
* @stable ICU 3.8
*/
public class AnnualTimeZoneRule extends TimeZoneRule {
* Constructs an <code>AnnualTimeZoneRule</code> with the name, the GMT offset of its
* standard time, the amount of daylight saving offset adjustment,
* the annual start time rule and the start/until years.
- *
+ *
* @param name The time zone name.
* @param rawOffset The GMT offset of its standard time in milliseconds.
* @param dstSavings The amount of daylight saving offset adjustment in
* @param startYear The first year when this rule takes effect.
* @param endYear The last year when this rule takes effect. If this
* rule is effective forever in future, specify MAX_YEAR.
- *
+ *
* @stable ICU 3.8
*/
public AnnualTimeZoneRule(String name, int rawOffset, int dstSavings,
/**
* Gets the start date/time rule used by this rule.
- *
+ *
* @return A <code>DateTimeRule</code> which represents the start date/time
* rule used by this time zone rule.
- *
+ *
* @stable ICU 3.8
*/
public DateTimeRule getRule() {
/**
* Gets the first year when this rule takes effect.
- *
+ *
* @return The start year of this rule. The year is in Gregorian calendar
* with 0 == 1 BCE, -1 == 2 BCE, etc.
- *
+ *
* @stable ICU 3.8
*/
public int getStartYear() {
/**
* Gets the end year when this rule takes effect.
- *
+ *
* @return The end year of this rule (inclusive). The year is in Gregorian calendar
* with 0 == 1 BCE, -1 == 2 BCE, etc.
- *
+ *
* @stable ICU 3.8
*/
public int getEndYear() {
/**
* Gets the time when this rule takes effect in the given year.
- *
+ *
* @param year The Gregorian year, with 0 == 1 BCE, -1 == 2 BCE, etc.
* @param prevRawOffset The standard time offset from UTC before this rule
* takes effect in milliseconds.
* @param prevDSTSavings The amount of daylight saving offset from the
* standard time.
- *
+ *
* @return The time when this rule takes effect in the year, or
* null if this rule is not applicable in the year.
- *
+ *
* @stable ICU 3.8
*/
public Date getStartInYear(int year, int prevRawOffset, int prevDSTSavings) {
ruleDay += 7 * (weeks - 1);
} else {
after = false;
- ruleDay = Grego.fieldsToDay(year, dateTimeRule.getRuleMonth(),
+ ruleDay = Grego.fieldsToDay(year, dateTimeRule.getRuleMonth(),
Grego.monthLength(year, dateTimeRule.getRuleMonth()));
ruleDay += 7 * (weeks + 1);
}
* {@inheritDoc}
* @stable ICU 3.8
*/
+ @Override
public Date getFirstStart(int prevRawOffset, int prevDSTSavings) {
return getStartInYear(startYear, prevRawOffset, prevDSTSavings);
}
* {@inheritDoc}
* @stable ICU 3.8
*/
+ @Override
public Date getFinalStart(int prevRawOffset, int prevDSTSavings) {
if (endYear == MAX_YEAR) {
return null;
* {@inheritDoc}
* @stable ICU 3.8
*/
+ @Override
public Date getNextStart(long base, int prevRawOffset, int prevDSTSavings, boolean inclusive) {
int[] fields = Grego.timeToFields(base, null);
int year = fields[0];
* {@inheritDoc}
* @stable ICU 3.8
*/
+ @Override
public Date getPreviousStart(long base, int prevRawOffset, int prevDSTSavings, boolean inclusive) {
int[] fields = Grego.timeToFields(base, null);
int year = fields[0];
* {@inheritDoc}
* @stable ICU 3.8
*/
+ @Override
public boolean isEquivalentTo(TimeZoneRule other) {
if (!(other instanceof AnnualTimeZoneRule)) {
return false;
* Note: This method in <code>AnnualTimeZoneRule</code> always returns true.
* @stable ICU 3.8
*/
+ @Override
public boolean isTransitionRule() {
return true;
}
* Returns a <code>String</code> representation of this <code>AnnualTimeZoneRule</code> object.
* This method is used for debugging purpose only. The string representation can be changed
* in future versions of ICU without any notice.
- *
+ *
* @stable ICU 3.8
*/
+ @Override
public String toString() {
StringBuilder buf = new StringBuilder();
buf.append(super.toString());
* @return A new BytesTrie.Iterator.
* @stable ICU 4.8
*/
+ @Override
public Iterator iterator() {
return new Iterator(bytes_, pos_, remainingMatchLength_, 0);
}
* @return true if there are more elements.
* @stable ICU 4.8
*/
+ @Override
public boolean hasNext() /*const*/ { return pos_>=0 || !stack_.isEmpty(); }
/**
* @throws NoSuchElementException - iteration has no more elements.
* @stable ICU 4.8
*/
+ @Override
public Entry next() {
int pos=pos_;
if(pos<0) {
* @throws UnsupportedOperationException (always)
* @stable ICU 4.8
*/
+ @Override
public void remove() {
throw new UnsupportedOperationException();
}
* @return A new CharsTrie.Iterator.
* @stable ICU 4.8
*/
+ @Override
public Iterator iterator() {
return new Iterator(chars_, pos_, remainingMatchLength_, 0);
}
* @return true if there are more elements.
* @stable ICU 4.8
*/
+ @Override
public boolean hasNext() /*const*/ { return pos_>=0 || !stack_.isEmpty(); }
/**
* @throws NoSuchElementException - iteration has no more elements.
* @stable ICU 4.8
*/
+ @Override
public Entry next() {
int pos=pos_;
if(pos<0) {
* @throws UnsupportedOperationException (always)
* @stable ICU 4.8
*/
+ @Override
public void remove() {
throw new UnsupportedOperationException();
}
* @stable ICU 2.6
*/
public static final int LONG_NAME = 1;
-
+
/**
- * Selector for getName() indicating the plural long name for a
- * currency, such as "US dollar" for USD in "1 US dollar",
+ * Selector for getName() indicating the plural long name for a
+ * currency, such as "US dollar" for USD in "1 US dollar",
* and "US dollars" for USD in "2 US dollars".
* @stable ICU 4.2
*/
public static final int PLURAL_LONG_NAME = 2;
-
+
private static final EquivalenceRelation<String> EQUIVALENT_CURRENCY_SYMBOLS =
new EquivalenceRelation<String>()
.add("\u00a5", "\uffe5")
* @stable ICU 54
*/
STANDARD,
-
+
/**
* a setting to specify currency usage which determines currency digit and rounding
* for cash usage, for example: "50 NT$"
*/
CASH
}
-
+
// begin registry stuff
// shim for service code
/**
* Returns an array of Strings which contain the currency
- * identifiers that are valid for the given locale on the
+ * identifiers that are valid for the given locale on the
* given date. If there are no such identifiers, returns null.
* Returned identifiers are in preference order.
* @param loc the locale for which to retrieve currency codes.
String region = ULocale.getRegionForSupplementalData(loc, false);
CurrencyFilter filter = CurrencyFilter.onDate(d).withRegion(region);
List<String> list = getTenderCurrencies(filter);
- // Note: Prior to 4.4 the spec didn't say that we return null if there are no results, but
+ // Note: Prior to 4.4 the spec didn't say that we return null if there are no results, but
// the test assumed it did. Kept the behavior and amended the spec.
if (list.isEmpty()) {
return null;
* Returns the set of available currencies. The returned set of currencies contains all of the
* available currencies, including obsolete ones. The result set can be modified without
* affecting the available currencies in the runtime.
- *
+ *
* @return The set of available currencies. The returned set could be empty if there is no
* currency data available.
- *
+ *
* @stable ICU 49
*/
public static Set<Currency> getAvailableCurrencies() {
}
return (Currency) MeasureUnit.internalGetInstance("currency", theISOCode.toUpperCase(Locale.ENGLISH));
}
-
-
+
+
private static boolean isAlpha3Code(String code) {
if (code.length() != 3) {
return false;
/**
* Registers a new currency for the provided locale. The returned object
* is a key that can be used to unregister this currency object.
- *
+ *
* <p>Because ICU may choose to cache Currency objects internally, this must
* be called at application startup, prior to any calls to
* Currency.getInstance to avoid undefined behavior.
- *
+ *
* @param currency the currency to register
* @param locale the ulocale under which to register the currency
* @return a registry key that can be used to unregister this currency
/**
* Given a key and a locale, returns an array of values for the key for which data
* exists. If commonlyUsed is true, these are the values that typically are used
- * with this locale, otherwise these are all values for which data exists.
+ * with this locale, otherwise these are all values for which data exists.
* This is a common service API.
* <p>
* The only supported key is "currency", other values return an empty array.
* If commonlyUsed is true, only the currencies known to be in use as of the current date
* are returned. When there are more than one, these are returned in preference order
* (typically, this occurs when a country is transitioning to a new currency, and the
- * newer currency is preferred), see
- * <a href="http://unicode.org/reports/tr35/#Supplemental_Currency_Data">Unicode TR#35 Sec. C1</a>.
+ * newer currency is preferred), see
+ * <a href="http://unicode.org/reports/tr35/#Supplemental_Currency_Data">Unicode TR#35 Sec. C1</a>.
* If commonlyUsed is false, all currencies ever used in any locale are returned, in no
* particular order.
- *
+ *
* @param key key whose values to look up. the only recognized key is "currency"
* @param locale the locale
* @param commonlyUsed if true, return only values that are currently used in the locale.
* array will be empty.
* @stable ICU 4.2
*/
- public static final String[] getKeywordValuesForLocale(String key, ULocale locale,
+ public static final String[] getKeywordValuesForLocale(String key, ULocale locale,
boolean commonlyUsed) {
-
+
// The only keyword we recognize is 'currency'
if (!"currency".equals(key)) {
return EMPTY_STRING_ARRAY;
}
-
+
if (!commonlyUsed) {
// Behavior change from 4.3.3, no longer sort the currencies
return getAllTenderCurrencies().toArray(new String[0]);
}
-
+
// Don't resolve region if the requested locale is 'und', it will resolve to US
// which we don't want.
if (UND.equals(locale)) {
String prefRegion = ULocale.getRegionForSupplementalData(locale, true);
CurrencyFilter filter = CurrencyFilter.now().withRegion(prefRegion);
-
+
// currencies are in region's preferred order when we're filtering on region, which
// matches our spec
List<String> result = getTenderCurrencies(filter);
-
+
// No fallback anymore (change from 4.3.3)
if (result.size() == 0) {
return EMPTY_STRING_ARRAY;
return result.toArray(new String[result.size()]);
}
-
+
private static final ULocale UND = new ULocale("und");
private static final String[] EMPTY_STRING_ARRAY = new String[0];
/**
* Returns the display name for the given currency in the
- * given locale.
- * This is a convenient method for
- * getName(ULocale, int, boolean[]);
+ * given locale.
+ * This is a convenient method for
+ * getName(ULocale, int, boolean[]);
* @stable ICU 3.2
*/
public String getName(Locale locale,
* currency object in the en_US locale is "$".
* @param locale locale in which to display currency
* @param nameStyle selector for which kind of name to return.
- * The nameStyle should be either SYMBOL_NAME or
+ * The nameStyle should be either SYMBOL_NAME or
* LONG_NAME. Otherwise, throw IllegalArgumentException.
* @param isChoiceFormat fill-in; isChoiceFormat[0] is set to true
* if the returned value is a ChoiceFormat pattern; otherwise it
}
/**
- * Returns the display name for the given currency in the given locale.
+ * Returns the display name for the given currency in the given locale.
* This is a convenience overload of getName(ULocale, int, String, boolean[]);
* @stable ICU 4.2
*/
* Returns the display name for the given currency in the
* given locale. For example, the SYMBOL_NAME for the USD
* currency object in the en_US locale is "$".
- * The PLURAL_LONG_NAME for the USD currency object when the currency
+ * The PLURAL_LONG_NAME for the USD currency object when the currency
* amount is plural is "US dollars", such as in "3.00 US dollars";
* while the PLURAL_LONG_NAME for the USD currency object when the currency
* amount is singular is "US dollar", such as in "1.00 US dollar".
if (isChoiceFormat != null) {
isChoiceFormat[0] = false;
}
-
+
CurrencyDisplayNames names = CurrencyDisplayNames.getInstance(locale);
return names.getPluralName(subType, pluralCount);
}
* Note: This method is a convenience equivalent for
* {@link java.util.Currency#getDisplayName()} and is equivalent to
* <code>getName(Locale.getDefault(), LONG_NAME, null)</code>.
- *
+ *
* @return The display name of this currency
* @see #getDisplayName(Locale)
* @see #getName(Locale, int, boolean[])
* Note: This method is a convenience equivalent for
* {@link java.util.Currency#getDisplayName(java.util.Locale)} and is equivalent
* to <code>getName(locale, LONG_NAME, null)</code>.
- *
+ *
* @param locale locale in which to display currency
* @return The display name of this currency for the specified locale
* @see #getDisplayName(Locale)
public static String parse(ULocale locale, String text, int type, ParsePosition pos) {
List<TextTrieMap<CurrencyStringInfo>> currencyTrieVec = CURRENCY_NAME_CACHE.get(locale);
if (currencyTrieVec == null) {
- TextTrieMap<CurrencyStringInfo> currencyNameTrie =
+ TextTrieMap<CurrencyStringInfo> currencyNameTrie =
new TextTrieMap<CurrencyStringInfo>(true);
- TextTrieMap<CurrencyStringInfo> currencySymbolTrie =
+ TextTrieMap<CurrencyStringInfo> currencySymbolTrie =
new TextTrieMap<CurrencyStringInfo>(false);
currencyTrieVec = new ArrayList<TextTrieMap<CurrencyStringInfo>>();
currencyTrieVec.add(currencySymbolTrie);
setupCurrencyTrieVec(locale, currencyTrieVec);
CURRENCY_NAME_CACHE.put(locale, currencyTrieVec);
}
-
+
int maxLength = 0;
String isoResult = null;
return isoResult;
}
- private static void setupCurrencyTrieVec(ULocale locale,
+ private static void setupCurrencyTrieVec(ULocale locale,
List<TextTrieMap<CurrencyStringInfo>> trieVec) {
TextTrieMap<CurrencyStringInfo> symTrie = trieVec.get(0);
}
}
- private static class CurrencyNameResultHandler
+ private static class CurrencyNameResultHandler
implements TextTrieMap.ResultHandler<CurrencyStringInfo> {
// The length of longest matching key
private int bestMatchLength;
// The currency ISO code of longest matching key
private String bestCurrencyISOCode;
-
+
// As the trie is traversed, handlePrefixMatch is called at each node. matchLength is the
// length of the key at the current node; values is the list of all the values mapped to
// that key. matchLength increases with each call as trie is traversed.
+ @Override
public boolean handlePrefixMatch(int matchLength, Iterator<CurrencyStringInfo> values) {
if (values.hasNext()) {
// Since the best match criteria is only based on length of key in trie and since all the
public String getBestCurrencyISOCode() {
return bestCurrencyISOCode;
}
-
+
public int getBestMatchLength() {
return bestMatchLength;
}
* Returns the ISO 4217 code for this currency.
* @stable ICU 2.2
*/
+ @Override
public String toString() {
return subType;
}
/**
* Constructs a currency object for the given ISO 4217 3-letter
* code. This constructor assumes that the code is valid.
- *
+ *
* @param theISOCode The iso code used to construct the currency.
* @stable ICU 3.4
*/
}
// POW10[i] = 10^i
- private static final int[] POW10 = {
- 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000
+ private static final int[] POW10 = {
+ 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000
};
}
return all;
}
-
+
private static synchronized Set<String> getAllCurrenciesAsSet() {
Set<String> all = (ALL_CODES_AS_SET == null) ? null : ALL_CODES_AS_SET.get();
if (all == null) {
* Note: For checking availability of a currency on a specific date, specify the date on both <code>from</code> and
* <code>to</code>. When both <code>from</code> and <code>to</code> are null, this method checks if the specified
* currency is available all time.
- *
+ *
* @param code
* The ISO 4217 3-letter code.
* @param from
* the currency any date after <code>from</code>
* @return true if the given ISO 4217 3-letter code is supported on the specified date range.
* @throws IllegalArgumentException when <code>to</code> is before <code>from</code>.
- *
+ *
* @stable ICU 4.6
*/
public static boolean isAvailable(String code, Date from, Date to) {
CurrencyMetaInfo info = CurrencyMetaInfo.getInstance();
return info.currencies(filter.withTender());
}
-
+
private static final class EquivalenceRelation<T> {
-
+
private Map<T, Set<T>> data = new HashMap<T, Set<T>>();
-
+
@SuppressWarnings("unchecked") // See ticket #11395, this is safe.
public EquivalenceRelation<T> add(T... items) {
Set<T> group = new HashSet<T>();
}
return this;
}
-
+
public Set<T> get(T item) {
Set<T> result = data.get(item);
if (result == null) {
return Collections.unmodifiableSet(result);
}
}
-
+
private Object writeReplace() throws ObjectStreamException {
return new MeasureUnitProxy(type, subType);
}
* <code>DateTimeRule</code> is a class representing a time in a year by
* a rule specified by month, day of month, day of week and
* time in the day.
- *
+ *
* @stable ICU 3.8
*/
public class DateTimeRule implements Serializable {
/**
* Date rule type defined by exact day of month.
* For example, March 14.
- *
+ *
* @stable ICU 3.8
*/
public static final int DOM = 0;
/**
* Date rule type defined by day of week in month.
* For example, 2nd Sunday in March.
- *
+ *
* @stable ICU 3.8
*/
public static final int DOW = 1;
* Date rule type defined by first day of week on or
* after exact day of month.
* For example, 1st Monday on or after March 15.
- *
+ *
* @stable ICU 3.8
*/
public static final int DOW_GEQ_DOM = 2;
* Date rule type defined by last day of week on or
* before exact day of month.
* For example, last Saturday on or before March 15.
- *
+ *
* @stable ICU 3.8
*/
public static final int DOW_LEQ_DOM = 3;
-
+
/**
* Time rule type for local wall time.
- *
+ *
* @stable ICU 3.8
*/
public static final int WALL_TIME = 0;
/**
* Time rule type for local standard time.
- *
+ *
* @stable ICU 3.8
*/
public static final int STANDARD_TIME = 1;
/**
* Time rule type for coordinated universal time.
- *
+ *
* @stable ICU 3.8
*/
public static final int UTC_TIME = 2;
* Constructs a <code>DateTimeRule</code> by the day of month and
* the time rule. The date rule type for an instance created by
* this constructor is <code>DOM</code>.
- *
+ *
* @param month The rule month, for example, <code>Calendar.JANUARY</code>
* @param dayOfMonth The day of month, 1-based.
* @param millisInDay The milliseconds in the rule date.
* @param timeType The time type, <code>WALL_TIME</code> or <code>STANDARD_TIME</code>
* or <code>UTC_TIME</code>.
- *
+ *
* @stable ICU 3.8
*/
public DateTimeRule(int month, int dayOfMonth,
this.millisInDay = millisInDay;
this.timeRuleType = timeType;
-
+
// not used by this rule type
this.dayOfWeek = 0;
this.weekInMonth = 0;
* Constructs a <code>DateTimeRule</code> by the day of week and its ordinal
* number and the time rule. The date rule type for an instance created
* by this constructor is <code>DOW</code>.
- *
+ *
* @param month The rule month, for example, <code>Calendar.JANUARY</code>.
* @param weekInMonth The ordinal number of the day of week. Negative number
* may be used for specifying a rule date counted from the
* @param millisInDay The milliseconds in the rule date.
* @param timeType The time type, <code>WALL_TIME</code> or <code>STANDARD_TIME</code>
* or <code>UTC_TIME</code>.
- *
+ *
* @stable ICU 3.8
*/
public DateTimeRule(int month, int weekInMonth, int dayOfWeek,
* on or after/before the day of month and the time rule. The date rule
* type for an instance created by this constructor is either
* <code>DOW_GEQ_DOM</code> or <code>DOW_LEQ_DOM</code>.
- *
+ *
* @param month The rule month, for example, <code>Calendar.JANUARY</code>
* @param dayOfMonth The day of month, 1-based.
* @param dayOfWeek The day of week, for example, <code>Calendar.SUNDAY</code>.
* @param millisInDay The milliseconds in the rule date.
* @param timeType The time type, <code>WALL_TIME</code> or <code>STANDARD_TIME</code>
* or <code>UTC_TIME</code>.
- *
+ *
* @stable ICU 3.8
*/
public DateTimeRule(int month, int dayOfMonth, int dayOfWeek, boolean after,
/**
* Gets the date rule type, such as <code>DOM</code>
- *
+ *
* @return The date rule type.
- *
+ *
* @stable ICU 3.8
*/
public int getDateRuleType() {
/**
* Gets the rule month.
- *
+ *
* @return The rule month.
- *
+ *
* @stable ICU 3.8
*/
public int getRuleMonth() {
/**
* Gets the rule day of month. When the date rule type
* is <code>DOW</code>, the value is always 0.
- *
+ *
* @return The rule day of month
- *
+ *
* @stable ICU 3.8
*/
public int getRuleDayOfMonth() {
/**
* Gets the rule day of week. When the date rule type
* is <code>DOM</code>, the value is always 0.
- *
+ *
* @return The rule day of week.
- *
+ *
* @stable ICU 3.8
*/
public int getRuleDayOfWeek() {
* Gets the rule day of week ordinal number in the month.
* When the date rule type is not <code>DOW</code>, the value is
* always 0.
- *
+ *
* @return The rule day of week ordinal number in the month.
- *
+ *
* @stable ICU 3.8
*/
public int getRuleWeekInMonth() {
/**
* Gets the time rule type
- *
+ *
* @return The time rule type, either <code>WALL_TIME</code> or <code>STANDARD_TIME</code>
* or <code>UTC_TIME</code>.
- *
+ *
* @stable ICU 3.8
*/
public int getTimeRuleType() {
/**
* Gets the rule time in the rule day.
- *
+ *
* @return The time in the rule day in milliseconds.
- *
+ *
* @stable ICU 3.8
*/
public int getRuleMillisInDay() {
return millisInDay;
}
-
+
private static final String[] DOWSTR = {"", "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"};
private static final String[] MONSTR = {"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"};
* Returns a <code>String</code> representation of this <code>DateTimeRule</code> object.
* This method is used for debugging purpose only. The string representation can be changed
* in future versions of ICU without any notice.
- *
+ *
* @stable ICU 3.8
*/
+ @Override
public String toString() {
String sDate = null;
String sTimeRuleType = null;
/**
* <b>Note:</b> The Holiday framework is a technology preview.
* Despite its age, it is still draft API, and clients should treat it as such.
- *
+ *
* A Holiday subclass which represents holidays that occur
* a fixed number of days before or after Easter. Supports both the
* Western and Orthodox methods for calculating Easter.
}
/**
- * Return the first occurrance of this rule on or after the given date
+ * Return the first occurrence of this rule on or after the given date
*/
+ @Override
public Date firstAfter(Date start)
{
return doFirstBetween(start, null);
}
/**
- * Return the first occurrance of this rule on or after
+ * Return the first occurrence of this rule on or after
* the given start date and before the given end date.
*/
+ @Override
public Date firstBetween(Date start, Date end)
{
return doFirstBetween(start, end);
/**
* Return true if the given Date is on the same day as Easter
*/
+ @Override
public boolean isOn(Date date)
{
synchronized(calendar) {
/**
* Return true if Easter occurs between the two dates given
*/
+ @Override
public boolean isBetween(Date start, Date end)
{
return firstBetween(start, end) != null; // TODO: optimize?
/**
* <b>Note:</b> The Holiday framework is a technology preview.
* Despite its age, it is still draft API, and clients should treat it as such.
- *
+ *
* An abstract class representing a holiday.
* @draft ICU 2.8 (retainAll)
* @provisional This API might change or be removed in a future release.
* @draft ICU 2.8
* @provisional This API might change or be removed in a future release.
*/
+ @Override
public Date firstAfter(Date start) {
return rule.firstAfter(start);
}
* @draft ICU 2.8
* @provisional This API might change or be removed in a future release.
*/
+ @Override
public Date firstBetween(Date start, Date end) {
return rule.firstBetween(start, end);
}
* @draft ICU 2.8
* @provisional This API might change or be removed in a future release.
*/
+ @Override
public boolean isOn(Date date) {
//System.out.println(name + ".isOn(" + date.toString() + "):");
return rule.isOn(date);
* @draft ICU 2.8
* @provisional This API might change or be removed in a future release.
*/
+ @Override
public boolean isBetween(Date start, Date end) {
return rule.isBetween(start, end);
}
* <code>InitialTimeZoneRule</code> represents a time zone rule
* representing a time zone effective from the beginning and
* has no actual start times.
- *
+ *
* @stable ICU 3.8
*/
public class InitialTimeZoneRule extends TimeZoneRule {
/**
* Constructs an <code>InitialTimeZoneRule</code> with the name, the GMT offset of its
* standard time and the amount of daylight saving offset adjustment.
- *
+ *
* @param name The time zone name.
* @param rawOffset The UTC offset of its standard time in milliseconds.
* @param dstSavings The amount of daylight saving offset adjustment in milliseconds.
* If this is a rule for standard time, the value of this argument is 0.
- *
+ *
* @stable ICU 3.8
*/
public InitialTimeZoneRule(String name, int rawOffset, int dstSavings) {
/**
* {@inheritDoc}
- *
+ *
* @stable ICU 3.8
*/
+ @Override
public boolean isEquivalentTo(TimeZoneRule other) {
if (other instanceof InitialTimeZoneRule) {
return super.isEquivalentTo(other);
}
return false;
}
-
+
/**
* {@inheritDoc}<br><br>
* Note: This method in <code>InitialTimeZoneRule</code> always returns null.
- *
+ *
* @stable ICU 3.8
*/
+ @Override
public Date getFinalStart(int prevRawOffset, int prevDSTSavings) {
// No start time available
return null;
/**
* {@inheritDoc}<br><br>
* Note: This method in <code>InitialTimeZoneRule</code> always returns null.
- *
+ *
* @stable ICU 3.8
*/
+ @Override
public Date getFirstStart(int prevRawOffset, int prevDSTSavings) {
// No start time available
return null;
/**
* {@inheritDoc}<br><br>
* Note: This method in <code>InitialTimeZoneRule</code> always returns null.
- *
+ *
* @stable ICU 3.8
*/
+ @Override
public Date getNextStart(long base, int prevRawOffset, int prevDSTSavings,
boolean inclusive) {
// No start time available
/**
* {@inheritDoc}<br><br>
* Note: This method in <code>InitialTimeZoneRule</code> always returns null.
- *
+ *
* @stable ICU 3.8
*/
+ @Override
public Date getPreviousStart(long base, int prevRawOffset,
int prevDSTSavings, boolean inclusive) {
// No start time available
* Note: This method in <code>InitialTimeZoneRule</code> always returns false.
* @stable ICU 3.8
*/
+ @Override
public boolean isTransitionRule() {
return false;
}
* otherwise known as Metric system.
* @stable ICU 2.8
*/
- public static final MeasurementSystem SI = new MeasurementSystem(0);
+ public static final MeasurementSystem SI = new MeasurementSystem();
/**
* Measurement system followed in the United States of America.
* @stable ICU 2.8
*/
- public static final MeasurementSystem US = new MeasurementSystem(1);
+ public static final MeasurementSystem US = new MeasurementSystem();
/**
* Mix of metric and imperial units used in Great Britain.
* @stable ICU 55
*/
- public static final MeasurementSystem UK = new MeasurementSystem(2);
+ public static final MeasurementSystem UK = new MeasurementSystem();
- private int systemID;
- private MeasurementSystem(int id){
- systemID = id;
- }
-
- private boolean equals(int id){
- return systemID == id;
- }
+ private MeasurementSystem() {}
}
/**
public static final MeasurementSystem getMeasurementSystem(ULocale locale){
UResourceBundle sysBundle = measurementTypeBundleForLocale(locale, MEASUREMENT_SYSTEM);
- int system = sysBundle.getInt();
- if(MeasurementSystem.US.equals(system)){
- return MeasurementSystem.US;
- }
- if(MeasurementSystem.UK.equals(system)){
- return MeasurementSystem.UK;
- }
- if(MeasurementSystem.SI.equals(system)){
- return MeasurementSystem.SI;
+ switch (sysBundle.getInt()) {
+ case 0: return MeasurementSystem.SI;
+ case 1: return MeasurementSystem.US;
+ case 2: return MeasurementSystem.UK;
+ default:
+ // return null if the object is null or is not an instance
+ // of integer indicating an error
+ return null;
}
- // return null if the object is null or is not an instance
- // of integer indicating an error
- return null;
}
/**
* @stable ICU 3.0
*/
public class Measure {
-
+
private final Number number;
private final MeasureUnit unit;
this.number = number;
this.unit = unit;
}
-
+
/**
* Returns true if the given object is equal to this object.
* @return true if this object is equal to the given object
* @stable ICU 3.0
*/
+ @Override
public boolean equals(Object obj) {
if (obj == this) {
return true;
Measure m = (Measure) obj;
return unit.equals(m.unit) && numbersEqual(number, m.number);
}
-
+
/*
* See if two numbers are identical or have the same double value.
* @param a A number
* @return a 32-bit hash
* @stable ICU 3.0
*/
+ @Override
public int hashCode() {
return 31 * Double.valueOf(number.doubleValue()).hashCode() + unit.hashCode();
}
* code together with the numeric amount
* @stable ICU 3.0
*/
+ @Override
public String toString() {
return number.toString() + ' ' + unit.toString();
}
* {@inheritDoc}
* @stable ICU 4.8
*/
+ @Override
public String toString() {
return value == null ? "null" : value.toString();
}
* @stable ICU 4.8
*/
public Output() {
-
+
}
/**
* @deprecated This API is ICU internal only.
*/
@Deprecated
+ @Override
public String toString() {
return Integer.toString(value);
}
* @internal
* @deprecated This API is ICU internal only.
*/
+ @Override
@Deprecated
public void getOffsetFromLocal(long date,
int nonExistingTimeOpt, int duplicatedTimeOpt, int[] offsets) {
* {@inheritDoc}
* @stable ICU 49
*/
+ @Override
public boolean observesDaylightTime() {
return useDaylight;
}
* {@inheritDoc}
* @stable ICU 49
*/
+ @Override
public boolean isFrozen() {
return isFrozen;
}
* {@inheritDoc}
* @stable ICU 49
*/
+ @Override
public TimeZone freeze() {
isFrozen = true;
return this;
* {@inheritDoc}
* @stable ICU 49
*/
+ @Override
public TimeZone cloneAsThawed() {
SimpleTimeZone tz = (SimpleTimeZone)super.cloneAsThawed();
tz.isFrozen = false;
/**
* <code>TimeArrayTimeZoneRule</code> represents a time zone rule whose start times are
* defined by an array of milliseconds since the standard base time.
- *
+ *
* @stable ICU 3.8
*/
public class TimeArrayTimeZoneRule extends TimeZoneRule {
* Constructs a <code>TimeArrayTimeZoneRule</code> with the name, the GMT offset of its
* standard time, the amount of daylight saving offset adjustment and
* the array of times when this rule takes effect.
- *
+ *
* @param name The time zone name.
* @param rawOffset The UTC offset of its standard time in milliseconds.
* @param dstSavings The amount of daylight saving offset adjustment in
* @param timeType The time type of the start times, which is one of
* <code>DateTimeRule.WALL_TIME</code>, <code>STANDARD_TIME</code>
* and <code>UTC_TIME</code>.
- *
+ *
* @stable ICU 3.8
*/
public TimeArrayTimeZoneRule(String name, int rawOffset, int dstSavings, long[] startTimes, int timeType) {
/**
* Gets the array of start times used by this rule.
- *
+ *
* @return An array of the start times in milliseconds since the base time
* (January 1, 1970, 00:00:00 GMT).
* @stable ICU 3.8
* Gets the time type of the start times used by this rule. The return value
* is either <code>DateTimeRule.WALL_TIME</code> or <code>DateTimeRule.STANDARD_TIME</code>
* or <code>DateTimeRule.UTC_TIME</code>.
- *
+ *
* @return The time type of the start times used by this rule.
* @stable ICU 3.8
*/
* {@inheritDoc}
* @stable ICU 3.8
*/
+ @Override
public Date getFirstStart(int prevRawOffset, int prevDSTSavings) {
return new Date(getUTC(startTimes[0], prevRawOffset, prevDSTSavings));
}
* {@inheritDoc}
* @stable ICU 3.8
*/
+ @Override
public Date getFinalStart(int prevRawOffset, int prevDSTSavings) {
return new Date(getUTC(startTimes[startTimes.length - 1], prevRawOffset, prevDSTSavings));
}
* {@inheritDoc}
* @stable ICU 3.8
*/
+ @Override
public Date getNextStart(long base, int prevOffset, int prevDSTSavings, boolean inclusive) {
int i = startTimes.length - 1;
for (; i >= 0; i--) {
* {@inheritDoc}
* @stable ICU 3.8
*/
+ @Override
public Date getPreviousStart(long base, int prevOffset, int prevDSTSavings, boolean inclusive) {
int i = startTimes.length - 1;
for (; i >= 0; i--) {
* {@inheritDoc}
* @stable ICU 3.8
*/
+ @Override
public boolean isEquivalentTo(TimeZoneRule other) {
if (!(other instanceof TimeArrayTimeZoneRule)) {
return false;
* Note: This method in <code>TimeArrayTimeZoneRule</code> always returns true.
* @stable ICU 3.8
*/
+ @Override
public boolean isTransitionRule() {
return true;
}
* Returns a <code>String</code> representation of this <code>TimeArrayTimeZoneRule</code> object.
* This method is used for debugging purposes only. The string representation can be changed
* in future versions of ICU without any notice.
- *
+ *
* @stable ICU 3.8
*/
+ @Override
public String toString() {
StringBuilder buf = new StringBuilder();
buf.append(super.toString());
* offset from GMT(=UTC) and does not observe daylight saving
* time. For example, you might specify GMT+14:00 as a custom
* time zone ID to create a TimeZone representing 14 hours ahead
- * of GMT (with no daylight saving time). In addition,
+ * of GMT (with no daylight saving time). In addition,
* <code>getCanonicalID</code> can also be used to
* normalize a custom time zone ID.
*
/**
* {@icu} The time zone ID reserved for unknown time zone.
* @see #getTimeZone(String)
- *
+ *
* @stable ICU 4.8
*/
public static final String UNKNOWN_ZONE_ID = "Etc/Unknown";
*
* @see #UNKNOWN_ZONE_ID
* @see #getTimeZone(String)
- *
+ *
* @stable ICU 49
*/
public static final TimeZone UNKNOWN_ZONE = new ConstantZone(0, UNKNOWN_ZONE_ID).freeze();
if (style < SHORT || style > GENERIC_LOCATION) {
throw new IllegalArgumentException("Illegal style: " + style);
}
-
+
return _getDisplayName(style, daylight, locale);
}
* the implementation does not take past daylight saving time into account, so
* that this method may return <code>false</code> even when {@link #useDaylightTime()} returns
* <code>true</code>.
- *
+ *
* @return <code>true</code> if this time zone is in daylight saving time or will observe
* daylight saving time at any future time.
* @see #useDaylightTime
/**
* Gets the <code>TimeZone</code> for the given ID and the timezone type.
* @param id time zone ID
- * @param type time zone implementation type, TIMEZONE_JDK or TIMEZONE_ICU
+ * @param type time zone implementation type, TIMEZONE_JDK or TIMEZONE_ICU
* @param frozen specify if the returned object can be frozen
* @return the specified <code>TimeZone</code> or UNKNOWN_ZONE if the given ID
* cannot be understood.
result = JavaTimeZone.createTimeZone(id);
if (result != null) {
return frozen ? result.freeze() : result;
- }
+ }
result = getFrozenICUTimeZone(id, false);
} else {
result = getFrozenICUTimeZone(id, true);
}
return frozen ? result : result.cloneAsThawed();
}
-
+
/**
* Returns a frozen ICU type TimeZone object given a time zone ID.
* @param id the time zone ID
return TZ_IMPL;
}
- /**
- * {@icu} Returns a set of time zone ID strings with the given filter conditions.
+ /**
+ * {@icu} Returns a set of time zone ID strings with the given filter conditions.
* <p><b>Note:</b> A <code>Set</code> returned by this method is
* immutable.
* @param zoneType The system time zone type.
- * @param region The ISO 3166 two-letter country code or UN M.49 three-digit area code.
- * When null, no filtering done by region.
- * @param rawOffset An offset from GMT in milliseconds, ignoring the effect of daylight savings
- * time, if any. When null, no filtering done by zone offset.
+ * @param region The ISO 3166 two-letter country code or UN M.49 three-digit area code.
+ * When null, no filtering done by region.
+ * @param rawOffset An offset from GMT in milliseconds, ignoring the effect of daylight savings
+ * time, if any. When null, no filtering done by zone offset.
* @return an immutable set of system time zone IDs.
* @see SystemTimeZoneType
- *
+ *
* @stable ICU 4.8
- */
+ */
public static Set<String> getAvailableIDs(SystemTimeZoneType zoneType,
String region, Integer rawOffset) {
return ZoneMeta.getAvailableIDs(zoneType, region, rawOffset);
* @return an array of IDs for system TimeZones with the given
* raw offset. If there are none, return a zero-length array.
* @see #getAvailableIDs(SystemTimeZoneType, String, Integer)
- *
+ *
* @stable ICU 2.0
*/
public static String[] getAvailableIDs(int rawOffset) {
* @return an array of IDs for system TimeZones in the given
* country. If there are none, return a zero-length array.
* @see #getAvailableIDs(SystemTimeZoneType, String, Integer)
- *
+ *
* @stable ICU 2.0
*/
public static String[] getAvailableIDs(String country) {
* object.
* @return an array of all system TimeZone IDs
* @see #getAvailableIDs(SystemTimeZoneType, String, Integer)
- *
+ *
* @stable ICU 2.0
*/
public static String[] getAvailableIDs() {
* Overrides clone.
* @stable ICU 2.0
*/
+ @Override
public Object clone() {
if (isFrozen()) {
return this;
* Overrides equals.
* @stable ICU 3.6
*/
+ @Override
public boolean equals(Object obj){
if (this == obj) return true;
if (obj == null || getClass() != obj.getClass()) return false;
* Overrides hashCode.
* @stable ICU 3.6
*/
+ @Override
public int hashCode(){
return ID.hashCode();
}
return canonicalID;
}
- /**
- * {@icu} Returns the region code associated with the given
- * system time zone ID. The region code is either ISO 3166
- * 2-letter country code or UN M.49 3-digit area code.
- * When the time zone is not associated with a specific location,
- * for example - "Etc/UTC", "EST5EDT", then this method returns
- * "001" (UN M.49 area code for World).
- * @param id the system time zone ID.
- * @return the region code associated with the given
- * system time zone ID.
- * @throws IllegalArgumentException if <code>id</code> is not a known system ID.
- * @see #getAvailableIDs(String)
- *
+ /**
+ * {@icu} Returns the region code associated with the given
+ * system time zone ID. The region code is either ISO 3166
+ * 2-letter country code or UN M.49 3-digit area code.
+ * When the time zone is not associated with a specific location,
+ * for example - "Etc/UTC", "EST5EDT", then this method returns
+ * "001" (UN M.49 area code for World).
+ * @param id the system time zone ID.
+ * @return the region code associated with the given
+ * system time zone ID.
+ * @throws IllegalArgumentException if <code>id</code> is not a known system ID.
+ * @see #getAvailableIDs(String)
+ *
* @stable ICU 4.8
- */
+ */
public static String getRegion(String id) {
String region = null;
// "Etc/Unknown" is not a system time zone ID,
/**
* {@icu} Converts a system time zone ID to an equivalent Windows time zone ID. For example,
* Windows time zone ID "Pacific Standard Time" is returned for input "America/Los_Angeles".
- *
+ *
* <p>There are system time zones that cannot be mapped to Windows zones. When the input
* system time zone ID is unknown or unmappable to a Windows time zone, then this
* method returns <code>null</code>.
- *
+ *
* <p>This implementation utilizes <a href="http://unicode.org/cldr/charts/supplemental/zone_tzid.html">
* Zone-Tzid mapping data</a>. The mapping data is updated from time to time. To get the latest changes,
* please read the ICU user guide section <a href="http://userguide.icu-project.org/datetime/timezone#TOC-Updating-the-Time-Zone-Data">
* Updating the Time Zone Data</a>.
- *
+ *
* @param id A system time zone ID
* @return A Windows time zone ID mapped from the input system time zone ID,
* or <code>null</code> when the input ID is unknown or unmappable.
* @see #getIDForWindowsID(String, String)
- *
+ *
* @stable ICU 52
*/
public static String getWindowsID(String id) {
id = getCanonicalID(id, isSystemID);
if (!isSystemID[0]) {
// mapping data is only applicable to tz database IDs
- return null;
+ return null;
}
UResourceBundle top = UResourceBundle.getBundleInstance(
/**
* {@icu} Converts a Windows time zone ID to an equivalent system time zone ID
- * for a region. For example, system time zone ID "America/Los_Angeles" is returned
+ * for a region. For example, system time zone ID "America/Los_Angeles" is returned
* for input Windows ID "Pacific Standard Time" and region "US" (or <code>null</code>),
* "America/Vancouver" is returned for the same Windows ID "Pacific Standard Time" and
* region "CA".
- *
+ *
* <p>Not all Windows time zones can be mapped to system time zones. When the input
* Windows time zone ID is unknown or unmappable to a system time zone, then this
* method returns <code>null</code>.
* @return A system time zone ID mapped from the input Windows time zone ID,
* or <code>null</code> when the input ID is unknown or unmappable.
* @see #getWindowsID(String)
- *
+ *
* @stable ICU 52
*/
public static String getIDForWindowsID(String winid, String region) {
* {@inheritDoc}
* @stable ICU 49
*/
+ @Override
public boolean isFrozen() {
return false;
}
* {@inheritDoc}
* @stable ICU 49
*/
+ @Override
public TimeZone freeze() {
throw new UnsupportedOperationException("Needs to be implemented by the subclass.");
}
* {@inheritDoc}
* @stable ICU 49
*/
+ @Override
public TimeZone cloneAsThawed() {
try {
TimeZone other = (TimeZone) super.clone();
* <code>TimeZoneRule</code> is an abstract class representing a rule for time zone.
* <code>TimeZoneRule</code> has a set of time zone attributes, such as zone name,
* raw offset (UTC offset for standard time) and daylight saving time offset.
- *
+ *
* @see com.ibm.icu.util.TimeZoneTransition
* @see com.ibm.icu.util.RuleBasedTimeZone
- *
+ *
* @stable ICU 3.8
*/
public abstract class TimeZoneRule implements Serializable {
/**
* Constructs a <code>TimeZoneRule</code> with the name, the GMT offset of its
* standard time and the amount of daylight saving offset adjustment.
- *
+ *
* @param name The time zone name.
* @param rawOffset The UTC offset of its standard time in milliseconds.
* @param dstSavings The amount of daylight saving offset adjustment in milliseconds.
* If this is a rule for standard time, the value of this argument is 0.
- *
+ *
* @stable ICU 3.8
*/
public TimeZoneRule(String name, int rawOffset, int dstSavings) {
/**
* Gets the name of this time zone.
- *
+ *
* @return The name of this time zone.
- *
+ *
* @stable ICU 3.8
*/
public String getName() {
/**
* Gets the standard time offset.
- *
+ *
* @return The standard time offset from UTC in milliseconds.
- *
+ *
* @stable ICU 3.8
*/
public int getRawOffset() {
/**
* Gets the amount of daylight saving delta time from the standard time.
- *
+ *
* @return The amount of daylight saving offset used by this rule
* in milliseconds.
- *
+ *
* @stable ICU 3.8
*/
public int getDSTSavings() {
*
* @param other The <code>TimeZoneRule</code> object to be compared with.
* @return true if the other <code>TimeZoneRule</code> is the same as this one.
- *
+ *
* @stable ICU 3.8
*/
public boolean isEquivalentTo(TimeZoneRule other) {
}
return false;
}
-
+
/**
* Gets the very first time when this rule takes effect.
- *
+ *
* @param prevRawOffset The standard time offset from UTC before this rule
* takes effect in milliseconds.
* @param prevDSTSavings The amount of daylight saving offset from the
- * standard time.
- *
+ * standard time.
+ *
* @return The very first time when this rule takes effect.
- *
+ *
* @stable ICU 3.8
*/
public abstract Date getFirstStart(int prevRawOffset, int prevDSTSavings);
/**
* Gets the final time when this rule takes effect.
- *
+ *
* @param prevRawOffset The standard time offset from UTC before this rule
* takes effect in milliseconds.
* @param prevDSTSavings The amount of daylight saving offset from the
- * standard time.
- *
+ * standard time.
+ *
* @return The very last time when this rule takes effect,
* or null if this rule is applied for future dates infinitely.
- *
+ *
* @stable ICU 3.8
*/
public abstract Date getFinalStart(int prevRawOffset, int prevDSTSavings);
/**
* Gets the first time when this rule takes effect after the specified time.
- *
+ *
* @param base The first time after this time is returned.
* @param prevRawOffset The standard time offset from UTC before this rule
* takes effect in milliseconds.
* @param prevDSTSavings The amount of daylight saving offset from the
- * standard time.
+ * standard time.
* @param inclusive Whether the base time is inclusive or not.
- *
+ *
* @return The first time when this rule takes effect after the specified time,
* or null when this rule never takes effect after the specified time.
- *
+ *
* @stable ICU 3.8
*/
public abstract Date getNextStart(long base, int prevRawOffset, int prevDSTSavings, boolean inclusive);
/**
* Gets the most recent time when this rule takes effect before the specified time.
- *
+ *
* @param base The most recent time when this rule takes effect before
* this time is returned.
* @param prevRawOffset The standard time offset from UTC before this rule
* takes effect in milliseconds.
* @param prevDSTSavings The amount of daylight saving offset from the
- * standard time.
+ * standard time.
* @param inclusive Whether the base time is inclusive or not.
- *
+ *
* @return The most recent time when this rule takes effect before the specified time,
* or null when this rule never takes effect before the specified time.
- *
+ *
* @stable ICU 3.8
*/
public abstract Date getPreviousStart(long base, int prevRawOffset, int prevDSTSavings, boolean inclusive);
/**
* Returns if this <code>TimeZoneRule</code> has one or more start times.
- *
+ *
* @return true if this <code>TimeZoneRule</code> has one or more start times.
- *
+ *
* @stable ICU 3.8
*/
public abstract boolean isTransitionRule();
* in future version of ICU without any notice.
* @stable ICU 3.8
*/
+ @Override
public String toString() {
StringBuilder buf = new StringBuilder();
buf.append("name=" + name);
* <code>TimeZoneTransition</code> is a class representing a time zone transition.
* An instance has a time of transition and rules for both before and
* after the transition.
- *
+ *
* @stable ICU 3.8
*/
public class TimeZoneTransition {
/**
* Constructs a <code>TimeZoneTransition</code> with the time and the rules before/after
* the transition.
- *
+ *
* @param time The time of transition in milliseconds since the base time.
* @param from The time zone rule used before the transition.
* @param to The time zone rule used after the transition.
- *
+ *
* @stable ICU 3.8
*/
public TimeZoneTransition(long time, TimeZoneRule from, TimeZoneRule to) {
/**
* Returns the time of transition in milliseconds since the base time.
- *
+ *
* @return The time of the transition in milliseconds since the base time.
- *
+ *
* @stable ICU 3.8
*/
public long getTime() {
/**
* Returns the rule used after the transition.
- *
+ *
* @return The time zone rule used after the transition.
- *
+ *
* @stable ICU 3.8
*/
public TimeZoneRule getTo() {
/**
* Returns the rule used before the transition.
- *
+ *
* @return The time zone rule used before the transition.
- *
+ *
* @stable ICU 3.8
*/
public TimeZoneRule getFrom() {
* Returns a <code>String</code> representation of this <code>TimeZoneTransition</code> object.
* This method is used for debugging purposes only. The string representation can be changed
* in future versions of ICU without any notice.
- *
+ *
* @stable ICU 3.8
*/
+ @Override
public String toString() {
StringBuilder buf = new StringBuilder();
buf.append("time=" + time);
* change. To open ICU style organization use:
*
* <pre>
- * UResourceBundle bundle =
- * UResourceBundle.getBundleInstance("com/mycompany/resources",
+ * UResourceBundle bundle =
+ * UResourceBundle.getBundleInstance("com/mycompany/resources",
* "en_US", myClassLoader);
* </pre>
* To open Java/JDK style organization use:
* <pre>
- * UResourceBundle bundle =
- * UResourceBundle.getBundleInstance("com.mycompany.resources.LocaleElements",
+ * UResourceBundle bundle =
+ * UResourceBundle.getBundleInstance("com.mycompany.resources.LocaleElements",
* "en_US", myClassLoader);
* </pre>
*
* @stable ICU 3.0
*/
public static UResourceBundle getBundleInstance(String baseName, String localeName){
- return getBundleInstance(baseName, localeName, ICUResourceBundle.ICU_DATA_CLASS_LOADER,
+ return getBundleInstance(baseName, localeName, ICUResourceBundle.ICU_DATA_CLASS_LOADER,
false);
}
* @return a resource bundle for the given base name and locale
* @stable ICU 3.0
*/
- public static UResourceBundle getBundleInstance(String baseName, String localeName,
+ public static UResourceBundle getBundleInstance(String baseName, String localeName,
ClassLoader root){
return getBundleInstance(baseName, localeName, root, false);
}
* @stable ICU 3.0
*
*/
- protected static UResourceBundle getBundleInstance(String baseName, String localeName,
+ protected static UResourceBundle getBundleInstance(String baseName, String localeName,
ClassLoader root, boolean disableFallback) {
return instantiateBundle(baseName, localeName, root, disableFallback);
}
baseName = ICUData.ICU_BASE_NAME;
}
ULocale uloc = ULocale.getDefault();
- return getBundleInstance(baseName, uloc.getBaseName(), ICUResourceBundle.ICU_DATA_CLASS_LOADER,
+ return getBundleInstance(baseName, uloc.getBaseName(), ICUResourceBundle.ICU_DATA_CLASS_LOADER,
false);
}
* @return a resource bundle for the given base name and locale
* @stable ICU 3.8
*/
- public static UResourceBundle getBundleInstance(String baseName, Locale locale,
+ public static UResourceBundle getBundleInstance(String baseName, Locale locale,
ClassLoader loader) {
if (baseName == null) {
baseName = ICUData.ICU_BASE_NAME;
* @return a resource bundle for the given base name and locale
* @stable ICU 3.8
*/
- public static UResourceBundle getBundleInstance(String baseName, ULocale locale,
+ public static UResourceBundle getBundleInstance(String baseName, ULocale locale,
ClassLoader loader) {
if (baseName == null) {
baseName = ICUData.ICU_BASE_NAME;
* @return the locale of this resource bundle
* @stable ICU 3.0
*/
+ @Override
public Locale getLocale(){
return getULocale().toLocale();
}
return ICUResourceBundle.getBundleInstance(baseName, localeName, root, disableFallback);
case JAVA:
- return ResourceBundleWrapper.getBundleInstance(baseName, localeName, root,
+ return ResourceBundleWrapper.getBundleInstance(baseName, localeName, root,
disableFallback);
case MISSING:
default:
UResourceBundle b;
try{
- b = ICUResourceBundle.getBundleInstance(baseName, localeName, root,
+ b = ICUResourceBundle.getBundleInstance(baseName, localeName, root,
disableFallback);
setRootType(baseName, RootType.ICU);
}catch(MissingResourceException ex){
- b = ResourceBundleWrapper.getBundleInstance(baseName, localeName, root,
+ b = ResourceBundleWrapper.getBundleInstance(baseName, localeName, root,
disableFallback);
setRootType(baseName, RootType.JAVA);
}
public UResourceBundle get(int index) {
UResourceBundle obj = handleGet(index, null, this);
if (obj == null) {
- obj = (ICUResourceBundle) getParent();
+ obj = getParent();
if (obj != null) {
obj = obj.get(index);
}
* which is empty if this is not a bundle or a table resource
* @stable ICU 3.8
*/
+ @Override
public Enumeration<String> getKeys() {
return Collections.enumeration(keySet());
}
* @internal
* @deprecated This API is ICU internal only.
*/
+ @Override
@Deprecated
public Set<String> keySet() {
// TODO: Java 6 ResourceBundle has keySet() which calls handleKeySet()
* @internal
* @deprecated This API is ICU internal only.
*/
+ @Override
@Deprecated
protected Set<String> handleKeySet() {
return Collections.emptySet();
/**
* {@icu} Returns the size of a resource. Size for scalar types is always 1, and for
* vector/table types is the number of child resources.
- *
+ *
* <br><b>Note:</b> Integer array is treated as a scalar type. There are no APIs to
* access individual members of an integer array. It is always returned as a whole.
* @return number of resources in a given resource.
* @return UResourceBundle a resource associated with the key
* @stable ICU 3.8
*/
- protected UResourceBundle handleGet(String aKey, HashMap<String, String> aliasesVisited,
+ protected UResourceBundle handleGet(String aKey, HashMap<String, String> aliasesVisited,
UResourceBundle requested) {
return null;
}
* @return UResourceBundle a resource associated with the index
* @stable ICU 3.8
*/
- protected UResourceBundle handleGet(int index, HashMap<String, String> aliasesVisited,
+ protected UResourceBundle handleGet(int index, HashMap<String, String> aliasesVisited,
UResourceBundle requested) {
return null;
}
// this method is declared in ResourceBundle class
// so cannot change the signature
// Override this method
+ @Override
protected Object handleGetObject(String aKey) {
return handleGetObjectImpl(aKey, this);
}
return new CurrencyDigits(data[0], data[1]);
}
}
-
+
private <T> List<T> collect(Collector<T> collector, CurrencyFilter filter) {
// We rely on the fact that the data lists the regions in order, and the
// priorities in order within region. This means we don't need
return defaultValue;
}
int[] values = b.getIntVector();
- return ((long) values[0] << 32) | (((long) values[1]) & MASK);
+ return ((long) values[0] << 32) | ((values[1]) & MASK);
}
// Utility, just because I don't like the n^2 behavior of using list.contains to build a
// about duplicates.
private List<CurrencyInfo> result = new ArrayList<CurrencyInfo>();
+ @Override
public void collect(String region, String currency, long from, long to, int priority, boolean tender) {
result.add(new CurrencyInfo(region, currency, from, to, priority, tender));
}
+ @Override
public List<CurrencyInfo> getList() {
return Collections.unmodifiableList(result);
}
+ @Override
public int collects() {
return Everything;
}
private static class RegionCollector implements Collector<String> {
private final UniqueList<String> result = UniqueList.create();
+ @Override
public void collect(
String region, String currency, long from, long to, int priority, boolean tender) {
result.add(region);
}
+ @Override
public int collects() {
return Region;
}
+ @Override
public List<String> getList() {
return result.list();
}
private static class CurrencyCollector implements Collector<String> {
private final UniqueList<String> result = UniqueList.create();
+ @Override
public void collect(
String region, String currency, long from, long to, int priority, boolean tender) {
result.add(currency);
}
+ @Override
public int collects() {
return Currency;
}
+ @Override
public List<String> getList() {
return result.list();
}
return sym;
}
- //@Override
+ @Override
public String getDisplayName(String currencyCode, Locale locale) {
CurrencyDisplayNames curDispNames = CurrencyDisplayNames.getInstance(ICULocaleServiceProvider.toULocaleNoSpecialVariant(locale));
String name = curDispNames.getName(currencyCode);
return disp;
}
- //@Override
+ @Override
public String getDisplayScript(String scriptCode, Locale locale) {
scriptCode = AsciiUtil.toTitleString(scriptCode);
String disp = LocaleDisplayNames.getInstance(ICULocaleServiceProvider.toULocaleNoSpecialVariant(locale))
return fIcuCollator;
}
+ @Override
public Object clone() {
CollatorICU other = (CollatorICU)super.clone();
try {
return other;
}
+ @Override
public int compare(Object o1, Object o2) {
return fIcuCollator.compare(o1, o2);
}
+ @Override
public int compare(String source, String target) {
return fIcuCollator.compare(source, target);
}
+ @Override
public boolean equals(Object that) {
if (that instanceof CollatorICU) {
return ((CollatorICU)that).fIcuCollator.equals(fIcuCollator);
return false;
}
+ @Override
public boolean equals(String source, String target) {
return fIcuCollator.equals(source, target);
}
+ @Override
public CollationKey getCollationKey(String source) {
com.ibm.icu.text.CollationKey icuCollKey = fIcuCollator.getCollationKey(source);
return CollationKeyICU.wrap(icuCollKey);
}
+ @Override
public int getDecomposition() {
int mode = java.text.Collator.NO_DECOMPOSITION;
return mode;
}
+ @Override
public int getStrength() {
int strength;
int icuStrength = fIcuCollator.getStrength();
return strength;
}
+ @Override
public int hashCode() {
return fIcuCollator.hashCode();
}
+ @Override
public void setDecomposition(int decompositionMode) {
switch (decompositionMode) {
case java.text.Collator.CANONICAL_DECOMPOSITION:
}
}
+ @Override
public void setStrength(int newStrength) {
switch (newStrength) {
case java.text.Collator.IDENTICAL:
* The target script code. Never USCRIPT_INVALID_CODE.
*/
private int targetScript;
-
+
/**
* Special code for handling width characters
*/
/**
* Implements {@link Transliterator#handleTransliterate}.
*/
+ @Override
protected void handleTransliterate(Replaceable text,
Position pos, boolean isIncremental) {
int allStart = pos.start;
* @param id the ID of the form S-T or S-T/V, where T is theTarget
* and V is theVariant. Must not be empty.
* @param filter The Unicode filter.
- * @param target2 the target name.
+ * @param target2 the target name.
* @param targetScript2 the script code corresponding to theTarget.
* @param widthFix2 The Transliterator width fix.
* @param cache2 The Map object for cache.
for (Enumeration<String> v = Transliterator.getAvailableVariants(source, target);
v.hasMoreElements(); ) {
String variant = v.nextElement();
-
+
// Only process each target/variant pair once
if (seenVariants.contains(variant)) {
continue;
| (1<<Character.NON_SPACING_MARK)
| (1<<Character.ENCLOSING_MARK)
;
+ @Override
protected synchronized void handleTransliterate(Replaceable text, Position pos, boolean incremental) {
boundaryCount = 0;
int boundary = 0;
* Implements CharacterIterator.first() for String.
* @see CharacterIterator#first
*/
+ @Override
public char first()
{
pos = begin;
* Implements CharacterIterator.last() for String.
* @see CharacterIterator#last
*/
+ @Override
public char last()
{
if (end != begin) {
* Implements CharacterIterator.setIndex() for String.
* @see CharacterIterator#setIndex
*/
+ @Override
public char setIndex(int p)
{
if (p < begin || p > end) {
* Implements CharacterIterator.current() for String.
* @see CharacterIterator#current
*/
+ @Override
public char current()
{
if (pos >= begin && pos < end) {
* Implements CharacterIterator.next() for String.
* @see CharacterIterator#next
*/
+ @Override
public char next()
{
if (pos < end - 1) {
* Implements CharacterIterator.previous() for String.
* @see CharacterIterator#previous
*/
+ @Override
public char previous()
{
if (pos > begin) {
* Implements CharacterIterator.getBeginIndex() for String.
* @see CharacterIterator#getBeginIndex
*/
+ @Override
public int getBeginIndex()
{
return begin;
* Implements CharacterIterator.getEndIndex() for String.
* @see CharacterIterator#getEndIndex
*/
+ @Override
public int getEndIndex()
{
return end;
* Implements CharacterIterator.getIndex() for String.
* @see CharacterIterator#getIndex
*/
+ @Override
public int getIndex()
{
return pos;
* @return true if the given obj is the same as this
* ReplaceableCharacterIterator object; false otherwise.
*/
+ @Override
public boolean equals(Object obj)
{
if (this == obj) {
* Computes a hashcode for this iterator.
* @return A hash code
*/
+ @Override
public int hashCode()
{
return text.hashCode() ^ pos ^ begin ^ end;
* Creates a copy of this iterator.
* @return A copy of this
*/
+ @Override
public Object clone()
{
try {
* Package accessible ID.
*/
static final String _ID = "Any-CaseFold";
-
+
// TODO: Add variants for tr, az, lt, default = default locale
/**
*/
static void register() {
Transliterator.registerFactory(_ID, new Transliterator.Factory() {
+ @Override
public Transliterator getInstance(String ID) {
return new CaseFoldTransliterator();
}
/**
* Implements {@link Transliterator#handleTransliterate}.
*/
+ @Override
protected synchronized void handleTransliterate(Replaceable text,
Position offsets, boolean isIncremental) {
if(csp==null) {
if(offsets.start >= offsets.limit) {
return;
- }
+ }
iter.setText(text);
result.setLength(0);
}
offsets.start = offsets.limit;
}
-
+
static SourceTargetUtility sourceTargetUtility = null;
-
+
/* (non-Javadoc)
* @see com.ibm.icu.text.Transliterator#addSourceTargetSet(com.ibm.icu.text.UnicodeSet, com.ibm.icu.text.UnicodeSet, com.ibm.icu.text.UnicodeSet)
*/
synchronized (UppercaseTransliterator.class) {
if (sourceTargetUtility == null) {
sourceTargetUtility = new SourceTargetUtility(new Transform<String,String>() {
+ @Override
public String transform(String source) {
return UCharacter.foldCase(source, true);
}
/**
* Internal method for safeClone...
* @param id
- * @param filter2
+ * @param filter2
* @param trans2
* @param numAnonymousRBTs2
*/
trans = trans2;
numAnonymousRBTs = numAnonymousRBTs2;
}
-
+
/**
* Finish constructing a transliterator: only to be called by
* constructors. Before calling init(), set trans and filter to NULL.
* U+000A, U+0020..U+007E.
* @return the rule string
*/
+ @Override
public String toRules(boolean escapeUnprintable) {
// We do NOT call toRules() on our component transliterators, in
// general. If we have several rule-based transliterators, this
/**
* Implements {@link Transliterator#handleTransliterate}.
*/
+ @Override
protected void handleTransliterate(Replaceable text,
Position index, boolean incremental) {
/* Call each transliterator with the same start value and
static void register() {
// Unicode: "U+10FFFF" hex, min=4, max=6
Transliterator.registerFactory("Any-Hex/Unicode", new Transliterator.Factory() {
+ @Override
public Transliterator getInstance(String ID) {
return new EscapeTransliterator("Any-Hex/Unicode",
"U+", "", 16, 4, true, null);
}
});
-
+
// Java: "\\uFFFF" hex, min=4, max=4
Transliterator.registerFactory("Any-Hex/Java", new Transliterator.Factory() {
+ @Override
public Transliterator getInstance(String ID) {
return new EscapeTransliterator("Any-Hex/Java",
"\\u", "", 16, 4, false, null);
}
});
-
+
// C: "\\uFFFF" hex, min=4, max=4; \\U0010FFFF hex, min=8, max=8
Transliterator.registerFactory("Any-Hex/C", new Transliterator.Factory() {
+ @Override
public Transliterator getInstance(String ID) {
return new EscapeTransliterator("Any-Hex/C",
"\\u", "", 16, 4, true,
new EscapeTransliterator("", "\\U", "", 16, 8, true, null));
}
});
-
+
// XML: "" hex, min=1, max=6
Transliterator.registerFactory("Any-Hex/XML", new Transliterator.Factory() {
+ @Override
public Transliterator getInstance(String ID) {
return new EscapeTransliterator("Any-Hex/XML",
"&#x", ";", 16, 1, true, null);
// XML10: "&1114111;" dec, min=1, max=7 (not really "Any-Hex")
Transliterator.registerFactory("Any-Hex/XML10", new Transliterator.Factory() {
+ @Override
public Transliterator getInstance(String ID) {
return new EscapeTransliterator("Any-Hex/XML10",
"&#", ";", 10, 1, true, null);
// Perl: "\\x{263A}" hex, min=1, max=6
Transliterator.registerFactory("Any-Hex/Perl", new Transliterator.Factory() {
+ @Override
public Transliterator getInstance(String ID) {
return new EscapeTransliterator("Any-Hex/Perl",
"\\x{", "}", 16, 1, true, null);
// Plain: "FFFF" hex, min=4, max=6
Transliterator.registerFactory("Any-Hex/Plain", new Transliterator.Factory() {
+ @Override
public Transliterator getInstance(String ID) {
return new EscapeTransliterator("Any-Hex/Plain",
"", "", 16, 4, true, null);
}
});
-
+
// Generic
Transliterator.registerFactory("Any-Hex", new Transliterator.Factory() {
+ @Override
public Transliterator getInstance(String ID) {
return new EscapeTransliterator("Any-Hex",
"\\u", "", 16, 4, false, null);
/**
* Implements {@link Transliterator#handleTransliterate}.
*/
+ @Override
protected void handleTransliterate(Replaceable text,
Position pos, boolean incremental) {
int start = pos.start;
/**
* UnicodeReplacer API
*/
+ @Override
public int replace(Replaceable text,
int start,
int limit,
/**
* UnicodeReplacer API
*/
+ @Override
public String toReplacerPattern(boolean escapeUnprintable) {
StringBuilder rule = new StringBuilder("&");
rule.append(translit.getID());
* into the given set.
* @param toUnionTo the set into which to union the output characters
*/
+ @Override
public void addReplacementSetTo(UnicodeSet toUnionTo) {
toUnionTo.addAll(translit.getTargetSet());
}
*/
static void register() {
Transliterator.registerFactory(_ID, new Transliterator.Factory() {
+ @Override
public Transliterator getInstance(String ID) {
return new NameUnicodeTransliterator(null);
}
/**
* Implements {@link Transliterator#handleTransliterate}.
*/
+ @Override
protected void handleTransliterate(Replaceable text,
Position offsets, boolean isIncremental) {
// 1 - after open delimiter
int mode = 0;
int openPos = -1; // open delim candidate pos
-
+
int c;
while (cursor < limit) {
c = text.char32At(cursor);
// to a single space. If closeDelimiter is found, exit
// the loop. If any other character is found, exit the
// loop. If the limit is reached, exit the loop.
-
+
// Convert \s+ => SPACE. This assumes there are no
// runs of >1 space characters in names.
if (PatternProps.isWhiteSpace(c)) {
if (c == CLOSE_DELIM) {
int len = name.length();
-
+
// Delete trailing space, if any
if (len > 0 &&
name.charAt(len-1) == SPACE) {
public void addSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet) {
UnicodeSet myFilter = getFilterAsUnicodeSet(inputFilter);
if (!myFilter.containsAll(UnicodeNameTransliterator.OPEN_DELIM) || !myFilter.contains(CLOSE_DELIM)) {
- return; // we have to contain both prefix and suffix
+ return; // we have to contain both prefix and suffix
}
UnicodeSet items = new UnicodeSet()
.addAll('0', '9')
*/
static void register() {
Transliterator.registerFactory("Any-NFC", new Transliterator.Factory() {
+ @Override
public Transliterator getInstance(String ID) {
return new NormalizationTransliterator("NFC", Normalizer2.getNFCInstance());
}
});
Transliterator.registerFactory("Any-NFD", new Transliterator.Factory() {
+ @Override
public Transliterator getInstance(String ID) {
return new NormalizationTransliterator("NFD", Normalizer2.getNFDInstance());
}
});
Transliterator.registerFactory("Any-NFKC", new Transliterator.Factory() {
+ @Override
public Transliterator getInstance(String ID) {
return new NormalizationTransliterator("NFKC", Normalizer2.getNFKCInstance());
}
});
Transliterator.registerFactory("Any-NFKD", new Transliterator.Factory() {
+ @Override
public Transliterator getInstance(String ID) {
return new NormalizationTransliterator("NFKD", Normalizer2.getNFKDInstance());
}
});
Transliterator.registerFactory("Any-FCD", new Transliterator.Factory() {
+ @Override
public Transliterator getInstance(String ID) {
return new NormalizationTransliterator("FCD", Norm2AllModes.getFCDNormalizer2());
}
});
Transliterator.registerFactory("Any-FCC", new Transliterator.Factory() {
+ @Override
public Transliterator getInstance(String ID) {
return new NormalizationTransliterator("FCC", Norm2AllModes.getNFCInstance().fcc);
}
/**
* Implements {@link Transliterator#handleTransliterate}.
*/
+ @Override
protected void handleTransliterate(Replaceable text,
Position offsets, boolean isIncremental) {
// start and limit of the input range
}
static final Map<Normalizer2, SourceTargetUtility> SOURCE_CACHE = new HashMap<Normalizer2, SourceTargetUtility>();
-
+
// TODO Get rid of this if Normalizer2 becomes a Transform
static class NormalizingTransform implements Transform<String,String> {
final Normalizer2 norm2;
public NormalizingTransform(Normalizer2 norm2) {
this.norm2 = norm2;
}
+ @Override
public String transform(String source) {
return norm2.normalize(source);
- }
+ }
}
/* (non-Javadoc)
/**
* Implements {@link Transliterator#handleTransliterate}.
*/
+ @Override
protected void handleTransliterate(Replaceable text,
Position offsets, boolean incremental) {
offsets.start = offsets.limit;
*/
static void register() {
Transliterator.registerFactory(_ID, new Transliterator.Factory() {
+ @Override
public Transliterator getInstance(String ID) {
return new RemoveTransliterator();
}
/**
* Implements {@link Transliterator#handleTransliterate}.
*/
+ @Override
protected void handleTransliterate(Replaceable text,
Position index, boolean incremental) {
// Our caller (filteredTransliterate) has already narrowed us
@Deprecated
public class RuleBasedTransliterator extends Transliterator {
- private Data data;
+ private final Data data;
// /**
// * Constructs a new transliterator from the given rules.
* @internal
* @deprecated This API is ICU internal only.
*/
+ @Override
@Deprecated
protected void handleTransliterate(Replaceable text,
Position index, boolean incremental) {
* @internal
* @deprecated This API is ICU internal only.
*/
+ @Override
@Deprecated
public String toRules(boolean escapeUnprintable) {
return data.ruleSet.toRules(escapeUnprintable);
// public UnicodeSet getTargetSet() {
// return data.ruleSet.getSourceTargetSet(true, unicodeFilter);
// }
-
+
/**
* @internal
* @deprecated This API is ICU internal only.
* match.
*/
private int matchStart;
-
+
/**
* Limit offset, in the match text, of the <em>rightmost</em>
* match.
/**
* Implement UnicodeMatcher
*/
+ @Override
public int matches(Replaceable text,
int[] offset,
int limit,
/**
* Implement UnicodeMatcher
*/
+ @Override
public String toPattern(boolean escapeUnprintable) {
StringBuffer result = new StringBuffer();
StringBuffer quoteBuf = new StringBuffer();
/**
* Implement UnicodeMatcher
*/
+ @Override
public boolean matchesIndexValue(int v) {
if (pattern.length() == 0) {
return true;
* set.
* @param toUnionTo the set into which to union the source characters
*/
+ @Override
public void addMatchSetTo(UnicodeSet toUnionTo) {
int ch;
for (int i=0; i<pattern.length(); i+=UTF16.getCharCount(ch)) {
/**
* UnicodeReplacer API
*/
+ @Override
public int replace(Replaceable text,
int start,
int limit,
/**
* UnicodeReplacer API
*/
+ @Override
public String toReplacerPattern(boolean escapeUnprintable) {
// assert(segmentNumber > 0);
StringBuffer rule = new StringBuffer("$");
* into the given set.
* @param toUnionTo the set into which to union the output characters
*/
+ @Override
public void addReplacementSetTo(UnicodeSet toUnionTo) {
// The output of this replacer varies; it is the source text between
// matchStart and matchLimit. Since this varies depending on the
/**
* UnicodeReplacer API
*/
+ @Override
public int replace(Replaceable text,
int start,
int limit,
// Delete the old text (the key)
text.replace(start + outLen, limit + outLen, "");
- }
+ }
if (hasCursor) {
// Adjust the cursor for positions outside the key. These
/**
* UnicodeReplacer API
*/
+ @Override
public String toReplacerPattern(boolean escapeUnprintable) {
StringBuffer rule = new StringBuffer();
StringBuffer quoteBuf = new StringBuffer();
* into the given set.
* @param toUnionTo the set into which to union the output characters
*/
+ @Override
public void addReplacementSetTo(UnicodeSet toUnionTo) {
int ch;
for (int i=0; i<output.length(); i+=UTF16.getCharCount(ch)) {
* Return a string representation of this object.
* @return string representation of this object
*/
+ @Override
public String toString() {
return '{' + toRule(true) + '}';
}
/**
- * Find the source and target sets, subject to the input filter.
+ * Find the source and target sets, subject to the input filter.
* There is a known issue with filters containing multiple characters.
*/
// TODO: Problem: the rule is [{ab}]c > x
* changes Russian text written in Cyrillic characters to phonetically equivalent Latin characters. It does not
* <em>translate</em> Russian to English! Transliteration, unlike translation, operates on characters, without reference
* to the meanings of words and sentences.
- *
+ *
* <p>
* Although script conversion is its most common use, a transliterator can actually perform a more general class of
* tasks. In fact, <code>Transliterator</code> defines a very general API which specifies only that a segment of the
* input text is replaced by new text. The particulars of this conversion are determined entirely by subclasses of
* <code>Transliterator</code>.
- *
+ *
* <p>
* <b>Transliterators are stateless</b>
- *
+ *
* <p>
* <code>Transliterator</code> objects are <em>stateless</em>; they retain no information between calls to
* <code>transliterate()</code>. As a result, threads may share transliterators without synchronizing them. This might
* transliterations by delaying the replacement of text until it is known that no other replacements are possible. In
* other words, although the <code>Transliterator</code> objects are stateless, the source text itself embodies all the
* needed information, and delayed operation allows arbitrary complexity.
- *
+ *
* <p>
* <b>Batch transliteration</b>
- *
+ *
* <p>
* The simplest way to perform transliteration is all at once, on a string of existing text. This is referred to as
* <em>batch</em> transliteration. For example, given a string <code>input</code> and a transliterator <code>t</code>,
* the call
- *
+ *
* <blockquote><code>String result = t.transliterate(input);
* </code></blockquote>
- *
+ *
* will transliterate it and return the result. Other methods allow the client to specify a substring to be
* transliterated and to use {@link Replaceable} objects instead of strings, in order to preserve out-of-band
* information (such as text styles).
- *
+ *
* <p>
* <b>Keyboard transliteration</b>
- *
+ *
* <p>
* Somewhat more involved is <em>keyboard</em>, or incremental transliteration. This is the transliteration of text that
* is arriving from some source (typically the user's keyboard) one character at a time, or in some other piecemeal
* fashion.
- *
+ *
* <p>
* In keyboard transliteration, a <code>Replaceable</code> buffer stores the text. As text is inserted, as much as
* possible is transliterated on the fly. This means a GUI that displays the contents of the buffer may show text being
* modified as each new character arrives.
- *
+ *
* <p>
* Consider the simple <code>RuleBasedTransliterator</code>:
- *
+ *
* <blockquote><code>
* th>{theta}<br>
* t>{tau}
* </code></blockquote>
- *
+ *
* When the user types 't', nothing will happen, since the transliterator is waiting to see if the next character is
* 'h'. To remedy this, we introduce the notion of a cursor, marked by a '|' in the output string:
- *
+ *
* <blockquote><code>
* t>|{tau}<br>
* {tau}h>{theta}
* </code></blockquote>
- *
+ *
* Now when the user types 't', tau appears, and if the next character is 'h', the tau changes to a theta. This is
* accomplished by maintaining a cursor position (independent of the insertion point, and invisible in the GUI) across
* calls to <code>transliterate()</code>. Typically, the cursor will be coincident with the insertion point, but in a
* case like the one above, it will precede the insertion point.
- *
+ *
* <p>
* Keyboard transliteration methods maintain a set of three indices that are updated with each call to
* <code>transliterate()</code>, including the cursor, start, and limit. These indices are changed by the method, and
* <code>RuleBasedTransliterator</code>. Any characters before the <code>cursor</code> index are frozen; future keyboard
* transliteration calls within this input sequence will not change them. New text is inserted at the <code>limit</code>
* index, which marks the end of the substring that the transliterator looks at.
- *
+ *
* <p>
* Because keyboard transliteration assumes that more characters are to arrive, it is conservative in its operation. It
* only transliterates when it can do so unambiguously. Otherwise it waits for more characters to arrive. When the
* client code knows that no more characters are forthcoming, perhaps because the user has performed some input
* termination operation, then it should call <code>finishTransliteration()</code> to complete any pending
* transliterations.
- *
+ *
* <p>
* <b>Inverses</b>
- *
+ *
* <p>
* Pairs of transliterators may be inverses of one another. For example, if transliterator <b>A</b> transliterates
* characters by incrementing their Unicode value (so "abc" -> "def"), and transliterator <b>B</b> decrements character
* values, then <b>A</b> is an inverse of <b>B</b> and vice versa. If we compose <b>A</b> with <b>B</b> in a compound
* transliterator, the result is the indentity transliterator, that is, a transliterator that does not change its input
* text.
- *
+ *
* The <code>Transliterator</code> method <code>getInverse()</code> returns a transliterator's inverse, if one exists,
* or <code>null</code> otherwise. However, the result of <code>getInverse()</code> usually will <em>not</em> be a true
* mathematical inverse. This is because true inverse transliterators are difficult to formulate. For example, consider
* two transliterators: <b>AB</b>, which transliterates the character 'A' to 'B', and <b>BA</b>, which transliterates
* 'B' to 'A'. It might seem that these are exact inverses, since
- *
+ *
* <blockquote>"A" x <b>AB</b> -> "B"<br>
* "B" x <b>BA</b> -> "A"</blockquote>
- *
+ *
* where 'x' represents transliteration. However,
- *
+ *
* <blockquote>"ABCD" x <b>AB</b> -> "BBCD"<br>
* "BBCD" x <b>BA</b> -> "AACD"</blockquote>
- *
+ *
* so <b>AB</b> composed with <b>BA</b> is not the identity. Nonetheless, <b>BA</b> may be usefully considered to be
* <b>AB</b>'s inverse, and it is on this basis that <b>AB</b><code>.getInverse()</code> could legitimately return
* <b>BA</b>.
- *
+ *
* <p>
* <b>Filtering</b>
* <p>Each transliterator has a filter, which restricts changes to those characters selected by the filter. The
* filter affects just the characters that are changed -- the characters outside of the filter are still part of the
* context for the filter. For example, in the following even though 'x' is filtered out, and doesn't convert to y, it does affect the conversion of 'a'.
- *
+ *
* <pre>
* String rules = "x > y; x{a} > b; ";
* Transliterator tempTrans = Transliterator.createFromRules("temp", rules, Transliterator.FORWARD);
*</pre>
* <p>
* <b>IDs and display names</b>
- *
+ *
* <p>
* A transliterator is designated by a short identifier string or <em>ID</em>. IDs follow the format
* <em>source-destination</em>, where <em>source</em> describes the entity being replaced, and <em>destination</em>
* Russian to Latin might be named "Russian-Latin". A transliterator from keyboard escape sequences to Latin-1
* characters might be named "KeyboardEscape-Latin1". By convention, system entity names are in English, with the
* initial letters of words capitalized; user entity names may follow any format so long as they do not contain dashes.
- *
+ *
* <p>
* In addition to programmatic IDs, transliterator objects have display names for presentation in user interfaces,
* returned by {@link #getDisplayName}.
- *
+ *
* <p>
* <b>Factory methods and registration</b>
- *
+ *
* <p>
* In general, client code should use the factory method <code>getInstance()</code> to obtain an instance of a
* transliterator given its ID. Valid IDs may be enumerated using <code>getAvailableIDs()</code>. Since transliterators
* are stateless, multiple calls to <code>getInstance()</code> with the same ID will return the same object.
- *
+ *
* <p>
* In addition to the system transliterators registered at startup, user transliterators may be registered by calling
* <code>registerInstance()</code> at run time. To register a transliterator subclass without instantiating it (until it
* is needed), users may call <code>registerClass()</code>.
- *
+ *
* <p>
* <b>Composed transliterators</b>
- *
+ *
* <p>
* In addition to built-in system transliterators like "Latin-Greek", there are also built-in <em>composed</em>
* transliterators. These are implemented by composing two or more component transliterators. For example, if we have
* <sup>2</sup> - <em>n</em>, so as <em>n</em> gets larger the gain becomes significant. With 9 scripts, it's 18 vs. 72
* rule sets, a big difference.) Note the use of "~" rather than "-" for the script separator here; this indicates that
* the given transliterator is intended to be composed with others, rather than be used as is.
- *
+ *
* <p>
* Composed transliterators can be instantiated as usual. For example, the system transliterator "Devanagari-Gujarati"
* is a composed transliterator built internally as "Devanagari~InterIndic;InterIndic~Gujarati". When this
* transliterator is instantiated, it appears externally to be a standard transliterator (e.g., getID() returns
* "Devanagari-Gujarati").
- *
+ *
* <p>
* <b>Subclassing</b>
- *
+ *
* <p>
* Subclasses must implement the abstract method <code>handleTransliterate()</code>.
* <p>
* Subclasses should override the <code>transliterate()</code> method taking a <code>Replaceable</code> and the
* <code>transliterate()</code> method taking a <code>String</code> and <code>StringBuffer</code> if the performance of
* these methods can be improved over the performance obtained by the default implementations in this class.
- *
+ *
* <p>
* Copyright © IBM Corporation 1999. All rights reserved.
- *
+ *
* @author Alan Liu
* @stable ICU 2.0
*/
* Returns true if this Position is equal to the given object.
* @stable ICU 2.6
*/
+ @Override
public boolean equals(Object obj) {
if (obj instanceof Position) {
Position pos = (Position) obj;
}
return false;
}
-
+
/**
* Mock implementation of hashCode(). This implementation always returns a constant
* value. When Java assertion is enabled, this method triggers an assertion failure.
* @internal
* @deprecated This API is ICU internal only.
*/
+ @Override
@Deprecated
public int hashCode() {
assert false : "hashCode not designed";
* Returns a string representation of this Position.
* @stable ICU 2.6
*/
+ @Override
public String toString() {
return "[cs=" + contextStart
+ ", s=" + start
}
/**
- * Returns the intersectionof this instance's filter intersected with an external filter.
+ * Returns the intersectionof this instance's filter intersected with an external filter.
* The externalFilter must be frozen (it is frozen if not).
* The result may be frozen, so don't attempt to modify.
* @internal
/**
* Register a factory object with the given ID. The factory
* method should return a new instance of the given transliterator.
- *
+ *
* <p>Because ICU may choose to cache Transliterator objects internally, this must
* be called at application startup, prior to any calls to
* Transliterator.getInstance to avoid undefined behavior.
- *
+ *
* @param ID the ID of this transliterator
* @param factory the factory object
* @stable ICU 2.0
/**
* Register a Transliterator object with the given ID.
- *
+ *
* <p>Because ICU may choose to cache Transliterator objects internally, this must
* be called at application startup, prior to any calls to
* Transliterator.getInstance to avoid undefined behavior.
- *
+ *
* @param trans the Transliterator object
* @stable ICU 2.2
*/
/**
* Register a Transliterator object.
- *
+ *
* <p>Because ICU may choose to cache Transliterator objects internally, this must
* be called at application startup, prior to any calls to
* Transliterator.getInstance to avoid undefined behavior.
- *
+ *
* @param trans the Transliterator object
*/
static void registerInstance(Transliterator trans, boolean visible) {
* Register an ID as an alias of another ID. Instantiating
* alias ID produces the same result as instantiating the original ID.
* This is generally used to create short aliases of compound IDs.
- *
+ *
* <p>Because ICU may choose to cache Transliterator objects internally, this must
* be called at application startup, prior to any calls to
* Transliterator.getInstance to avoid undefined behavior.
- *
+ *
* @param aliasID The new ID being registered.
* @param realID The existing ID that the new ID should be an alias of.
* @stable ICU 3.6
* <id> is the ID of the system transliterator being defined. These
* are public IDs enumerated by Transliterator.getAvailableIDs(),
* unless the second field is "internal".
- *
+ *
* <resource> is a ResourceReader resource name. Currently these refer
* to file names under com/ibm/text/resources. This string is passed
* directly to ResourceReader, together with <encoding>.
- *
+ *
* <direction> is either "FORWARD" or "REVERSE".
- *
+ *
* <getInstanceArg> is a string to be passed directly to
* Transliterator.getInstance(). The returned Transliterator object
* then has its ID changed to <id> and is returned.
BreakTransliterator.register();
AnyTransliterator.register(); // do this last!
}
-
+
/**
* Register the script-based "Any" transliterators: Any-Latin, Any-Greek
* @internal
*/
Transliterator getInstance(String ID);
}
-
+
/**
* Implements StringTransform via this method.
* @param source text to be transformed (eg lowercased)
* @return result
* @stable ICU 3.8
*/
+ @Override
public String transform(String source) {
return transliterate(source);
}
/**
* Vector of StringMatcher objects for segments. Used during the
- * parsing of a single rule.
+ * parsing of a single rule.
* segmentStandins.charAt(0) is the standin for "$1" and corresponds
* to StringMatcher object segmentObjects.elementAt(0), etc.
*/
private static final char ALT_FORWARD_RULE_OP = '\u2192'; // Right Arrow
private static final char ALT_FWDREV_RULE_OP = '\u2194'; // Left Right Arrow
private static final char ALT_FUNCTION = '\u2206'; // Increment (~Greek Capital Delta)
-
+
// Special characters disallowed at the top level
private static UnicodeSet ILLEGAL_TOP = new UnicodeSet("[\\)]");
/**
* Implement SymbolTable API.
*/
+ @Override
public char[] lookup(String name) {
return variableNames.get(name);
}
/**
* Implement SymbolTable API.
*/
+ @Override
public UnicodeMatcher lookupMatcher(int ch) {
// Note that we cannot use data.lookup() because the
// set array has not been constructed yet.
* Implement SymbolTable API. Parse out a symbol reference
* name.
*/
+ @Override
public String parseReference(String text, ParsePosition pos, int limit) {
int start = pos.getIndex();
int i = start;
String[] array;
int i;
public RuleArray(String[] array) { this.array = array; i = 0; }
+ @Override
public String handleNextLine() {
return (i < array.length) ? array[i++] : null;
}
+ @Override
public void reset() {
i = 0;
}
}
pp.setIndex(pos-1); // Backup to opening '['
buf.append(parser.parseSet(rule, pp));
- pos = pp.getIndex();
+ pos = pp.getIndex();
continue;
}
// Handle escapes
}
}
quoteLimit = buf.length();
-
+
for (iq=quoteStart; iq<quoteLimit; ++iq) {
parser.checkVariableRange(buf.charAt(iq), rule, start);
}
}
switch (c) {
-
+
//------------------------------------------------------
// Elements allowed within and out of segments
//------------------------------------------------------
break;
}
///CLOVER:ON
-
+
int qstart, qlimit;
// The */+ follows an isolated character or quote
// or variable reference
} catch (RuntimeException e) {
final String precontext = pos < 50 ? rule.substring(0, pos) : "..." + rule.substring(pos - 50, pos);
final String postContext = limit-pos <= 50 ? rule.substring(pos, limit) : rule.substring(pos, pos+50) + "...";
- throw (RuntimeException)
- new IllegalIcuArgumentException("Failure in rule: " + precontext + "$$$"
- + postContext).initCause(e);
+ throw new IllegalIcuArgumentException("Failure in rule: " + precontext + "$$$"
+ + postContext).initCause(e);
}
int min = 0;
int max = Quantifier.MAX;
public void parse(String rules, int dir) {
parseRules(new RuleArray(new String[] { rules }), dir);
}
-
+
/*
* Parse a set of rules. After the parse completes, examine the public
* data members for results.
if (start > end || start < 0 || end > 0xFFFF) {
throw new IllegalIcuArgumentException("Invalid variable range " + start + ", " + end);
}
-
+
curData.variablesBase = (char) start; // first private use
if (dataVector.size() == 0) {
// know that pos points to /use\s/i; we can skip 4 characters
// immediately
pos += 4;
-
+
// Here are the pragmas we recognize:
// use variable range 0xE000 0xEFFF;
// use maximum backup 16;
}
return c;
}
-
+
/**
* Set the object for segment seg (1-based).
*/
en = e;
}
+ @Override
public boolean hasMoreElements() {
return en != null && en.hasMoreElements();
}
+ @Override
public String nextElement() {
return (en.nextElement()).getString();
}
TransliteratorParser parser = new TransliteratorParser();
try {
-
+
ResourceEntry re = (ResourceEntry) entry;
parser.parse(re.resource, re.direction);
-
+
} catch (ClassCastException e) {
// If we pull a rule from a locale resource bundle it will
// be a LocaleEntry.
static void register() {
// Unicode: "U+10FFFF" hex, min=4, max=6
Transliterator.registerFactory("Hex-Any/Unicode", new Transliterator.Factory() {
+ @Override
public Transliterator getInstance(String ID) {
return new UnescapeTransliterator("Hex-Any/Unicode", new char[] {
2, 0, 16, 4, 6, 'U', '+',
});
}
});
-
+
// Java: "\\uFFFF" hex, min=4, max=4
Transliterator.registerFactory("Hex-Any/Java", new Transliterator.Factory() {
+ @Override
public Transliterator getInstance(String ID) {
return new UnescapeTransliterator("Hex-Any/Java", new char[] {
2, 0, 16, 4, 4, '\\', 'u',
});
}
});
-
+
// C: "\\uFFFF" hex, min=4, max=4; \\U0010FFFF hex, min=8, max=8
Transliterator.registerFactory("Hex-Any/C", new Transliterator.Factory() {
+ @Override
public Transliterator getInstance(String ID) {
return new UnescapeTransliterator("Hex-Any/C", new char[] {
2, 0, 16, 4, 4, '\\', 'u',
});
}
});
-
+
// XML: "" hex, min=1, max=6
Transliterator.registerFactory("Hex-Any/XML", new Transliterator.Factory() {
+ @Override
public Transliterator getInstance(String ID) {
return new UnescapeTransliterator("Hex-Any/XML", new char[] {
3, 1, 16, 1, 6, '&', '#', 'x', ';',
// XML10: "&1114111;" dec, min=1, max=7 (not really "Hex-Any")
Transliterator.registerFactory("Hex-Any/XML10", new Transliterator.Factory() {
+ @Override
public Transliterator getInstance(String ID) {
return new UnescapeTransliterator("Hex-Any/XML10", new char[] {
2, 1, 10, 1, 7, '&', '#', ';',
// Perl: "\\x{263A}" hex, min=1, max=6
Transliterator.registerFactory("Hex-Any/Perl", new Transliterator.Factory() {
+ @Override
public Transliterator getInstance(String ID) {
return new UnescapeTransliterator("Hex-Any/Perl", new char[] {
3, 1, 16, 1, 6, '\\', 'x', '{', '}',
// All: Java, C, Perl, XML, XML10, Unicode
Transliterator.registerFactory("Hex-Any", new Transliterator.Factory() {
+ @Override
public Transliterator getInstance(String ID) {
return new UnescapeTransliterator("Hex-Any", new char[] {
2, 0, 16, 4, 6, 'U', '+', // Unicode
/**
* Implements {@link Transliterator#handleTransliterate}.
*/
+ @Override
protected void handleTransliterate(Replaceable text,
Position pos, boolean isIncremental) {
int start = pos.start;
*/
static void register() {
Transliterator.registerFactory(_ID, new Transliterator.Factory() {
+ @Override
public Transliterator getInstance(String ID) {
return new UnicodeNameTransliterator(null);
}
/**
* Implements {@link Transliterator#handleTransliterate}.
*/
+ @Override
protected void handleTransliterate(Replaceable text,
Position offsets, boolean isIncremental) {
int cursor = offsets.start;
int limit = offsets.limit;
-
+
StringBuilder str = new StringBuilder();
str.append(OPEN_DELIM);
int len;
String name;
-
+
while (cursor < limit) {
int c = text.char32At(cursor);
if ((name=UCharacter.getExtendedName(c)) != null) {
-
+
str.setLength(OPEN_DELIM_LEN);
str.append(name).append(CLOSE_DELIM);