/**
*******************************************************************************
-* Copyright (C) 2005-2011, International Business Machines Corporation and *
+* Copyright (C) 2005-2012, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
// NOTE(review): matchInit()/matchFinish() further down assign det.fInputBytes and
// det.fInputLen directly, so this reference is not guaranteed to stay the
// kBufSize array allocated here -- presumably the same fields; confirm.
byte[] fInputBytes = // The text to be checked. Markup will have been
new byte[kBufSize]; // removed if appropriate.
// Use fInputLen, not fInputBytes.length, as the count of valid bytes.
- int fInputLen; // Length of the byte data in fInputText.
+ int fInputLen; // Length of the byte data in fInputBytes.
short fByteStats[] = // byte frequency statistics for the input text.
new short[256]; // Value is percent, not absolute.
// Arabic shaping helper, constructed with ArabicShaping.LETTERS_UNSHAPE.
protected static ArabicShaping as = new ArabicShaping(ArabicShaping.LETTERS_UNSHAPE);
// Detector input buffer saved by matchInit() and restored by matchFinish();
// null whenever nothing is saved.
protected byte[] prev_fInputBytes = null;
// Detector input length saved together with prev_fInputBytes.
+ protected int prev_fInputLen = 0;
protected static byte[] byteMap = {
/* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F */
/**
 * Saves the detector's current input buffer and length, then points the
 * detector at the array returned by unshape(). matchFinish() puts the saved
 * values back.
 *
 * NOTE(review): this text is a diff excerpt; lines starting with '-' are the
 * removed copy-into-place version and lines starting with '+' replace them.
 *
 * @param det detector whose fInputBytes / fInputLen are swapped
 */
protected void matchInit(CharsetDetector det)
{
// A non-null value here means an earlier matchInit() was not followed by matchFinish().
assert prev_fInputBytes == null;
- prev_fInputBytes = new byte[det.fInputLen];
- System.arraycopy(det.fInputBytes, 0, prev_fInputBytes, 0, det.fInputLen);
- byte bb[] = unshape(prev_fInputBytes);
- System.arraycopy(bb, 0, det.fInputBytes, 0, bb.length);
- det.fInputLen = bb.length;
// New version: keep the original array reference and its valid length instead of copying.
+ prev_fInputBytes = det.fInputBytes;
+ prev_fInputLen = det.fInputLen;
// Only the first prev_fInputLen bytes are passed through unshape().
+ det.fInputBytes = unshape(prev_fInputBytes, prev_fInputLen);
// The returned array is used in full, so its length becomes the input length.
+ det.fInputLen = det.fInputBytes.length;
}
/*
* on CharsetICU which we try to avoid. IBM420 converter amongst different versions
* of JDK can produce different results and therefore is also avoided.
*/
- private byte[] unshape(byte[] inputBytes) {
- byte resultByteArr[] = unshapeLamAlef(inputBytes);
+ private byte[] unshape(byte[] inputBytes, int inputLen) {
// Step 1: run the first inputLen bytes through unshapeLamAlef().
// NOTE(review): the result is presumably a different length from the input,
// since a LamAlef byte is written as two bytes there -- confirm against the
// full unshapeLamAlef() body.
+ byte resultByteArr[] = unshapeLamAlef(inputBytes, inputLen);
// Step 2: map every byte of the result through unshapeMap. The bound is the
// result's own length, not the input's.
- for (int i=0; i<inputBytes.length; i++){
+ for (int i=0; i<resultByteArr.length; i++){
// & 0xFF turns the signed byte into a 0..255 table index.
resultByteArr[i] = unshapeMap[resultByteArr[i]& 0xFF];
}
return resultByteArr;
}
- private byte[] unshapeLamAlef(byte[] inputBytes) {
- ByteBuffer resultBigBuffer = ByteBuffer.allocate(inputBytes.length*2);
+ private byte[] unshapeLamAlef(byte[] inputBytes, int inputLen) {
+ ByteBuffer resultBigBuffer = ByteBuffer.allocate(inputLen*2);
ByteBuffer resultBuffer;
byte unshapedLamAlef[] = {(byte)0xb1, (byte)0x56};
- for (int i=0; i<inputBytes.length; i++){
+ for (int i=0; i<inputLen; i++){
if (isLamAlef(inputBytes[i]))
resultBigBuffer.put(unshapedLamAlef);
else
/**
 * Restores the detector's input buffer and length that matchInit() saved, if
 * any, and clears the saved reference so the assertion at the top of
 * matchInit() holds on the next call.
 *
 * NOTE(review): this text is a diff excerpt; lines starting with '-' are the
 * removed copy-back version and lines starting with '+' replace them.
 *
 * @param det detector whose fInputBytes / fInputLen are restored
 */
protected void matchFinish(CharsetDetector det) {
// Nothing to restore when matchInit() has not saved a buffer.
if (prev_fInputBytes != null) {
- System.arraycopy(prev_fInputBytes, 0, det.fInputBytes, 0, prev_fInputBytes.length);
- det.fInputLen = prev_fInputBytes.length;
// New version: hand back the original array reference and the saved length.
+ det.fInputBytes = prev_fInputBytes;
+ det.fInputLen = prev_fInputLen;
prev_fInputBytes = null;
}
}