/*
****************************************************************************
- * Copyright (C) 2005-2012, International Business Machines Corporation and *
+ * Copyright (C) 2005-2013, International Business Machines Corporation and *
* others. All Rights Reserved. *
************************************************************************** *
*
{
//arabic shaping class, method shape/unshape
protected static ArabicShaping as = new ArabicShaping(ArabicShaping.LETTERS_UNSHAPE);
- protected byte[] prev_fInputBytes = null;
- protected int prev_fInputLen = 0;
protected static byte[] byteMap = {
/* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F */
{
return "ar";
}
- protected void matchInit(CharsetDetector det)
- {
- assert prev_fInputBytes == null;
- prev_fInputBytes = det.fInputBytes;
- prev_fInputLen = det.fInputLen;
- det.fInputBytes = unshape(prev_fInputBytes, prev_fInputLen);
- det.fInputLen = det.fInputBytes.length;
- }
-
/*
* Arabic shaping needs to be done manually. Cannot call ArabicShaping class
* because CharsetDetector is dealing with bytes not Unicode code points. We could
* convert the bytes to Unicode code points, but that would make us depend
* on CharsetICU which we try to avoid. IBM420 converter amongst different versions
* of JDK can produce different results and therefore is also avoided.
*/
- private byte[] unshape(byte[] inputBytes, int inputLen) {
+ byte[] unshape(byte[] inputBytes, int inputLen) {
byte resultByteArr[] = unshapeLamAlef(inputBytes, inputLen);
for (int i=0; i<resultByteArr.length; i++){
return true;
return false;
}
-
- protected void matchFinish(CharsetDetector det) {
- if (prev_fInputBytes != null) {
- det.fInputBytes = prev_fInputBytes;
- det.fInputLen = prev_fInputLen;
- prev_fInputBytes = null;
- }
- }
-
+
}
static class CharsetRecog_IBM420_ar_rtl extends CharsetRecog_IBM420_ar
{
}
public CharsetMatch match(CharsetDetector det)
{
- matchInit(det);
+ byte[] prev_fInputBytes = det.fInputBytes; // saved in locals (not recognizer fields) so each call keeps its own copy
+ int prev_fInputLen = det.fInputLen;
+ det.fInputBytes = unshape(prev_fInputBytes, prev_fInputLen); // temporarily swap the detector's input for the unshaped bytes
+ det.fInputLen = det.fInputBytes.length;
+ // Score the unshaped input using this recognizer's ngrams and byteMap tables.
int confidence = match(det, ngrams, byteMap, (byte)0x40);
- matchFinish(det);
+ // Put the detector's original input back; a confidence of 0 yields a null result below.
+ det.fInputBytes = prev_fInputBytes;
+ det.fInputLen = prev_fInputLen;
return confidence == 0 ? null : new CharsetMatch(det, this, confidence);
}
}
public CharsetMatch match(CharsetDetector det)
{
- matchInit(det);
+ byte[] prev_fInputBytes = det.fInputBytes; // saved in locals (not recognizer fields) so each call keeps its own copy
+ int prev_fInputLen = det.fInputLen;
+ det.fInputBytes = unshape(prev_fInputBytes, prev_fInputLen); // temporarily swap the detector's input for the unshaped bytes
+ det.fInputLen = det.fInputBytes.length;
+ // Score the unshaped input using this recognizer's ngrams and byteMap tables.
int confidence = match(det, ngrams, byteMap, (byte)0x40);
- matchFinish(det);
+ // Put the detector's original input back; a confidence of 0 yields a null result below.
+ det.fInputBytes = prev_fInputBytes;
+ det.fInputLen = prev_fInputLen;
return confidence == 0 ? null : new CharsetMatch(det, this, confidence);
}
/**
*******************************************************************************
- * Copyright (C) 2005-2012, International Business Machines Corporation and *
+ * Copyright (C) 2005-2013, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
detectedEncodings.add(m.getName()));
}
}
+
+ /**
+  * Runs charset detection on the same input from ten threads at once, each
+  * with its own CharsetDetector, to expose state shared between detectors.
+  *
+  * Anything thrown inside a worker is recorded on that worker and reported
+  * from the test thread after join(); otherwise an exception would only
+  * terminate the worker thread and the test would still pass.
+  */
+ public void TestMultithreaded() {
+     String s = "This is some random plain text to run charset detection on.";
+     final byte [] bytes;
+     try {
+         bytes = s.getBytes("ISO-8859-1");
+     }
+     catch (Exception e) {
+         fail("Unexpected exception " + e.toString());
+         return;
+     }
+
+     class WorkerThread extends Thread {
+         WorkerThread(int num) {
+             n = num;
+         }
+         private int n;
+         // Throwable that escaped the detection loop, or null if none did.
+         // Written only by this worker and read only after join() returns,
+         // so no extra synchronization is needed.
+         private Throwable failure;
+         public void run() {
+             // System.out.println("Thread " + n + " is running.");
+             try {
+                 CharsetDetector det = new CharsetDetector();
+                 det.setText(bytes);
+                 for (int i=0; i<10000; i++) {
+                     CharsetMatch matches[] = det.detectAll();
+                     for (CharsetMatch m: matches) {
+                         assertNotNull("Failure in thread " + n, m);
+                     }
+                 }
+             } catch (Throwable t) {
+                 // Keep it for the test thread; rethrowing here would only end this thread.
+                 failure = t;
+             }
+             // System.out.println("Thread " + n + " is finished.");
+         }
+     }
+
+     WorkerThread threads[] = new WorkerThread[10];
+     for (int i=0; i<10; i++) {
+         threads[i] = new WorkerThread(i);
+         threads[i].start();
+     }
+     // Wait for every worker before reporting, so no thread is still running
+     // when the test method returns.
+     for (WorkerThread thread: threads) {
+         try {
+             thread.join();
+         } catch(Exception e) {
+             fail("Unexpected exception " + e.toString());
+             return;
+         }
+     }
+     for (WorkerThread thread: threads) {
+         if (thread.failure != null) {
+             fail("Failure in thread " + thread.n + ": " + thread.failure.toString());
+             return;
+         }
+     }
+ }
}