From 4f59ab84ccf97a0a448c9ace10929eaa70558640 Mon Sep 17 00:00:00 2001 From: Andy Heninger Date: Tue, 12 Feb 2013 23:11:58 +0000 Subject: [PATCH] ICU-9657 Thread safety fix in charset detector. X-SVN-Rev: 33199 --- .../com/ibm/icu/text/CharsetRecog_sbcs.java | 45 ++++++++---------- .../test/charsetdet/TestCharsetDetector.java | 47 ++++++++++++++++++- 2 files changed, 65 insertions(+), 27 deletions(-) diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/CharsetRecog_sbcs.java b/icu4j/main/classes/core/src/com/ibm/icu/text/CharsetRecog_sbcs.java index 1ea6d2fc726..2d3b0c13be4 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/CharsetRecog_sbcs.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/CharsetRecog_sbcs.java @@ -1,6 +1,6 @@ /* **************************************************************************** - * Copyright (C) 2005-2012, International Business Machines Corporation and * + * Copyright (C) 2005-2013, International Business Machines Corporation and * * others. All Rights Reserved. * ************************************************************************** * * @@ -1033,8 +1033,6 @@ abstract class CharsetRecog_sbcs extends CharsetRecognizer { { //arabic shaping class, method shape/unshape protected static ArabicShaping as = new ArabicShaping(ArabicShaping.LETTERS_UNSHAPE); - protected byte[] prev_fInputBytes = null; - protected int prev_fInputLen = 0; protected static byte[] byteMap = { /* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F */ @@ -1080,15 +1078,6 @@ abstract class CharsetRecog_sbcs extends CharsetRecognizer { { return "ar"; } - protected void matchInit(CharsetDetector det) - { - assert prev_fInputBytes == null; - prev_fInputBytes = det.fInputBytes; - prev_fInputLen = det.fInputLen; - det.fInputBytes = unshape(prev_fInputBytes, prev_fInputLen); - det.fInputLen = det.fInputBytes.length; - } - /* * Arabic shaping needs to be done manually. Cannot call ArabicShaping class * because CharsetDetector is dealing with bytes not Unicode code points. We could @@ -1096,7 +1085,7 @@ abstract class CharsetRecog_sbcs extends CharsetRecognizer { * on CharsetICU which we try to avoid. IBM420 converter amongst different versions * of JDK can produce different results and therefore is also avoided. */ - private byte[] unshape(byte[] inputBytes, int inputLen) { + byte[] unshape(byte[] inputBytes, int inputLen) { byte resultByteArr[] = unshapeLamAlef(inputBytes, inputLen); for (int i=0; i