From: Yoshito Umaoka <y.umaoka@gmail.com>
Date: Tue, 17 Sep 2013 06:57:53 +0000 (+0000)
Subject: ICU-10012 Disable EBCDIC Arabic/Hebrew detectors by default. Added APIs to enable... 
X-Git-Tag: milestone-59-0-1~2507
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=920dadff8d99c7d1bbe62fad8bf377849eb1bb70;p=icu

ICU-10012 Disable EBCDIC Arabic/Hebrew detectors by default. Added APIs to enable/disable each charset and get currently active charsets.

X-SVN-Rev: 34350
---

diff --git a/icu4c/source/i18n/csdetect.cpp b/icu4c/source/i18n/csdetect.cpp
index c7f82b5525a..5115684d5f5 100644
--- a/icu4c/source/i18n/csdetect.cpp
+++ b/icu4c/source/i18n/csdetect.cpp
@@ -32,7 +32,21 @@
 #define NEW_ARRAY(type,count) (type *) uprv_malloc((count) * sizeof(type))
 #define DELETE_ARRAY(array) uprv_free((void *) (array))
 
-static icu::CharsetRecognizer **fCSRecognizers = NULL;
+U_NAMESPACE_BEGIN
+
+struct CSRecognizerInfo : public UMemory {
+    CSRecognizerInfo(CharsetRecognizer *recognizer, UBool isDefaultEnabled)
+        : recognizer(recognizer), isDefaultEnabled(isDefaultEnabled) {};
+
+    ~CSRecognizerInfo() {delete recognizer;};
+
+    CharsetRecognizer *recognizer;
+    UBool isDefaultEnabled;
+};
+
+U_NAMESPACE_END
+
+static icu::CSRecognizerInfo **fCSRecognizers = NULL;
 static icu::UInitOnce gCSRecognizersInitOnce;
 static int32_t fCSRecognizers_size = 0;
 
@@ -70,47 +84,48 @@ charsetMatchComparator(const void * /*context*/, const void *left, const void *r
 static void U_CALLCONV initRecognizers(UErrorCode &status) {
     U_NAMESPACE_USE
     ucln_i18n_registerCleanup(UCLN_I18N_CSDET, csdet_cleanup);
-    CharsetRecognizer *tempArray[] = {
-        new CharsetRecog_UTF8(),
-
-        new CharsetRecog_UTF_16_BE(),
-        new CharsetRecog_UTF_16_LE(),
-        new CharsetRecog_UTF_32_BE(),
-        new CharsetRecog_UTF_32_LE(),
-
-        new CharsetRecog_8859_1(),
-        new CharsetRecog_8859_2(),
-        new CharsetRecog_8859_5_ru(),
-        new CharsetRecog_8859_6_ar(),
-        new CharsetRecog_8859_7_el(),
-        new CharsetRecog_8859_8_I_he(),
-        new CharsetRecog_8859_8_he(),
-        new CharsetRecog_windows_1251(),
-        new CharsetRecog_windows_1256(),
-        new CharsetRecog_KOI8_R(),
-        new CharsetRecog_8859_9_tr(),
-        new CharsetRecog_sjis(),
-        new CharsetRecog_gb_18030(),
-        new CharsetRecog_euc_jp(),
-        new CharsetRecog_euc_kr(),
-        new CharsetRecog_big5(),
-
-        new CharsetRecog_2022JP(),
-        new CharsetRecog_2022KR(),
-        new CharsetRecog_2022CN(),
-
-        new CharsetRecog_IBM424_he_rtl(),
-        new CharsetRecog_IBM424_he_ltr(),
-        new CharsetRecog_IBM420_ar_rtl(),
-        new CharsetRecog_IBM420_ar_ltr()
+    CSRecognizerInfo *tempArray[] = {
+        new CSRecognizerInfo(new CharsetRecog_UTF8(), TRUE),
+
+        new CSRecognizerInfo(new CharsetRecog_UTF_16_BE(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_UTF_16_LE(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_UTF_32_BE(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_UTF_32_LE(), TRUE),
+
+        new CSRecognizerInfo(new CharsetRecog_8859_1(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_8859_2(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_8859_5_ru(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_8859_6_ar(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_8859_7_el(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_8859_8_I_he(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_8859_8_he(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_windows_1251(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_windows_1256(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_KOI8_R(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_8859_9_tr(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_sjis(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_gb_18030(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_euc_jp(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_euc_kr(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_big5(), TRUE),
+
+        new CSRecognizerInfo(new CharsetRecog_2022JP(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_2022KR(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_2022CN(), TRUE),
+
+        new CSRecognizerInfo(new CharsetRecog_IBM424_he_rtl(), FALSE),
+        new CSRecognizerInfo(new CharsetRecog_IBM424_he_ltr(), FALSE),
+        new CSRecognizerInfo(new CharsetRecog_IBM420_ar_rtl(), FALSE),
+        new CSRecognizerInfo(new CharsetRecog_IBM420_ar_ltr(), FALSE)
     };
     int32_t rCount = ARRAY_SIZE(tempArray);
 
-    fCSRecognizers = NEW_ARRAY(CharsetRecognizer *, rCount);
+    fCSRecognizers = NEW_ARRAY(CSRecognizerInfo *, rCount);
 
     if (fCSRecognizers == NULL) {
         status = U_MEMORY_ALLOCATION_ERROR;
-    } else {
+    } 
+    else {
         fCSRecognizers_size = rCount;
         for (int32_t r = 0; r < rCount; r += 1) {
             fCSRecognizers[r] = tempArray[r];
@@ -132,7 +147,8 @@ void CharsetDetector::setRecognizers(UErrorCode &status)
 
 CharsetDetector::CharsetDetector(UErrorCode &status)
   : textIn(new InputText(status)), resultArray(NULL),
-    resultCount(0), fStripTags(FALSE), fFreshTextSet(FALSE)
+    resultCount(0), fStripTags(FALSE), fFreshTextSet(FALSE),
+    fEnabledRecognizers(NULL)
 {
     if (U_FAILURE(status)) {
         return;
@@ -170,6 +186,10 @@ CharsetDetector::~CharsetDetector()
     }
 
     uprv_free(resultArray);
+
+    if (fEnabledRecognizers) {
+        uprv_free(fEnabledRecognizers);
+    }
 }
 
 void CharsetDetector::setText(const char *in, int32_t len)
@@ -234,7 +254,7 @@ const CharsetMatch * const *CharsetDetector::detectAll(int32_t &maxMatchesFound,
         // give a match quality > 0.
         resultCount = 0;
         for (i = 0; i < fCSRecognizers_size; i += 1) {
-            csr = fCSRecognizers[i];
+            csr = fCSRecognizers[i]->recognizer;
             if (csr->match(textIn, resultArray[resultCount])) {
                 resultCount++;
             }
@@ -251,6 +271,46 @@ const CharsetMatch * const *CharsetDetector::detectAll(int32_t &maxMatchesFound,
     return resultArray;
 }
 
+void CharsetDetector::setDetectableCharset(const char *encoding, UBool enabled, UErrorCode &status)
+{
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    int32_t modIdx = -1;
+    UBool isDefaultVal = FALSE;
+    for (int32_t i = 0; i < fCSRecognizers_size; i++) {
+        CSRecognizerInfo *csrinfo = fCSRecognizers[i];
+        if (uprv_strcmp(csrinfo->recognizer->getName(), encoding) == 0) {
+            modIdx = i;
+            isDefaultVal = (csrinfo->isDefaultEnabled == enabled);
+            break;
+        }
+    }
+    if (modIdx < 0) {
+        // No matching encoding found
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    if (fEnabledRecognizers == NULL && !isDefaultVal) {
+        // Create an array storing the non default setting
+        fEnabledRecognizers = NEW_ARRAY(UBool, fCSRecognizers_size);
+        if (fEnabledRecognizers == NULL) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        // Initialize the array with default info
+        for (int32_t i = 0; i < fCSRecognizers_size; i++) {
+            fEnabledRecognizers[i] = fCSRecognizers[i]->isDefaultEnabled;
+        }
+    }
+
+    if (fEnabledRecognizers != NULL) {
+        fEnabledRecognizers[modIdx] = enabled;
+    }
+}
+
 /*const char *CharsetDetector::getCharsetName(int32_t index, UErrorCode &status) const
 {
     if( index > fCSRecognizers_size-1 || index < 0) {
@@ -267,6 +327,8 @@ U_NAMESPACE_END
 U_CDECL_BEGIN
 typedef struct {
     int32_t currIndex;
+    UBool all;
+    UBool *enabledRecognizers;
 } Context;
 
 
@@ -281,27 +343,73 @@ enumClose(UEnumeration *en) {
 }
 
 static int32_t U_CALLCONV
-enumCount(UEnumeration *, UErrorCode *) {
-    return fCSRecognizers_size;
+enumCount(UEnumeration *en, UErrorCode *) {
+    if (((Context *)en->context)->all) {
+        // ucsdet_getAllDetectableCharsets, all charset detector names
+        return fCSRecognizers_size;
+    }
+
+    // Otherwise, ucsdet_getDetectableCharsets - only enabled ones
+    int32_t count = 0;
+    UBool *enabledArray = ((Context *)en->context)->enabledRecognizers;
+    if (enabledArray != NULL) {
+        // custom set
+        for (int32_t i = 0; i < fCSRecognizers_size; i++) {
+            if (enabledArray[i]) {
+                count++;
+            }
+        }
+    } else {
+        // default set
+        for (int32_t i = 0; i < fCSRecognizers_size; i++) {
+            if (fCSRecognizers[i]->isDefaultEnabled) {
+                count++;
+            }
+        }
+    }
+    return count;
 }
 
 static const char* U_CALLCONV
 enumNext(UEnumeration *en, int32_t *resultLength, UErrorCode * /*status*/) {
-    if(((Context *)en->context)->currIndex >= fCSRecognizers_size) {
-        if(resultLength != NULL) {
-            *resultLength = 0;
+    const char *currName = NULL;
+
+    if (((Context *)en->context)->currIndex < fCSRecognizers_size) {
+        if (((Context *)en->context)->all) {
+            // ucsdet_getAllDetectableCharsets, all charset detector names
+            currName = fCSRecognizers[((Context *)en->context)->currIndex]->recognizer->getName();
+            ((Context *)en->context)->currIndex++;
+        } else {
+            // ucsdet_getDetectableCharsets
+            UBool *enabledArray = ((Context *)en->context)->enabledRecognizers;
+            if (enabledArray != NULL) {
+                // custome set
+                while (currName == NULL && ((Context *)en->context)->currIndex < fCSRecognizers_size) {
+                    if (enabledArray[((Context *)en->context)->currIndex]) {
+                        currName = fCSRecognizers[((Context *)en->context)->currIndex]->recognizer->getName();
+                    }
+                    ((Context *)en->context)->currIndex++;
+                }
+            } else {
+                // default set
+                while (currName == NULL && ((Context *)en->context)->currIndex < fCSRecognizers_size) {
+                    if (fCSRecognizers[((Context *)en->context)->currIndex]->isDefaultEnabled) {
+                        currName = fCSRecognizers[((Context *)en->context)->currIndex]->recognizer->getName();
+                    }
+                    ((Context *)en->context)->currIndex++;
+                }
+            }
         }
-        return NULL;
     }
-    const char *currName = fCSRecognizers[((Context *)en->context)->currIndex]->getName();
+
     if(resultLength != NULL) {
-        *resultLength = (int32_t)uprv_strlen(currName);
+        *resultLength = currName == NULL ? 0 : (int32_t)uprv_strlen(currName);
     }
-    ((Context *)en->context)->currIndex++;
 
     return currName;
 }
 
+
 static void U_CALLCONV
 enumReset(UEnumeration *en, UErrorCode *) {
     ((Context *)en->context)->currIndex = 0;
@@ -317,25 +425,61 @@ static const UEnumeration gCSDetEnumeration = {
     enumReset
 };
 
-U_CAPI  UEnumeration * U_EXPORT2
-ucsdet_getAllDetectableCharsets(const UCharsetDetector * /*ucsd*/, UErrorCode *status)
+U_CDECL_END
+
+U_NAMESPACE_BEGIN
+
+UEnumeration * CharsetDetector::getAllDetectableCharsets(UErrorCode &status)
 {
-    U_NAMESPACE_USE
 
-    if(U_FAILURE(*status)) {
+    /* Initialize recognized charsets. */
+    setRecognizers(status);
+
+    if(U_FAILURE(status)) {
         return 0;
     }
 
-    /* Initialize recognized charsets. */
-    CharsetDetector::getDetectableCount();
+    UEnumeration *en = NEW_ARRAY(UEnumeration, 1);
+    if (en == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return 0;
+    }
+    memcpy(en, &gCSDetEnumeration, sizeof(UEnumeration));
+    en->context = (void*)NEW_ARRAY(Context, 1);
+    if (en->context == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        DELETE_ARRAY(en);
+        return 0;
+    }
+    uprv_memset(en->context, 0, sizeof(Context));
+    ((Context*)en->context)->all = TRUE;
+    return en;
+}
+
+UEnumeration * CharsetDetector::getDetectableCharsets(UErrorCode &status) const
+{
+    if(U_FAILURE(status)) {
+        return 0;
+    }
 
     UEnumeration *en = NEW_ARRAY(UEnumeration, 1);
+    if (en == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return 0;
+    }
     memcpy(en, &gCSDetEnumeration, sizeof(UEnumeration));
     en->context = (void*)NEW_ARRAY(Context, 1);
+    if (en->context == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        DELETE_ARRAY(en);
+        return 0;
+    }
     uprv_memset(en->context, 0, sizeof(Context));
+    ((Context*)en->context)->all = FALSE;
+    ((Context*)en->context)->enabledRecognizers = fEnabledRecognizers;
     return en;
 }
-U_CDECL_END
 
-#endif
+U_NAMESPACE_END
 
+#endif
\ No newline at end of file
diff --git a/icu4c/source/i18n/csdetect.h b/icu4c/source/i18n/csdetect.h
index 405e1f55800..15910453e84 100644
--- a/icu4c/source/i18n/csdetect.h
+++ b/icu4c/source/i18n/csdetect.h
@@ -1,6 +1,6 @@
 /*
  **********************************************************************
- *   Copyright (C) 2005-2006, International Business Machines
+ *   Copyright (C) 2005-2013, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  **********************************************************************
  */
@@ -28,6 +28,10 @@ private:
     UBool fFreshTextSet;
     static void setRecognizers(UErrorCode &status);
 
+    UBool *fEnabledRecognizers;  // If not null, active set of charset recognizers had
+                                // been changed from the default. The array index is
+                                // corresponding to fCSRecognizers. See setDetectableCharset().
+
 public:
     CharsetDetector(UErrorCode &status);
 
@@ -47,7 +51,12 @@ public:
 
 //    const char *getCharsetName(int32_t index, UErrorCode& status) const;
 
-    static int32_t getDetectableCount(); 
+    static int32_t getDetectableCount();
+
+
+    static UEnumeration * getAllDetectableCharsets(UErrorCode &status);
+    UEnumeration * getDetectableCharsets(UErrorCode &status) const;
+    void setDetectableCharset(const char *encoding, UBool enabled, UErrorCode &status);
 };
 
 U_NAMESPACE_END
diff --git a/icu4c/source/i18n/ucsdet.cpp b/icu4c/source/i18n/ucsdet.cpp
index d06dd336428..d702bdfffbc 100644
--- a/icu4c/source/i18n/ucsdet.cpp
+++ b/icu4c/source/i18n/ucsdet.cpp
@@ -1,6 +1,6 @@
 /*
  ********************************************************************************
- *   Copyright (C) 2005-2007, International Business Machines
+ *   Copyright (C) 2005-2013, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  ********************************************************************************
  */
@@ -11,6 +11,11 @@
 #include "unicode/ucsdet.h"
 #include "csdetect.h"
 #include "csmatch.h"
+#include "csrsbcs.h"
+#include "csrmbcs.h"
+#include "csrutf8.h"
+#include "csrucode.h"
+#include "csr2022.h"
 
 #include "cmemory.h"
 
@@ -175,6 +180,26 @@ ucsdet_getUChars(const UCharsetMatch *ucsm,
 
     return ((CharsetMatch *) ucsm)->getUChars(buf, cap, status);
 }
+
+U_CAPI void U_EXPORT2
+ucsdet_setDetectableCharset(UCharsetDetector *ucsd, const char *encoding, UBool enabled, UErrorCode *status)
+{
+    ((CharsetDetector *)ucsd)->setDetectableCharset(encoding, enabled, *status);
+}
+
+U_CAPI  UEnumeration * U_EXPORT2
+ucsdet_getAllDetectableCharsets(const UCharsetDetector * /*ucsd*/, UErrorCode *status)
+{
+    return CharsetDetector::getAllDetectableCharsets(*status);
+}
+
+U_DRAFT UEnumeration * U_EXPORT2
+ucsdet_getDetectableCharsets(const UCharsetDetector *ucsd,  UErrorCode *status)
+{
+    return ((CharsetDetector *)ucsd)->getDetectableCharsets(*status);
+}
+
 U_CDECL_END
 
+
 #endif
diff --git a/icu4c/source/i18n/unicode/ucsdet.h b/icu4c/source/i18n/unicode/ucsdet.h
index dc492ee229e..d9b86a38955 100644
--- a/icu4c/source/i18n/unicode/ucsdet.h
+++ b/icu4c/source/i18n/unicode/ucsdet.h
@@ -1,6 +1,6 @@
 /*
  **********************************************************************
- *   Copyright (C) 2005-2010, International Business Machines
+ *   Copyright (C) 2005-2013, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  **********************************************************************
  *   file name:  ucsdet.h
@@ -321,12 +321,21 @@ ucsdet_getUChars(const UCharsetMatch *ucsm,
   *  The returned UEnumeration provides access to the names of
   *  the charsets.
   *
+  *  <p>
   *  The state of the Charset detector that is passed in does not
   *  affect the result of this function, but requiring a valid, open
   *  charset detector as a parameter insures that the charset detection
   *  service has been safely initialized and that the required detection
   *  data is available.
   *
+  *  <p>
+  *  <b>Note:</b> Multiple different charset encodings in a same family may use
+  *  a single shared name in this implementation. For example, this method returns
+  *  an array including "ISO-8859-1" (ISO Latin 1), but not including "windows-1252"
+  *  (Windows Latin 1). However, actual detection result could be "windows-1252"
+  *  when the input data matches Latin 1 code points with any points only available
+  *  in "windows-1252".
+  *
   *  @param ucsd a Charset detector.
   *  @param status  Any error conditions are reported back in this variable.
   *  @return an iterator providing access to the detectable charset names.
@@ -335,7 +344,6 @@ ucsdet_getUChars(const UCharsetMatch *ucsm,
 U_STABLE  UEnumeration * U_EXPORT2
 ucsdet_getAllDetectableCharsets(const UCharsetDetector *ucsd,  UErrorCode *status);
 
-
 /**
   *  Test whether input filtering is enabled for this charset detector.
   *  Input filtering removes text that appears to be HTML or xml
@@ -346,6 +354,7 @@ ucsdet_getAllDetectableCharsets(const UCharsetDetector *ucsd,  UErrorCode *statu
   *  @return TRUE if filtering is enabled.
   *  @stable ICU 3.6
   */
+
 U_STABLE  UBool U_EXPORT2
 ucsdet_isInputFilterEnabled(const UCharsetDetector *ucsd);
 
@@ -364,6 +373,39 @@ ucsdet_isInputFilterEnabled(const UCharsetDetector *ucsd);
 U_STABLE  UBool U_EXPORT2
 ucsdet_enableInputFilter(UCharsetDetector *ucsd, UBool filter);
 
+
+/**
+  *  Get an iterator over the set of detectable charsets -
+  *  over the charsets that are enabled by the specified charset detector.
+  *
+  *  The returned UEnumeration provides access to the names of
+  *  the charsets.
+  *
+  *  @param ucsd a Charset detector.
+  *  @param status  Any error conditions are reported back in this variable.
+  *  @return an iterator providing access to the detectable charset names by
+  *  the specified charset detector.
+  *  @internal
+  */
+U_DRAFT UEnumeration * U_EXPORT2
+ucsdet_getDetectableCharsets(const UCharsetDetector *ucsd,  UErrorCode *status);
+
+/**
+  * Enable or disable individual charset encoding.
+  * A name of charset encoding must be included in the names returned by
+  * {@link #getAllDetectableCharsets()}.
+  *
+  * @param ucsd a Charset detector.
+  * @param encoding encoding the name of charset encoding.
+  * @param enabled <code>TRUE</code> to enable, or <code>FALSE</code> to disable the
+  *   charset encoding.
+  * @param status receives the return status. When the name of charset encoding
+  *   is not supported, U_ILLEGAL_ARGUMENT_ERROR is set.
+  * @internal
+  */
+U_DRAFT void U_EXPORT2
+ucsdet_setDetectableCharset(UCharsetDetector *ucsd, const char *encoding, UBool enabled, UErrorCode *status);
+
 #endif
 #endif   /* __UCSDET_H */
 
diff --git a/icu4c/source/test/intltest/csdetest.cpp b/icu4c/source/test/intltest/csdetest.cpp
index cdb39cefeae..62b4eacb2b8 100644
--- a/icu4c/source/test/intltest/csdetest.cpp
+++ b/icu4c/source/test/intltest/csdetest.cpp
@@ -1,6 +1,6 @@
 /*
  **********************************************************************
- *   Copyright (C) 2005-2012, International Business Machines
+ *   Copyright (C) 2005-2013, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  **********************************************************************
  */
@@ -275,6 +275,45 @@ void CharsetDetectionTest::ConstructionTest()
         printf("%s\n", name);
 #endif
     }
+
+    const char* defDisabled[] = {
+        "IBM420_rtl", "IBM420_ltr",
+        "IBM424_rtl", "IBM424_ltr",
+        0
+    };
+
+    LocalUEnumerationPointer eActive(ucsdet_getDetectableCharsets(csd.getAlias(), status));
+    const char *activeName = NULL;
+
+    while (activeName = uenum_next(eActive.getAlias(), NULL, status)) {
+        // the charset must be included in all list
+        UBool found = FALSE;
+
+        const char *name = NULL;
+        uenum_reset(e.getAlias(), status);
+        while (name = uenum_next(e.getAlias(), NULL, status)) {
+            if (strcmp(activeName, name) == 0) {
+                found = TRUE;
+                break;
+            }
+        }
+
+        if (!found) {
+            errln(UnicodeString(activeName) + " is not included in the all charset list.");
+        }
+
+        // some charsets are disabled by default
+        found = FALSE;
+        for (int32_t i = 0; defDisabled[i] != 0; i++) {
+            if (strcmp(activeName, defDisabled[i]) == 0) {
+                found = TRUE;
+                break;
+            }
+        }
+        if (found) {
+            errln(UnicodeString(activeName) + " should not be included in the default charset list.");
+        }
+    }
 }
 
 void CharsetDetectionTest::UTF8Test()
@@ -597,6 +636,10 @@ void CharsetDetectionTest::IBM424Test()
     char *bytes_r = extractBytes(s2, "IBM424", brLength);
     
     UCharsetDetector *csd = ucsdet_open(&status);
+	ucsdet_setDetectableCharset(csd, "IBM424_rtl", TRUE, &status);
+	ucsdet_setDetectableCharset(csd, "IBM424_ltr", TRUE, &status);
+	ucsdet_setDetectableCharset(csd, "IBM420_rtl", TRUE, &status);
+	ucsdet_setDetectableCharset(csd, "IBM420_ltr", TRUE, &status);
     if (U_FAILURE(status)) {
         errln("Error opening charset detector. - %s", u_errorName(status));
     }
@@ -684,6 +727,10 @@ void CharsetDetectionTest::IBM420Test()
     if (U_FAILURE(status)) {
         errln("Error opening charset detector. - %s", u_errorName(status));
     }
+	ucsdet_setDetectableCharset(csd, "IBM424_rtl", TRUE, &status);
+	ucsdet_setDetectableCharset(csd, "IBM424_ltr", TRUE, &status);
+	ucsdet_setDetectableCharset(csd, "IBM420_rtl", TRUE, &status);
+	ucsdet_setDetectableCharset(csd, "IBM420_ltr", TRUE, &status);
     const UCharsetMatch *match;
     const char *name;