ICU-10012 Disable EBCDIC Arabic/Hebrew detectors by default. Added APIs to enable...

author Yoshito Umaoka <y.umaoka@gmail.com>

Tue, 17 Sep 2013 06:57:53 +0000 (06:57 +0000)

committer Yoshito Umaoka <y.umaoka@gmail.com>

Tue, 17 Sep 2013 06:57:53 +0000 (06:57 +0000)
author Yoshito Umaoka <y.umaoka@gmail.com>
Tue, 17 Sep 2013 06:57:53 +0000 (06:57 +0000)
committer Yoshito Umaoka <y.umaoka@gmail.com>
Tue, 17 Sep 2013 06:57:53 +0000 (06:57 +0000)
diff --git a/icu4c/source/i18n/csdetect.cpp b/icu4c/source/i18n/csdetect.cpp

index c7f82b5525a41079c18065fd16eb378c93ad4b33..5115684d5f53257f6833d317d19d988ac0289458 100644 (file)
--- a/icu4c/source/i18n/csdetect.cpp
+++ b/icu4c/source/i18n/csdetect.cpp
@@ -32,7 +32,21 @@
  #define NEW_ARRAY(type,count) (type *) uprv_malloc((count) * sizeof(type))
  #define DELETE_ARRAY(array) uprv_free((void *) (array))
  
-static icu::CharsetRecognizer **fCSRecognizers = NULL;
+U_NAMESPACE_BEGIN
+
+struct CSRecognizerInfo : public UMemory {
+    CSRecognizerInfo(CharsetRecognizer *recognizer, UBool isDefaultEnabled)
+        : recognizer(recognizer), isDefaultEnabled(isDefaultEnabled) {};
+
+    ~CSRecognizerInfo() {delete recognizer;};
+
+    CharsetRecognizer *recognizer;
+    UBool isDefaultEnabled;
+};
+
+U_NAMESPACE_END
+
+static icu::CSRecognizerInfo **fCSRecognizers = NULL;
  static icu::UInitOnce gCSRecognizersInitOnce;
  static int32_t fCSRecognizers_size = 0;
  
@@ -70,47 +84,48 @@ charsetMatchComparator(const void * /*context*/, const void *left, const void *r
  static void U_CALLCONV initRecognizers(UErrorCode &status) {
      U_NAMESPACE_USE
      ucln_i18n_registerCleanup(UCLN_I18N_CSDET, csdet_cleanup);
-    CharsetRecognizer *tempArray[] = {
-        new CharsetRecog_UTF8(),
-
-        new CharsetRecog_UTF_16_BE(),
-        new CharsetRecog_UTF_16_LE(),
-        new CharsetRecog_UTF_32_BE(),
-        new CharsetRecog_UTF_32_LE(),
-
-        new CharsetRecog_8859_1(),
-        new CharsetRecog_8859_2(),
-        new CharsetRecog_8859_5_ru(),
-        new CharsetRecog_8859_6_ar(),
-        new CharsetRecog_8859_7_el(),
-        new CharsetRecog_8859_8_I_he(),
-        new CharsetRecog_8859_8_he(),
-        new CharsetRecog_windows_1251(),
-        new CharsetRecog_windows_1256(),
-        new CharsetRecog_KOI8_R(),
-        new CharsetRecog_8859_9_tr(),
-        new CharsetRecog_sjis(),
-        new CharsetRecog_gb_18030(),
-        new CharsetRecog_euc_jp(),
-        new CharsetRecog_euc_kr(),
-        new CharsetRecog_big5(),
-
-        new CharsetRecog_2022JP(),
-        new CharsetRecog_2022KR(),
-        new CharsetRecog_2022CN(),
-
-        new CharsetRecog_IBM424_he_rtl(),
-        new CharsetRecog_IBM424_he_ltr(),
-        new CharsetRecog_IBM420_ar_rtl(),
-        new CharsetRecog_IBM420_ar_ltr()
+    CSRecognizerInfo *tempArray[] = {
+        new CSRecognizerInfo(new CharsetRecog_UTF8(), TRUE),
+
+        new CSRecognizerInfo(new CharsetRecog_UTF_16_BE(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_UTF_16_LE(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_UTF_32_BE(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_UTF_32_LE(), TRUE),
+
+        new CSRecognizerInfo(new CharsetRecog_8859_1(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_8859_2(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_8859_5_ru(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_8859_6_ar(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_8859_7_el(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_8859_8_I_he(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_8859_8_he(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_windows_1251(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_windows_1256(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_KOI8_R(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_8859_9_tr(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_sjis(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_gb_18030(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_euc_jp(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_euc_kr(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_big5(), TRUE),
+
+        new CSRecognizerInfo(new CharsetRecog_2022JP(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_2022KR(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_2022CN(), TRUE),
+
+        new CSRecognizerInfo(new CharsetRecog_IBM424_he_rtl(), FALSE),
+        new CSRecognizerInfo(new CharsetRecog_IBM424_he_ltr(), FALSE),
+        new CSRecognizerInfo(new CharsetRecog_IBM420_ar_rtl(), FALSE),
+        new CSRecognizerInfo(new CharsetRecog_IBM420_ar_ltr(), FALSE)
      };
      int32_t rCount = ARRAY_SIZE(tempArray);
  
-    fCSRecognizers = NEW_ARRAY(CharsetRecognizer *, rCount);
+    fCSRecognizers = NEW_ARRAY(CSRecognizerInfo *, rCount);
  
      if (fCSRecognizers == NULL) {
          status = U_MEMORY_ALLOCATION_ERROR;
-    } else {
+    } 
+    else {
          fCSRecognizers_size = rCount;
          for (int32_t r = 0; r < rCount; r += 1) {
              fCSRecognizers[r] = tempArray[r];
@@ -132,7 +147,8 @@ void CharsetDetector::setRecognizers(UErrorCode &status)
  
  CharsetDetector::CharsetDetector(UErrorCode &status)
    : textIn(new InputText(status)), resultArray(NULL),
-    resultCount(0), fStripTags(FALSE), fFreshTextSet(FALSE)
+    resultCount(0), fStripTags(FALSE), fFreshTextSet(FALSE),
+    fEnabledRecognizers(NULL)
  {
      if (U_FAILURE(status)) {
          return;
@@ -170,6 +186,10 @@ CharsetDetector::~CharsetDetector()
      }
  
      uprv_free(resultArray);
+
+    if (fEnabledRecognizers) {
+        uprv_free(fEnabledRecognizers);
+    }
  }
  
  void CharsetDetector::setText(const char *in, int32_t len)
@@ -234,7 +254,7 @@ const CharsetMatch * const *CharsetDetector::detectAll(int32_t &maxMatchesFound,
          // give a match quality > 0.
          resultCount = 0;
          for (i = 0; i < fCSRecognizers_size; i += 1) {
-            csr = fCSRecognizers[i];
+            csr = fCSRecognizers[i]->recognizer;
              if (csr->match(textIn, resultArray[resultCount])) {
                  resultCount++;
              }
@@ -251,6 +271,46 @@ const CharsetMatch * const *CharsetDetector::detectAll(int32_t &maxMatchesFound,
      return resultArray;
  }
  
+void CharsetDetector::setDetectableCharset(const char *encoding, UBool enabled, UErrorCode &status)
+{
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    int32_t modIdx = -1;
+    UBool isDefaultVal = FALSE;
+    for (int32_t i = 0; i < fCSRecognizers_size; i++) {
+        CSRecognizerInfo *csrinfo = fCSRecognizers[i];
+        if (uprv_strcmp(csrinfo->recognizer->getName(), encoding) == 0) {
+            modIdx = i;
+            isDefaultVal = (csrinfo->isDefaultEnabled == enabled);
+            break;
+        }
+    }
+    if (modIdx < 0) {
+        // No matching encoding found
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    if (fEnabledRecognizers == NULL && !isDefaultVal) {
+        // Create an array storing the non-default settings
+        fEnabledRecognizers = NEW_ARRAY(UBool, fCSRecognizers_size);
+        if (fEnabledRecognizers == NULL) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        // Initialize the array with default info
+        for (int32_t i = 0; i < fCSRecognizers_size; i++) {
+            fEnabledRecognizers[i] = fCSRecognizers[i]->isDefaultEnabled;
+        }
+    }
+
+    if (fEnabledRecognizers != NULL) {
+        fEnabledRecognizers[modIdx] = enabled;
+    }
+}
+
  /*const char *CharsetDetector::getCharsetName(int32_t index, UErrorCode &status) const
  {
      if( index > fCSRecognizers_size-1 || index < 0) {
@@ -267,6 +327,8 @@ U_NAMESPACE_END
  U_CDECL_BEGIN
  typedef struct {
      int32_t currIndex;
+    UBool all;
+    UBool *enabledRecognizers;
  } Context;
  
  
@@ -281,27 +343,73 @@ enumClose(UEnumeration *en) {
  }
  
  static int32_t U_CALLCONV
-enumCount(UEnumeration *, UErrorCode *) {
-    return fCSRecognizers_size;
+enumCount(UEnumeration *en, UErrorCode *) {
+    if (((Context *)en->context)->all) {
+        // ucsdet_getAllDetectableCharsets, all charset detector names
+        return fCSRecognizers_size;
+    }
+
+    // Otherwise, ucsdet_getDetectableCharsets - only enabled ones
+    int32_t count = 0;
+    UBool *enabledArray = ((Context *)en->context)->enabledRecognizers;
+    if (enabledArray != NULL) {
+        // custom set
+        for (int32_t i = 0; i < fCSRecognizers_size; i++) {
+            if (enabledArray[i]) {
+                count++;
+            }
+        }
+    } else {
+        // default set
+        for (int32_t i = 0; i < fCSRecognizers_size; i++) {
+            if (fCSRecognizers[i]->isDefaultEnabled) {
+                count++;
+            }
+        }
+    }
+    return count;
  }
  
  static const char* U_CALLCONV
  enumNext(UEnumeration *en, int32_t *resultLength, UErrorCode * /*status*/) {
-    if(((Context *)en->context)->currIndex >= fCSRecognizers_size) {
-        if(resultLength != NULL) {
-            *resultLength = 0;
+    const char *currName = NULL;
+
+    if (((Context *)en->context)->currIndex < fCSRecognizers_size) {
+        if (((Context *)en->context)->all) {
+            // ucsdet_getAllDetectableCharsets, all charset detector names
+            currName = fCSRecognizers[((Context *)en->context)->currIndex]->recognizer->getName();
+            ((Context *)en->context)->currIndex++;
+        } else {
+            // ucsdet_getDetectableCharsets
+            UBool *enabledArray = ((Context *)en->context)->enabledRecognizers;
+            if (enabledArray != NULL) {
+                // custom set
+                while (currName == NULL && ((Context *)en->context)->currIndex < fCSRecognizers_size) {
+                    if (enabledArray[((Context *)en->context)->currIndex]) {
+                        currName = fCSRecognizers[((Context *)en->context)->currIndex]->recognizer->getName();
+                    }
+                    ((Context *)en->context)->currIndex++;
+                }
+            } else {
+                // default set
+                while (currName == NULL && ((Context *)en->context)->currIndex < fCSRecognizers_size) {
+                    if (fCSRecognizers[((Context *)en->context)->currIndex]->isDefaultEnabled) {
+                        currName = fCSRecognizers[((Context *)en->context)->currIndex]->recognizer->getName();
+                    }
+                    ((Context *)en->context)->currIndex++;
+                }
+            }
          }
-        return NULL;
      }
-    const char *currName = fCSRecognizers[((Context *)en->context)->currIndex]->getName();
+
      if(resultLength != NULL) {
-        *resultLength = (int32_t)uprv_strlen(currName);
+        *resultLength = currName == NULL ? 0 : (int32_t)uprv_strlen(currName);
      }
-    ((Context *)en->context)->currIndex++;
  
      return currName;
  }
  
+
  static void U_CALLCONV
  enumReset(UEnumeration *en, UErrorCode *) {
      ((Context *)en->context)->currIndex = 0;
@@ -317,25 +425,61 @@ static const UEnumeration gCSDetEnumeration = {
      enumReset
  };
  
-U_CAPI  UEnumeration * U_EXPORT2
-ucsdet_getAllDetectableCharsets(const UCharsetDetector * /*ucsd*/, UErrorCode *status)
+U_CDECL_END
+
+U_NAMESPACE_BEGIN
+
+UEnumeration * CharsetDetector::getAllDetectableCharsets(UErrorCode &status)
  {
-    U_NAMESPACE_USE
  
-    if(U_FAILURE(*status)) {
+    /* Initialize recognized charsets. */
+    setRecognizers(status);
+
+    if(U_FAILURE(status)) {
          return 0;
      }
  
-    /* Initialize recognized charsets. */
-    CharsetDetector::getDetectableCount();
+    UEnumeration *en = NEW_ARRAY(UEnumeration, 1);
+    if (en == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return 0;
+    }
+    memcpy(en, &gCSDetEnumeration, sizeof(UEnumeration));
+    en->context = (void*)NEW_ARRAY(Context, 1);
+    if (en->context == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        DELETE_ARRAY(en);
+        return 0;
+    }
+    uprv_memset(en->context, 0, sizeof(Context));
+    ((Context*)en->context)->all = TRUE;
+    return en;
+}
+
+UEnumeration * CharsetDetector::getDetectableCharsets(UErrorCode &status) const
+{
+    if(U_FAILURE(status)) {
+        return 0;
+    }
  
      UEnumeration *en = NEW_ARRAY(UEnumeration, 1);
+    if (en == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return 0;
+    }
      memcpy(en, &gCSDetEnumeration, sizeof(UEnumeration));
      en->context = (void*)NEW_ARRAY(Context, 1);
+    if (en->context == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        DELETE_ARRAY(en);
+        return 0;
+    }
      uprv_memset(en->context, 0, sizeof(Context));
+    ((Context*)en->context)->all = FALSE;
+    ((Context*)en->context)->enabledRecognizers = fEnabledRecognizers;
      return en;
  }
-U_CDECL_END
  
-#endif
+U_NAMESPACE_END
  
+#endif
+\ No newline at end of file
diff --git a/icu4c/source/i18n/csdetect.h b/icu4c/source/i18n/csdetect.h

index 405e1f55800c305a95e852f636b4aa7d18823b44..15910453e84cf54944ac4cbb1068efba7e46241f 100644 (file)
--- a/icu4c/source/i18n/csdetect.h
+++ b/icu4c/source/i18n/csdetect.h
@@ -1,6 +1,6 @@
  /*
   **********************************************************************
- *   Copyright (C) 2005-2006, International Business Machines
+ *   Copyright (C) 2005-2013, International Business Machines
   *   Corporation and others.  All Rights Reserved.
   **********************************************************************
   */
@@ -28,6 +28,10 @@ private:
      UBool fFreshTextSet;
      static void setRecognizers(UErrorCode &status);
  
+    UBool *fEnabledRecognizers;  // If not null, the active set of charset recognizers has
+                                // been changed from the default. Array indices correspond
+                                // to those of fCSRecognizers. See setDetectableCharset().
+
  public:
      CharsetDetector(UErrorCode &status);
  
@@ -47,7 +51,12 @@ public:
  
  //    const char *getCharsetName(int32_t index, UErrorCode& status) const;
  
-    static int32_t getDetectableCount(); 
+    static int32_t getDetectableCount();
+
+
+    static UEnumeration * getAllDetectableCharsets(UErrorCode &status);
+    UEnumeration * getDetectableCharsets(UErrorCode &status) const;
+    void setDetectableCharset(const char *encoding, UBool enabled, UErrorCode &status);
  };
  
  U_NAMESPACE_END
diff --git a/icu4c/source/i18n/ucsdet.cpp b/icu4c/source/i18n/ucsdet.cpp

index d06dd3364281346e87dac0a7a0c218e7c5fd74ba..d702bdfffbceb2a6a7a932fca76c963e110314a4 100644 (file)
--- a/icu4c/source/i18n/ucsdet.cpp
+++ b/icu4c/source/i18n/ucsdet.cpp
@@ -1,6 +1,6 @@
  /*
   ********************************************************************************
- *   Copyright (C) 2005-2007, International Business Machines
+ *   Copyright (C) 2005-2013, International Business Machines
   *   Corporation and others.  All Rights Reserved.
   ********************************************************************************
   */
@@ -11,6 +11,11 @@
  #include "unicode/ucsdet.h"
  #include "csdetect.h"
  #include "csmatch.h"
+#include "csrsbcs.h"
+#include "csrmbcs.h"
+#include "csrutf8.h"
+#include "csrucode.h"
+#include "csr2022.h"
  
  #include "cmemory.h"
  
@@ -175,6 +180,26 @@ ucsdet_getUChars(const UCharsetMatch *ucsm,
  
      return ((CharsetMatch *) ucsm)->getUChars(buf, cap, status);
  }
+
+U_CAPI void U_EXPORT2
+ucsdet_setDetectableCharset(UCharsetDetector *ucsd, const char *encoding, UBool enabled, UErrorCode *status)
+{
+    ((CharsetDetector *)ucsd)->setDetectableCharset(encoding, enabled, *status);
+}
+
+U_CAPI  UEnumeration * U_EXPORT2
+ucsdet_getAllDetectableCharsets(const UCharsetDetector * /*ucsd*/, UErrorCode *status)
+{
+    return CharsetDetector::getAllDetectableCharsets(*status);
+}
+
+U_DRAFT UEnumeration * U_EXPORT2
+ucsdet_getDetectableCharsets(const UCharsetDetector *ucsd,  UErrorCode *status)
+{
+    return ((CharsetDetector *)ucsd)->getDetectableCharsets(*status);
+}
+
  U_CDECL_END
  
+
  #endif
diff --git a/icu4c/source/i18n/unicode/ucsdet.h b/icu4c/source/i18n/unicode/ucsdet.h

index dc492ee229e0183a635d9b93647af91b7a9b2fe5..d9b86a38955defa44c4b4f43056a15cb39f85354 100644 (file)
--- a/icu4c/source/i18n/unicode/ucsdet.h
+++ b/icu4c/source/i18n/unicode/ucsdet.h
@@ -1,6 +1,6 @@
  /*
   **********************************************************************
- *   Copyright (C) 2005-2010, International Business Machines
+ *   Copyright (C) 2005-2013, International Business Machines
   *   Corporation and others.  All Rights Reserved.
   **********************************************************************
   *   file name:  ucsdet.h
@@ -321,12 +321,21 @@ ucsdet_getUChars(const UCharsetMatch *ucsm,
    *  The returned UEnumeration provides access to the names of
    *  the charsets.
    *
+  *  <p>
    *  The state of the Charset detector that is passed in does not
    *  affect the result of this function, but requiring a valid, open
    *  charset detector as a parameter insures that the charset detection
    *  service has been safely initialized and that the required detection
    *  data is available.
    *
+  *  <p>
+  *  <b>Note:</b> Multiple different charset encodings in the same family may use
+  *  a single shared name in this implementation. For example, this function returns
+  *  an enumeration including "ISO-8859-1" (ISO Latin 1), but not "windows-1252"
+  *  (Windows Latin 1). However, the actual detection result could be "windows-1252"
+  *  when the input data matches Latin 1 and contains code points that are only
+  *  available in "windows-1252".
+  *
    *  @param ucsd a Charset detector.
    *  @param status  Any error conditions are reported back in this variable.
    *  @return an iterator providing access to the detectable charset names.
@@ -335,7 +344,6 @@ ucsdet_getUChars(const UCharsetMatch *ucsm,
  U_STABLE  UEnumeration * U_EXPORT2
  ucsdet_getAllDetectableCharsets(const UCharsetDetector *ucsd,  UErrorCode *status);
  
-
  /**
    *  Test whether input filtering is enabled for this charset detector.
    *  Input filtering removes text that appears to be HTML or xml
@@ -346,6 +354,7 @@ ucsdet_getAllDetectableCharsets(const UCharsetDetector *ucsd,  UErrorCode *statu
    *  @return TRUE if filtering is enabled.
    *  @stable ICU 3.6
    */
+
  U_STABLE  UBool U_EXPORT2
  ucsdet_isInputFilterEnabled(const UCharsetDetector *ucsd);
  
@@ -364,6 +373,39 @@ ucsdet_isInputFilterEnabled(const UCharsetDetector *ucsd);
  U_STABLE  UBool U_EXPORT2
  ucsdet_enableInputFilter(UCharsetDetector *ucsd, UBool filter);
  
+
+/**
+  *  Get an iterator over the set of detectable charsets -
+  *  over the charsets that are enabled by the specified charset detector.
+  *
+  *  The returned UEnumeration provides access to the names of
+  *  the charsets.
+  *
+  *  @param ucsd a Charset detector.
+  *  @param status  Any error conditions are reported back in this variable.
+  *  @return an iterator providing access to the names of the charsets enabled
+  *  in the specified charset detector.
+  *  @internal
+  */
+U_DRAFT UEnumeration * U_EXPORT2
+ucsdet_getDetectableCharsets(const UCharsetDetector *ucsd,  UErrorCode *status);
+
+/**
+  * Enable or disable an individual charset encoding.
+  * The name of the charset encoding must be one of the names returned by
+  * ucsdet_getAllDetectableCharsets().
+  *
+  * @param ucsd a Charset detector.
+  * @param encoding the name of the charset encoding.
+  * @param enabled <code>TRUE</code> to enable, or <code>FALSE</code> to disable the
+  *   charset encoding.
+  * @param status receives the return status. When the name of charset encoding
+  *   is not supported, U_ILLEGAL_ARGUMENT_ERROR is set.
+  * @internal
+  */
+U_DRAFT void U_EXPORT2
+ucsdet_setDetectableCharset(UCharsetDetector *ucsd, const char *encoding, UBool enabled, UErrorCode *status);
+
  #endif
  #endif   /* __UCSDET_H */
  
diff --git a/icu4c/source/test/intltest/csdetest.cpp b/icu4c/source/test/intltest/csdetest.cpp

index cdb39cefeaeed1ef992ecf4765a3be122ea0d97e..62b4eacb2b85be3d6329532260616c891270cec6 100644 (file)
--- a/icu4c/source/test/intltest/csdetest.cpp
+++ b/icu4c/source/test/intltest/csdetest.cpp
@@ -1,6 +1,6 @@
  /*
   **********************************************************************
- *   Copyright (C) 2005-2012, International Business Machines
+ *   Copyright (C) 2005-2013, International Business Machines
   *   Corporation and others.  All Rights Reserved.
   **********************************************************************
   */
@@ -275,6 +275,45 @@ void CharsetDetectionTest::ConstructionTest()
          printf("%s\n", name);
  #endif
      }
+
+    const char* defDisabled[] = {
+        "IBM420_rtl", "IBM420_ltr",
+        "IBM424_rtl", "IBM424_ltr",
+        0
+    };
+
+    LocalUEnumerationPointer eActive(ucsdet_getDetectableCharsets(csd.getAlias(), status));
+    const char *activeName = NULL;
+
+    while (activeName = uenum_next(eActive.getAlias(), NULL, status)) {
+        // the charset must also appear in the list of all detectable charsets
+        UBool found = FALSE;
+
+        const char *name = NULL;
+        uenum_reset(e.getAlias(), status);
+        while (name = uenum_next(e.getAlias(), NULL, status)) {
+            if (strcmp(activeName, name) == 0) {
+                found = TRUE;
+                break;
+            }
+        }
+
+        if (!found) {
+            errln(UnicodeString(activeName) + " is not included in the all charset list.");
+        }
+
+        // some charsets are disabled by default
+        found = FALSE;
+        for (int32_t i = 0; defDisabled[i] != 0; i++) {
+            if (strcmp(activeName, defDisabled[i]) == 0) {
+                found = TRUE;
+                break;
+            }
+        }
+        if (found) {
+            errln(UnicodeString(activeName) + " should not be included in the default charset list.");
+        }
+    }
  }
  
  void CharsetDetectionTest::UTF8Test()
@@ -597,6 +636,10 @@ void CharsetDetectionTest::IBM424Test()
      char *bytes_r = extractBytes(s2, "IBM424", brLength);
      
      UCharsetDetector *csd = ucsdet_open(&status);
+       ucsdet_setDetectableCharset(csd, "IBM424_rtl", TRUE, &status);
+       ucsdet_setDetectableCharset(csd, "IBM424_ltr", TRUE, &status);
+       ucsdet_setDetectableCharset(csd, "IBM420_rtl", TRUE, &status);
+       ucsdet_setDetectableCharset(csd, "IBM420_ltr", TRUE, &status);
      if (U_FAILURE(status)) {
          errln("Error opening charset detector. - %s", u_errorName(status));
      }
@@ -684,6 +727,10 @@ void CharsetDetectionTest::IBM420Test()
      if (U_FAILURE(status)) {
          errln("Error opening charset detector. - %s", u_errorName(status));
      }
+       ucsdet_setDetectableCharset(csd, "IBM424_rtl", TRUE, &status);
+       ucsdet_setDetectableCharset(csd, "IBM424_ltr", TRUE, &status);
+       ucsdet_setDetectableCharset(csd, "IBM420_rtl", TRUE, &status);
+       ucsdet_setDetectableCharset(csd, "IBM420_ltr", TRUE, &status);
      const UCharsetMatch *match;
      const char *name;
author	Yoshito Umaoka <y.umaoka@gmail.com>
	Tue, 17 Sep 2013 06:57:53 +0000 (06:57 +0000)
committer	Yoshito Umaoka <y.umaoka@gmail.com>
	Tue, 17 Sep 2013 06:57:53 +0000 (06:57 +0000)
icu4c/source/i18n/csdetect.cpp		patch \| blob \| history
icu4c/source/i18n/csdetect.h		patch \| blob \| history
icu4c/source/i18n/ucsdet.cpp		patch \| blob \| history
icu4c/source/i18n/unicode/ucsdet.h		patch \| blob \| history
icu4c/source/test/intltest/csdetest.cpp		patch \| blob \| history