ICU-8605 document & test ICU4C dependencies, remove cycles, reduce some deps; merged...

author Markus Scherer <markus.icu@gmail.com>

Fri, 3 Jun 2011 05:23:57 +0000 (05:23 +0000)

committer Markus Scherer <markus.icu@gmail.com>

Fri, 3 Jun 2011 05:23:57 +0000 (05:23 +0000)
author Markus Scherer <markus.icu@gmail.com>
Fri, 3 Jun 2011 05:23:57 +0000 (05:23 +0000)
committer Markus Scherer <markus.icu@gmail.com>
Fri, 3 Jun 2011 05:23:57 +0000 (05:23 +0000)
diff --git a/icu4c/source/common/Makefile.in b/icu4c/source/common/Makefile.in

index 2fb2365383b5b7d57d6504f8e41d8fbac30dfa04..f99a5db1d857beaa4d85ed16bacf28171f25c309 100644 (file)
--- a/icu4c/source/common/Makefile.in
+++ b/icu4c/source/common/Makefile.in
@@ -90,12 +90,13 @@ stringtriebuilder.o bytestriebuilder.o \
  bytestrie.o bytestrieiterator.o \
  ucharstrie.o ucharstriebuilder.o ucharstrieiterator.o \
  appendable.o ustr_cnv.o unistr_cnv.o unistr.o unistr_case.o unistr_props.o \
-utf_impl.o ustring.o ustrcase.o ucasemap.o cstring.o ustrfmt.o ustrtrns.o ustr_wcs.o utext.o \
+utf_impl.o ustring.o ustrcase.o ucasemap.o ucasemap_titlecase_brkiter.o cstring.o ustrfmt.o ustrtrns.o ustr_wcs.o utext.o \
+unistr_case_locale.o ustrcase_locale.o unistr_titlecase_brkiter.o ustr_titlecase_brkiter.o \
  normalizer2impl.o normalizer2.o filterednormalizer2.o normlzr.o unorm.o unormcmp.o unorm_it.o \
  chariter.o schriter.o uchriter.o uiter.o \
  patternprops.o uchar.o uprops.o ucase.o propname.o ubidi_props.o ubidi.o ubidiwrt.o ubidiln.o ushape.o \
  uscript.o usc_impl.o unames.o \
-utrie.o utrie2.o utrie2_builder.o bmpset.o unisetspan.o uset_props.o uniset_props.o uset.o uniset.o usetiter.o ruleiter.o caniter.o unifilt.o unifunct.o \
+utrie.o utrie2.o utrie2_builder.o bmpset.o unisetspan.o uset_props.o uniset_props.o uniset_closure.o uset.o uniset.o usetiter.o ruleiter.o caniter.o unifilt.o unifunct.o \
  uarrsort.o brkiter.o ubrk.o brkeng.o dictbe.o triedict.o \
  rbbi.o rbbidata.o rbbinode.o rbbirb.o rbbiscan.o rbbisetb.o rbbistbl.o rbbitblb.o \
  serv.o servnotf.o servls.o servlk.o servlkf.o servrbf.o servslkf.o \
diff --git a/icu4c/source/common/bytestriebuilder.cpp b/icu4c/source/common/bytestriebuilder.cpp

index c643583b0baf84b7ed166bf5339d2a24960e35ca..4f4adfb2313e6f76fe8061cb57c6727a65ec7033 100644 (file)
--- a/icu4c/source/common/bytestriebuilder.cpp
+++ b/icu4c/source/common/bytestriebuilder.cpp
@@ -21,6 +21,7 @@
  #include "uhash.h"
  #include "uarrsort.h"
  #include "uassert.h"
+#include "ustr_imp.h"
  
  U_NAMESPACE_BEGIN
  
@@ -335,7 +336,7 @@ BytesTrieBuilder::indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, UChar
  
  BytesTrieBuilder::BTLinearMatchNode::BTLinearMatchNode(const char *bytes, int32_t len, Node *nextNode)
          : LinearMatchNode(len, nextNode), s(bytes) {
-    hash=hash*37+uhash_hashCharsN(bytes, len);
+    hash=hash*37+ustr_hashCharsN(bytes, len);
  }
  
  UBool
diff --git a/icu4c/source/common/caniter.cpp b/icu4c/source/common/caniter.cpp

index e6b0e83778e587b095df5161ad24595691e65219..1eaf6d20f214e20d7f5690f37ad33fcd4028599f 100644 (file)
--- a/icu4c/source/common/caniter.cpp
+++ b/icu4c/source/common/caniter.cpp
@@ -1,6 +1,6 @@
  /*
   *****************************************************************************
- * Copyright (C) 1996-2010, International Business Machines Corporation and  *
+ * Copyright (C) 1996-2011, International Business Machines Corporation and  *
   * others. All Rights Reserved.                                              *
   *****************************************************************************
   */
@@ -288,7 +288,7 @@ void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros
      if(U_FAILURE(status)) {
          return;
      }
-    subpermute.setValueDeleter(uhash_deleteUnicodeString);
+    subpermute.setValueDeleter(uprv_deleteUObject);
  
      for (i = 0; i < source.length(); i += UTF16_CHAR_LENGTH(cp)) {
          cp = source.char32At(i);
@@ -345,9 +345,9 @@ UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, i
      if (U_FAILURE(status)) {
          return 0;
      }
-    result.setValueDeleter(uhash_deleteUnicodeString);
-    permutations.setValueDeleter(uhash_deleteUnicodeString);
-    basic.setValueDeleter(uhash_deleteUnicodeString);
+    result.setValueDeleter(uprv_deleteUObject);
+    permutations.setValueDeleter(uprv_deleteUObject);
+    basic.setValueDeleter(uprv_deleteUObject);
  
      UChar USeg[256];
      int32_t segLen = segment.extract(USeg, 256, status);
@@ -453,7 +453,7 @@ Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const UCh
          while (iter.next()) {
              UChar32 cp2 = iter.getCodepoint();
              Hashtable remainder(status);
-            remainder.setValueDeleter(uhash_deleteUnicodeString);
+            remainder.setValueDeleter(uprv_deleteUObject);
              if (extract(&remainder, cp2, segment, segLen, i, status) == NULL) {
                  continue;
              }
diff --git a/icu4c/source/common/cmemory.h b/icu4c/source/common/cmemory.h

index 64e701aca54626f773db03c25333cd0be5bb63c0..d5e08a5338c46b7cb41efba0dab3e78d5f764731 100644 (file)
--- a/icu4c/source/common/cmemory.h
+++ b/icu4c/source/common/cmemory.h
@@ -91,6 +91,22 @@ cmemory_inUse(void);
  U_CFUNC UBool 
  cmemory_cleanup(void);
  
+/**
+ * A function called by <TT>uhash_remove</TT>,
+ * <TT>uhash_close</TT>, or <TT>uhash_put</TT> to delete
+ * an existing key or value.
+ * @param obj A key or value stored in a hashtable
+ * @see uprv_deleteUObject
+ */
+typedef void U_CALLCONV UObjectDeleter(void* obj);
+
+/**
+ * Deleter for UObject instances.
+ * Works for all subclasses of UObject because it has a virtual destructor.
+ */
+U_CAPI void U_EXPORT2
+uprv_deleteUObject(void *obj);
+
  #ifdef XP_CPLUSPLUS
  
  U_NAMESPACE_BEGIN
diff --git a/icu4c/source/common/common.vcxproj b/icu4c/source/common/common.vcxproj

index 8afd9ec3531096e76ec193fbc02f29a054df52ac..cc2c3c23eaab6f90939e0a28bf17adadfe463280 100644 (file)
--- a/icu4c/source/common/common.vcxproj
+++ b/icu4c/source/common/common.vcxproj
@@ -383,6 +383,7 @@
      <ClCompile Include="unifilt.cpp" />\r
      <ClCompile Include="unifunct.cpp" />\r
      <ClCompile Include="uniset.cpp" />\r
+    <ClCompile Include="uniset_closure.cpp" />\r
      <ClCompile Include="uniset_props.cpp" />\r
      <ClCompile Include="unisetspan.cpp" />\r
      <ClCompile Include="uprops.cpp" />\r
@@ -414,6 +415,7 @@
      <ClCompile Include="stringpiece.cpp" />\r
      <ClCompile Include="stringtriebuilder.cpp" />\r
      <ClCompile Include="ucasemap.cpp" />\r
+    <ClCompile Include="ucasemap_titlecase_brkiter.cpp" />\r
      <ClCompile Include="ucharstrie.cpp" />\r
      <ClCompile Include="ucharstriebuilder.cpp" />\r
      <ClCompile Include="ucharstrieiterator.cpp" />\r
@@ -422,11 +424,15 @@
      <ClCompile Include="uiter.cpp" />\r
      <ClCompile Include="unistr.cpp" />\r
      <ClCompile Include="unistr_case.cpp" />\r
+    <ClCompile Include="unistr_case_locale.cpp" />\r
      <ClCompile Include="unistr_cnv.cpp" />\r
      <ClCompile Include="unistr_props.cpp" />\r
+    <ClCompile Include="unistr_titlecase_brkiter.cpp" />\r
      <ClCompile Include="ustr_cnv.c" />\r
+    <ClCompile Include="ustr_titlecase_brkiter.cpp" />\r
      <ClCompile Include="ustr_wcs.cpp" />\r
      <ClCompile Include="ustrcase.cpp" />\r
+    <ClCompile Include="ustrcase_locale.cpp" />\r
      <ClCompile Include="ustring.cpp" />\r
      <ClCompile Include="ustrtrns.cpp" />\r
      <ClCompile Include="utext.cpp" />\r
diff --git a/icu4c/source/common/common.vcxproj.filters b/icu4c/source/common/common.vcxproj.filters

index 562c42063317af0efe8eecb83ce1b424e1e1d157..292b171183c70fe27bfe0594ed9e3398eadd329d 100644 (file)
--- a/icu4c/source/common/common.vcxproj.filters
+++ b/icu4c/source/common/common.vcxproj.filters
@@ -391,6 +391,9 @@
      <ClCompile Include="uniset.cpp">\r
        <Filter>properties &amp; sets</Filter>\r
      </ClCompile>\r
+    <ClCompile Include="uniset_closure.cpp">\r
+      <Filter>properties &amp; sets</Filter>\r
+    </ClCompile>\r
      <ClCompile Include="uniset_props.cpp">\r
        <Filter>properties &amp; sets</Filter>\r
      </ClCompile>\r
@@ -466,6 +469,9 @@
      <ClCompile Include="ucasemap.cpp">\r
        <Filter>strings</Filter>\r
      </ClCompile>\r
+    <ClCompile Include="ucasemap_titlecase_brkiter.cpp">\r
+      <Filter>strings</Filter>\r
+    </ClCompile>\r
      <ClCompile Include="uchriter.cpp">\r
        <Filter>strings</Filter>\r
      </ClCompile>\r
@@ -481,21 +487,33 @@
      <ClCompile Include="unistr_case.cpp">\r
        <Filter>strings</Filter>\r
      </ClCompile>\r
+    <ClCompile Include="unistr_case_locale.cpp">\r
+      <Filter>strings</Filter>\r
+    </ClCompile>\r
      <ClCompile Include="unistr_cnv.cpp">\r
        <Filter>strings</Filter>\r
      </ClCompile>\r
      <ClCompile Include="unistr_props.cpp">\r
        <Filter>strings</Filter>\r
      </ClCompile>\r
+    <ClCompile Include="unistr_titlecase_brkiter.cpp">\r
+      <Filter>strings</Filter>\r
+    </ClCompile>\r
      <ClCompile Include="ustr_cnv.c">\r
        <Filter>strings</Filter>\r
      </ClCompile>\r
+    <ClCompile Include="ustr_titlecase_brkiter.cpp">\r
+      <Filter>strings</Filter>\r
+    </ClCompile>\r
      <ClCompile Include="ustr_wcs.cpp">\r
        <Filter>strings</Filter>\r
      </ClCompile>\r
      <ClCompile Include="ustrcase.cpp">\r
        <Filter>strings</Filter>\r
      </ClCompile>\r
+    <ClCompile Include="ustrcase_locale.cpp">\r
+      <Filter>strings</Filter>\r
+    </ClCompile>\r
      <ClCompile Include="ustring.cpp">\r
        <Filter>strings</Filter>\r
      </ClCompile>\r
diff --git a/icu4c/source/common/hash.h b/icu4c/source/common/hash.h

index 9fedd0e521f9440e5d67dc1083dc621a42f1106e..57467daf218aabcec123c6808e24a27e1e6b95d2 100644 (file)
--- a/icu4c/source/common/hash.h
+++ b/icu4c/source/common/hash.h
@@ -1,6 +1,6 @@
  /*
  ******************************************************************************
-*   Copyright (C) 1997-2010, International Business Machines
+*   Copyright (C) 1997-2011, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  ******************************************************************************
  *   Date        Name        Description
@@ -13,6 +13,7 @@
  
  #include "unicode/unistr.h"
  #include "unicode/uobject.h"
+#include "cmemory.h"
  #include "uhash.h"
  
  U_NAMESPACE_BEGIN
@@ -108,7 +109,7 @@ inline void Hashtable::init(UHashFunction *keyHash, UKeyComparator *keyComp,
      uhash_init(&hashObj, keyHash, keyComp, valueComp, &status);
      if (U_SUCCESS(status)) {
          hash = &hashObj;
-        uhash_setKeyDeleter(hash, uhash_deleteUnicodeString);
+        uhash_setKeyDeleter(hash, uprv_deleteUObject);
      }
  }
  
diff --git a/icu4c/source/common/normalizer2.cpp b/icu4c/source/common/normalizer2.cpp

index 8c5fdb61d9cc16f13a9766d496cb661c4d507171..93f074f4972cf3a80c34bc505de0db2d582ef699 100644 (file)
--- a/icu4c/source/common/normalizer2.cpp
+++ b/icu4c/source/common/normalizer2.cpp
@@ -844,7 +844,18 @@ unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) {
  
  // Some properties APIs ---------------------------------------------------- ***
  
-U_CFUNC UNormalizationCheckResult U_EXPORT2
+U_CAPI uint8_t U_EXPORT2
+u_getCombiningClass(UChar32 c) {  // Returns getCC() of c's norm16 value from the NFC implementation, or 0 if that implementation is unavailable.
+    UErrorCode errorCode=U_ZERO_ERROR;  // local status: this function has no error parameter to report through
+    const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
+    if(U_SUCCESS(errorCode)) {
+        return impl->getCC(impl->getNorm16(c));
+    } else {
+        return 0;  // getNFCImpl() failed; the error is dropped and 0 is returned
+    }
+}
+
+U_CFUNC UNormalizationCheckResult
  unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
      if(mode<=UNORM_NONE || UNORM_FCD<=mode) {
          return UNORM_YES;
@@ -858,6 +869,17 @@ unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
      }
  }
  
+U_CFUNC uint16_t
+unorm_getFCD16Simple(UChar32 c) {  // Returns the 16-bit FCD trie value for c, or 0 if the FCD trie is unavailable.
+    UErrorCode errorCode=U_ZERO_ERROR;  // local status: this function has no error parameter to report through
+    const UTrie2 *trie=Normalizer2Factory::getFCDTrie(errorCode);
+    if(U_SUCCESS(errorCode)) {
+        return UTRIE2_GET16(trie, c);
+    } else {
+        return 0;  // getFCDTrie() failed; the error is dropped and 0 is returned
+    }
+}
+
  U_CAPI const uint16_t * U_EXPORT2
  unorm_getFCDTrieIndex(UChar32 &fcdHighStart, UErrorCode *pErrorCode) {
      const UTrie2 *trie=Normalizer2Factory::getFCDTrie(*pErrorCode);
diff --git a/icu4c/source/common/normalizer2impl.cpp b/icu4c/source/common/normalizer2impl.cpp

index 11d0581072d03a7b7fbef60262d1c3b6c39d08d9..87b34408e662b021482b742ff8397fdc1eb80c12 100644 (file)
--- a/icu4c/source/common/normalizer2impl.cpp
+++ b/icu4c/source/common/normalizer2impl.cpp
@@ -25,7 +25,6 @@
  #include "mutex.h"
  #include "normalizer2impl.h"
  #include "uassert.h"
-#include "uhash.h"
  #include "uset_imp.h"
  #include "utrie2.h"
  #include "uvector.h"
@@ -1713,7 +1712,7 @@ const UChar *Normalizer2Impl::findNextFCDBoundary(const UChar *p, const UChar *l
  
  CanonIterData::CanonIterData(UErrorCode &errorCode) :
          trie(utrie2_open(0, 0, &errorCode)),
-        canonStartSets(uhash_deleteUObject, NULL, errorCode) {}
+        canonStartSets(uprv_deleteUObject, NULL, errorCode) {}
  
  CanonIterData::~CanonIterData() {
      utrie2_close(trie);
diff --git a/icu4c/source/common/normalizer2impl.h b/icu4c/source/common/normalizer2impl.h

index 15b6d8c8b38d8ab643962fe2e5da536342e65696..4ff2386ee83ece04f851b8bca84a5de21fa99f3a 100644 (file)
--- a/icu4c/source/common/normalizer2impl.h
+++ b/icu4c/source/common/normalizer2impl.h
@@ -547,9 +547,16 @@ unorm2_swap(const UDataSwapper *ds,
   * Get the NF*_QC property for a code point, for u_getIntPropertyValue().
   * @internal
   */
-U_CFUNC UNormalizationCheckResult U_EXPORT2
+U_CFUNC UNormalizationCheckResult
  unorm_getQuickCheck(UChar32 c, UNormalizationMode mode);
  
+/**
+ * Get the 16-bit FCD value (lead & trail CCs) for a code point, for u_getIntPropertyValue().
+ * @internal
+ */
+U_CFUNC uint16_t
+unorm_getFCD16Simple(UChar32 c);
+
  /**
   * Internal API, used by collation code.
   * Get access to the internal FCD trie table to be able to perform
diff --git a/icu4c/source/common/propname.cpp b/icu4c/source/common/propname.cpp

index 1aafdaf41814c0d001a91fcf29dd190b86298044..6d5d9357e0ff43b77ef1ad5188dcd99dfbdd4c9b 100644 (file)
--- a/icu4c/source/common/propname.cpp
+++ b/icu4c/source/common/propname.cpp
@@ -12,6 +12,7 @@
  #include "propname.h"
  #include "unicode/uchar.h"
  #include "unicode/udata.h"
+#include "unicode/uscript.h"
  #include "umutex.h"
  #include "cmemory.h"
  #include "cstring.h"
@@ -312,3 +313,15 @@ u_getPropertyValueEnum(UProperty property,
      U_NAMESPACE_USE
      return PropNameData::getPropertyValueEnum(property, alias);
  }
+
+U_CAPI const char*  U_EXPORT2
+uscript_getName(UScriptCode scriptCode){  // Thin wrapper: long-form UCHAR_SCRIPT value name for scriptCode.
+    return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode,
+                                  U_LONG_PROPERTY_NAME);  // result is passed through unchanged from u_getPropertyValueName()
+}
+
+U_CAPI const char*  U_EXPORT2
+uscript_getShortName(UScriptCode scriptCode){  // Thin wrapper: short-form UCHAR_SCRIPT value name for scriptCode.
+    return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode,
+                                  U_SHORT_PROPERTY_NAME);  // result is passed through unchanged from u_getPropertyValueName()
+}
diff --git a/icu4c/source/common/rbbiscan.cpp b/icu4c/source/common/rbbiscan.cpp

index bba16e9ef248094959a2d5f85156243b2f2a4ebf..ddec6873c52d80239825cc5f4d07473246de619a 100644 (file)
--- a/icu4c/source/common/rbbiscan.cpp
+++ b/icu4c/source/common/rbbiscan.cpp
@@ -23,7 +23,6 @@
  #include "unicode/uchriter.h"
  #include "unicode/parsepos.h"
  #include "unicode/parseerr.h"
-#include "util.h"
  #include "cmemory.h"
  #include "cstring.h"
  
@@ -122,18 +121,14 @@ RBBIRuleScanner::RBBIRuleScanner(RBBIRuleBuilder *rb)
      //            and the time to build these few sets should be small compared to a
      //            full break iterator build.
      fRuleSets[kRuleSet_rule_char-128]       = UnicodeSet(gRuleSet_rule_char_pattern,       *rb->fStatus);
-    UnicodeSet *whitespaceSet = uprv_openPatternWhiteSpaceSet(rb->fStatus);
-    if (U_FAILURE(*rb->fStatus)) {
-        return;
-    }
-    fRuleSets[kRuleSet_white_space-128]     = *whitespaceSet;
-    delete whitespaceSet;
+    // fRuleSets[kRuleSet_white_space-128] = [:Pattern_White_Space:]
+    fRuleSets[kRuleSet_white_space-128].add(9, 0xd).add(0x20).add(0x85).add(0x200e, 0x200f).add(0x2028, 0x2029);
      fRuleSets[kRuleSet_name_char-128]       = UnicodeSet(gRuleSet_name_char_pattern,       *rb->fStatus);
      fRuleSets[kRuleSet_name_start_char-128] = UnicodeSet(gRuleSet_name_start_char_pattern, *rb->fStatus);
      fRuleSets[kRuleSet_digit_char-128]      = UnicodeSet(gRuleSet_digit_char_pattern,      *rb->fStatus);
      if (*rb->fStatus == U_ILLEGAL_ARGUMENT_ERROR) {
          // This case happens if ICU's data is missing.  UnicodeSet tries to look up property
-        //   names from the init string, can't find them, and claims an illegal arguement.
+        //   names from the init string, can't find them, and claims an illegal argument.
          //   Change the error so that the actual problem will be clearer to users.
          *rb->fStatus = U_BRK_INIT_ERROR;
      }
@@ -1146,12 +1141,11 @@ void RBBIRuleScanner::scanSet() {
      pos.setIndex(fScanIndex);
      startPos = fScanIndex;
      UErrorCode localStatus = U_ZERO_ERROR;
-    uset = new UnicodeSet(fRB->fRules, pos, USET_IGNORE_SPACE,
-                         fSymbolTable,
-                         localStatus);
+    uset = new UnicodeSet();
      if (uset == NULL) {
          localStatus = U_MEMORY_ALLOCATION_ERROR;
      }
+    if (uset != NULL) { uset->applyPatternIgnoreSpace(fRB->fRules, pos, fSymbolTable, localStatus); }  // skip on allocation failure; localStatus already holds U_MEMORY_ALLOCATION_ERROR
      if (U_FAILURE(localStatus)) {
          //  TODO:  Get more accurate position of the error from UnicodeSet's return info.
          //         UnicodeSet appears to not be reporting correctly at this time.
diff --git a/icu4c/source/common/serv.cpp b/icu4c/source/common/serv.cpp

index 8ce591804d86495ca5bba0d4a9bdd993f86da32c..7e5ca53107d5ecef3170d00436a8283e784ad7d6 100644 (file)
--- a/icu4c/source/common/serv.cpp
+++ b/icu4c/source/common/serv.cpp
@@ -278,7 +278,7 @@ public:
      DNCache(const Locale& _locale) 
          : cache(), locale(_locale) 
      {
-        // cache.setKeyDeleter(uhash_deleteUnicodeString);
+        // cache.setKeyDeleter(uprv_deleteUObject);
      }
  };
  
@@ -519,7 +519,7 @@ ICUService::getKey(ICUServiceKey& key, UnicodeString* actualReturn, const ICUSer
              // fallback to the one that succeeded, we want to hit the
              // cache the first time next goaround.
              if (cacheDescriptorList._obj == NULL) {
-                cacheDescriptorList._obj = new UVector(uhash_deleteUnicodeString, NULL, 5, status);
+                cacheDescriptorList._obj = new UVector(uprv_deleteUObject, NULL, 5, status);
                  if (U_FAILURE(status)) {
                      return NULL;
                  }
diff --git a/icu4c/source/common/servls.cpp b/icu4c/source/common/servls.cpp

index b3c78c78e3fda1e8bc84b9582348dddbd954c6cd..570c10a2628edf9f83e5a3e37acf3071cf70b56c 100644 (file)
--- a/icu4c/source/common/servls.cpp
+++ b/icu4c/source/common/servls.cpp
@@ -15,7 +15,6 @@
  #include "cmemory.h"
  #include "servloc.h"
  #include "ustrfmt.h"
-#include "uhash.h"
  #include "charstr.h"
  #include "ucln_cmn.h"
  #include "uassert.h"
@@ -163,7 +162,7 @@ private:
      ServiceEnumeration(const ICULocaleService* service, UErrorCode &status)
          : _service(service)
          , _timestamp(service->getTimestamp())
-        , _ids(uhash_deleteUnicodeString, NULL, status)
+        , _ids(uprv_deleteUObject, NULL, status)
          , _pos(0)
      {
          _service->getVisibleIDs(_ids, status);
@@ -172,7 +171,7 @@ private:
      ServiceEnumeration(const ServiceEnumeration &other, UErrorCode &status)
          : _service(other._service)
          , _timestamp(other._timestamp)
-        , _ids(uhash_deleteUnicodeString, NULL, status)
+        , _ids(uprv_deleteUObject, NULL, status)
          , _pos(0)
      {
          if(U_SUCCESS(status)) {
diff --git a/icu4c/source/common/stringtriebuilder.cpp b/icu4c/source/common/stringtriebuilder.cpp

index f016cb535ae228e39bfd897e4f846a1cb9dfcb94..f30b13862b021bbbad77c03446bc390604f3181f 100644 (file)
--- a/icu4c/source/common/stringtriebuilder.cpp
+++ b/icu4c/source/common/stringtriebuilder.cpp
@@ -51,7 +51,7 @@ StringTrieBuilder::createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode
          errorCode=U_MEMORY_ALLOCATION_ERROR;
      }
      if(U_SUCCESS(errorCode)) {
-        uhash_setKeyDeleter(nodes, uhash_deleteUObject);
+        uhash_setKeyDeleter(nodes, uprv_deleteUObject);
      }
  }
  
diff --git a/icu4c/source/common/ucasemap.cpp b/icu4c/source/common/ucasemap.cpp

index d7543df87ddd20fbdfa7336d69b62cffa6199f43..0b52975d072a7f4673b54457514f89a99f20461d 100644 (file)
--- a/icu4c/source/common/ucasemap.cpp
+++ b/icu4c/source/common/ucasemap.cpp
@@ -17,11 +17,12 @@
  */
  
  #include "unicode/utypes.h"
+#include "unicode/brkiter.h"
+#include "unicode/ubrk.h"
  #include "unicode/uloc.h"
  #include "unicode/ustring.h"
  #include "unicode/ucasemap.h"
  #if !UCONFIG_NO_BREAK_ITERATION
-#include "unicode/ubrk.h"
  #include "unicode/utext.h"
  #endif
  #include "cmemory.h"
@@ -29,6 +30,8 @@
  #include "ucase.h"
  #include "ustr_imp.h"
  
+U_NAMESPACE_USE
+
  /* UCaseMap service object -------------------------------------------------- */
  
  U_CAPI UCaseMap * U_EXPORT2
@@ -60,7 +63,8 @@ U_CAPI void U_EXPORT2
  ucasemap_close(UCaseMap *csm) {
      if(csm!=NULL) {
  #if !UCONFIG_NO_BREAK_ITERATION
-        ubrk_close(csm->iter);
+        // Do not call ubrk_close() so that we do not depend on all of the BreakIterator code.
+        delete reinterpret_cast<BreakIterator *>(csm->iter);
  #endif
          uprv_free(csm);
      }
@@ -106,21 +110,6 @@ ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode * /*pErrorCode*/
      csm->options=options;
  }
  
-#if !UCONFIG_NO_BREAK_ITERATION
-
-U_CAPI const UBreakIterator * U_EXPORT2
-ucasemap_getBreakIterator(const UCaseMap *csm) {
-    return csm->iter;
-}
-
-U_CAPI void U_EXPORT2
-ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode * /*pErrorCode*/) {
-    ubrk_close(csm->iter);
-    csm->iter=iterToAdopt;
-}
-
-#endif
-
  /* UTF-8 string case mappings ----------------------------------------------- */
  
  /* TODO(markus): Move to a new, separate utf8case.c file. */
@@ -262,37 +251,29 @@ _caseMap(const UCaseMap *csm, UCaseMapFull *map,
  
  #if !UCONFIG_NO_BREAK_ITERATION
  
-/*
- * Internal titlecasing function.
- */
-static int32_t
-_toTitle(UCaseMap *csm,
+U_CFUNC int32_t U_CALLCONV
+ucasemap_internalUTF8ToTitle(const UCaseMap *csm,
           uint8_t *dest, int32_t destCapacity,
-         const uint8_t *src, UCaseContext *csc,
-         int32_t srcLength,
+         const uint8_t *src, int32_t srcLength,
           UErrorCode *pErrorCode) {
-    UText utext=UTEXT_INITIALIZER;
      const UChar *s;
      UChar32 c;
      int32_t prev, titleStart, titleLimit, idx, destIndex, length;
      UBool isFirstIndex;
  
-    utext_openUTF8(&utext, (const char *)src, srcLength, pErrorCode);
      if(U_FAILURE(*pErrorCode)) {
          return 0;
      }
-    if(csm->iter==NULL) {
-        csm->iter=ubrk_open(UBRK_WORD, csm->locale,
-                            NULL, 0,
-                            pErrorCode);
-    }
-    ubrk_setUText(csm->iter, &utext, pErrorCode);
-    if(U_FAILURE(*pErrorCode)) {
-        utext_close(&utext);
-        return 0;
-    }
+
+    // Use the C++ abstract base class to minimize dependencies.
+    // TODO: Change UCaseMap.iter to store a BreakIterator directly.
+    BreakIterator *bi=reinterpret_cast<BreakIterator *>(csm->iter);
  
      /* set up local variables */
+    int32_t locCache=csm->locCache;
+    UCaseContext csc=UCASECONTEXT_INITIALIZER;
+    csc.p=(void *)src;
+    csc.limit=srcLength;
      destIndex=0;
      prev=0;
      isFirstIndex=TRUE;
@@ -302,9 +283,9 @@ _toTitle(UCaseMap *csm,
          /* find next index where to titlecase */
          if(isFirstIndex) {
              isFirstIndex=FALSE;
-            idx=ubrk_first(csm->iter);
+            idx=bi->first();
          } else {
-            idx=ubrk_next(csm->iter);
+            idx=bi->next();
          }
          if(idx==UBRK_DONE || idx>srcLength) {
              idx=srcLength;
@@ -354,15 +335,14 @@ _toTitle(UCaseMap *csm,
  
              if(titleStart<titleLimit) {
                  /* titlecase c which is from [titleStart..titleLimit[ */
-                csc->cpStart=titleStart;
-                csc->cpLimit=titleLimit;
-                c=ucase_toFullTitle(csm->csp, c, utf8_caseContextIterator, csc, &s, csm->locale, &csm->locCache);
+                csc.cpStart=titleStart;
+                csc.cpLimit=titleLimit;
+                c=ucase_toFullTitle(csm->csp, c, utf8_caseContextIterator, &csc, &s, csm->locale, &locCache);
                  destIndex=appendResult(dest, destIndex, destCapacity, c, s);
  
-                
                  /* Special case Dutch IJ titlecasing */
                  if ( titleStart+1 < idx && 
-                     ucase_getCaseLocale(csm->locale,&csm->locCache) == UCASE_LOC_DUTCH &&
+                     ucase_getCaseLocale(csm->locale, &locCache) == UCASE_LOC_DUTCH &&
                       ( src[titleStart] == 0x0049 || src[titleStart] == 0x0069 ) &&
                       ( src[titleStart+1] == 0x004A || src[titleStart+1] == 0x006A )) { 
                              c=0x004A;
@@ -377,7 +357,7 @@ _toTitle(UCaseMap *csm,
                              _caseMap(
                                  csm, ucase_toFullLower,
                                  dest+destIndex, destCapacity-destIndex,
-                                src, csc,
+                                src, &csc,
                                  titleLimit, idx,
                                  pErrorCode);
                      } else {
@@ -398,12 +378,41 @@ _toTitle(UCaseMap *csm,
      if(destIndex>destCapacity) {
          *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
      }
-    utext_close(&utext);
      return destIndex;
  }
  
  #endif
  
+static int32_t U_CALLCONV
+ucasemap_internalUTF8ToLower(const UCaseMap *csm,  // Mapper passed to ucasemap_mapUTF8() by ucasemap_utf8ToLower(); runs _caseMap() with ucase_toFullLower.
+                             uint8_t *dest, int32_t destCapacity,
+                             const uint8_t *src, int32_t srcLength,
+                             UErrorCode *pErrorCode) {
+    UCaseContext csc=UCASECONTEXT_INITIALIZER;  // fresh context per call
+    csc.p=(void *)src;  // context refers to the source text; the cast drops const
+    csc.limit=srcLength;
+    return _caseMap(  // returns _caseMap()'s result directly; no NUL termination is done here
+        csm, ucase_toFullLower,
+        dest, destCapacity,
+        src, &csc, 0, srcLength,  // presumably start=0 and limit=srcLength, i.e. the whole input - confirm against _caseMap()
+        pErrorCode);
+}
+
+static int32_t U_CALLCONV
+ucasemap_internalUTF8ToUpper(const UCaseMap *csm,  // Mapper passed to ucasemap_mapUTF8() by ucasemap_utf8ToUpper(); runs _caseMap() with ucase_toFullUpper.
+                             uint8_t *dest, int32_t destCapacity,
+                             const uint8_t *src, int32_t srcLength,
+                             UErrorCode *pErrorCode) {
+    UCaseContext csc=UCASECONTEXT_INITIALIZER;  // fresh context per call
+    csc.p=(void *)src;  // context refers to the source text; the cast drops const
+    csc.limit=srcLength;
+    return _caseMap(  // returns _caseMap()'s result directly; no NUL termination is done here
+        csm, ucase_toFullUpper,
+        dest, destCapacity,
+        src, &csc, 0, srcLength,  // presumably start=0 and limit=srcLength, i.e. the whole input - confirm against _caseMap()
+        pErrorCode);
+}
+
  static int32_t
  utf8_foldCase(const UCaseProps *csp,
                uint8_t *dest, int32_t destCapacity,
@@ -442,19 +451,20 @@ utf8_foldCase(const UCaseProps *csp,
      return destIndex;
  }
  
-/*
- * Implement argument checking and buffer handling
- * for string case mapping as a common function.
- */
-
-/* common internal function for public API functions */
+static int32_t U_CALLCONV
+ucasemap_internalUTF8Fold(const UCaseMap *csm,  // Mapper passed to ucasemap_mapUTF8() by ucasemap_utf8FoldCase(); adapts utf8_foldCase() to the common mapper signature.
+                          uint8_t *dest, int32_t destCapacity,
+                          const uint8_t *src, int32_t srcLength,
+                          UErrorCode *pErrorCode) {
+    return utf8_foldCase(csm->csp, dest, destCapacity, src, srcLength, csm->options, pErrorCode);  // only csp and options are read from csm
+}
  
-static int32_t
-caseMap(const UCaseMap *csm,
-        uint8_t *dest, int32_t destCapacity,
-        const uint8_t *src, int32_t srcLength,
-        int32_t toWhichCase,
-        UErrorCode *pErrorCode) {
+U_CFUNC int32_t
+ucasemap_mapUTF8(const UCaseMap *csm,
+                 uint8_t *dest, int32_t destCapacity,
+                 const uint8_t *src, int32_t srcLength,
+                 UTF8CaseMapper *stringCaseMapper,
+                 UErrorCode *pErrorCode) {
      int32_t destLength;
  
      /* check argument values */
@@ -484,42 +494,7 @@ caseMap(const UCaseMap *csm,
          return 0;
      }
  
-    destLength=0;
-
-    if(toWhichCase==FOLD_CASE) {
-        destLength=utf8_foldCase(csm->csp, dest, destCapacity, src, srcLength,
-                                 csm->options, pErrorCode);
-    } else {
-        UCaseContext csc=UCASECONTEXT_INITIALIZER;
-
-        csc.p=(void *)src;
-        csc.limit=srcLength;
-
-        if(toWhichCase==TO_LOWER) {
-            destLength=_caseMap(csm, ucase_toFullLower,
-                                dest, destCapacity,
-                                src, &csc,
-                                0, srcLength,
-                                pErrorCode);
-        } else if(toWhichCase==TO_UPPER) {
-            destLength=_caseMap(csm, ucase_toFullUpper,
-                                dest, destCapacity,
-                                src, &csc,
-                                0, srcLength,
-                                pErrorCode);
-        } else /* if(toWhichCase==TO_TITLE) */ {
-#if UCONFIG_NO_BREAK_ITERATION
-            *pErrorCode=U_UNSUPPORTED_ERROR;
-#else
-            /* UCaseMap is actually non-const in toTitle() APIs. */
-            UCaseMap *tmp = (UCaseMap *)csm;
-            destLength=_toTitle(tmp, dest, destCapacity,
-                                src, &csc, srcLength,
-                                pErrorCode);
-#endif
-        }
-    }
-
+    destLength=stringCaseMapper(csm, dest, destCapacity, src, srcLength, pErrorCode);
      return u_terminateChars((char *)dest, destCapacity, destLength, pErrorCode);
  }
  
@@ -530,10 +505,10 @@ ucasemap_utf8ToLower(const UCaseMap *csm,
                       char *dest, int32_t destCapacity,
                       const char *src, int32_t srcLength,
                       UErrorCode *pErrorCode) {
-    return caseMap(csm,
+    return ucasemap_mapUTF8(csm,
                     (uint8_t *)dest, destCapacity,
                     (const uint8_t *)src, srcLength,
-                   TO_LOWER, pErrorCode);
+                   ucasemap_internalUTF8ToLower, pErrorCode);
  }
  
  U_CAPI int32_t U_EXPORT2
@@ -541,34 +516,19 @@ ucasemap_utf8ToUpper(const UCaseMap *csm,
                       char *dest, int32_t destCapacity,
                       const char *src, int32_t srcLength,
                       UErrorCode *pErrorCode) {
-    return caseMap(csm,
-                   (uint8_t *)dest, destCapacity,
-                   (const uint8_t *)src, srcLength,
-                   TO_UPPER, pErrorCode);
-}
-
-#if !UCONFIG_NO_BREAK_ITERATION
-
-U_CAPI int32_t U_EXPORT2
-ucasemap_utf8ToTitle(UCaseMap *csm,
-                     char *dest, int32_t destCapacity,
-                     const char *src, int32_t srcLength,
-                     UErrorCode *pErrorCode) {
-    return caseMap(csm,
+    return ucasemap_mapUTF8(csm,
                     (uint8_t *)dest, destCapacity,
                     (const uint8_t *)src, srcLength,
-                   TO_TITLE, pErrorCode);
+                   ucasemap_internalUTF8ToUpper, pErrorCode);
  }
  
-#endif
-
  U_CAPI int32_t U_EXPORT2
  ucasemap_utf8FoldCase(const UCaseMap *csm,
                        char *dest, int32_t destCapacity,
                        const char *src, int32_t srcLength,
                        UErrorCode *pErrorCode) {
-    return caseMap(csm,
+    return ucasemap_mapUTF8(csm,
                     (uint8_t *)dest, destCapacity,
                     (const uint8_t *)src, srcLength,
-                   FOLD_CASE, pErrorCode);
+                   ucasemap_internalUTF8Fold, pErrorCode);
  }
diff --git a/icu4c/source/common/ucasemap_titlecase_brkiter.cpp b/icu4c/source/common/ucasemap_titlecase_brkiter.cpp

new file mode 100644 (file)

index 0000000..1698c8e
--- /dev/null
+++ b/icu4c/source/common/ucasemap_titlecase_brkiter.cpp
@@ -0,0 +1,67 @@
+/*
+*******************************************************************************
+*   Copyright (C) 2011, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*   file name:  ucasemap_titlecase_brkiter.cpp
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2011jun02
+*   created by: Markus W. Scherer
+*
+*   Titlecasing functions that are based on BreakIterator
+*   were moved here to break dependency cycles among parts of the common library.
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+#include "unicode/brkiter.h"
+#include "unicode/ubrk.h"
+#include "unicode/ucasemap.h"
+#include "cmemory.h"
+#include "ucase.h"
+#include "ustr_imp.h"
+
+U_NAMESPACE_USE
+
+U_CAPI const UBreakIterator * U_EXPORT2
+ucasemap_getBreakIterator(const UCaseMap *csm) {  // Returns the break iterator currently stored in csm.
+    return csm->iter;  // may be NULL: ucasemap_utf8ToTitle() only opens one when it finds csm->iter==NULL
+}
+
+U_CAPI void U_EXPORT2
+ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode * /*pErrorCode*/) {
+    // Do not call ubrk_close() so that we do not depend on all of the BreakIterator code.
+    delete reinterpret_cast<BreakIterator *>(csm->iter);  // destroy the iterator stored so far (no-op for NULL)
+    csm->iter=iterToAdopt;  // csm now stores iterToAdopt; the unnamed error code parameter is never touched
+}
+
+U_CAPI int32_t U_EXPORT2
+ucasemap_utf8ToTitle(UCaseMap *csm,
+                     char *dest, int32_t destCapacity,
+                     const char *src, int32_t srcLength,
+                     UErrorCode *pErrorCode) {
+    UText utext=UTEXT_INITIALIZER;
+    utext_openUTF8(&utext, (const char *)src, srcLength, pErrorCode);
+    if(U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    if(csm->iter==NULL) {
+        csm->iter=ubrk_open(UBRK_WORD, csm->locale,
+                            NULL, 0,
+                            pErrorCode);
+    }
+    ubrk_setUText(csm->iter, &utext, pErrorCode);
+    int32_t length=ucasemap_mapUTF8(csm,
+                   (uint8_t *)dest, destCapacity,
+                   (const uint8_t *)src, srcLength,
+                   ucasemap_internalUTF8ToTitle, pErrorCode);
+    utext_close(&utext);
+    return length;
+}
+
+#endif  // !UCONFIG_NO_BREAK_ITERATION
diff --git a/icu4c/source/common/ucharstriebuilder.cpp b/icu4c/source/common/ucharstriebuilder.cpp

index b1aa0d1b8eaa11ca7d52517d9a17ca8c7b533dd3..24b46f5083725cbc8e7d7283ee9db402c03b59ce 100644 (file)
--- a/icu4c/source/common/ucharstriebuilder.cpp
+++ b/icu4c/source/common/ucharstriebuilder.cpp
@@ -21,6 +21,7 @@
  #include "uarrsort.h"
  #include "uassert.h"
  #include "uhash.h"
+#include "ustr_imp.h"
  
  U_NAMESPACE_BEGIN
  
@@ -283,7 +284,7 @@ UCharsTrieBuilder::indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, UCha
  
  UCharsTrieBuilder::UCTLinearMatchNode::UCTLinearMatchNode(const UChar *units, int32_t len, Node *nextNode)
          : LinearMatchNode(len, nextNode), s(units) {
-    hash=hash*37+uhash_hashUCharsN(units, len);
+    hash=hash*37+ustr_hashUCharsN(units, len);
  }
  
  UBool
diff --git a/icu4c/source/common/uchriter.cpp b/icu4c/source/common/uchriter.cpp

index f2878815e4f003fc168ebbe5e98bb3885e870186..6de1e8b8f57e839b84fa0bfe7acccc431ae56df2 100644 (file)
--- a/icu4c/source/common/uchriter.cpp
+++ b/icu4c/source/common/uchriter.cpp
@@ -1,6 +1,6 @@
  /*
  ******************************************************************************
-* Copyright (C) 1998-2010, International Business Machines Corporation and
+* Copyright (C) 1998-2011, International Business Machines Corporation and
  * others. All Rights Reserved.
  ******************************************************************************
  */
@@ -9,7 +9,7 @@
  
  #include "unicode/uchriter.h"
  #include "unicode/ustring.h"
-#include "uhash.h"
+#include "ustr_imp.h"
  
  U_NAMESPACE_BEGIN
  
@@ -83,7 +83,7 @@ UCharCharacterIterator::operator==(const ForwardCharacterIterator& that) const {
  
  int32_t
  UCharCharacterIterator::hashCode() const {
-    return uhash_hashUCharsN(text, textLength) ^ pos ^ begin ^ end;
+    return ustr_hashUCharsN(text, textLength) ^ pos ^ begin ^ end;
  }
  
  CharacterIterator*
diff --git a/icu4c/source/common/ucln.h b/icu4c/source/common/ucln.h

index 3f9847f44484477373f301666e42b366de25c527..951ad192d91081f47cecffa3dce8aae28c268bce 100644 (file)
--- a/icu4c/source/common/ucln.h
+++ b/icu4c/source/common/ucln.h
@@ -1,11 +1,11 @@
  /*
  ******************************************************************************
-*                                                                            *
-* Copyright (C) 2001-2010, International Business Machines                   *
-*                Corporation and others. All Rights Reserved.                *
-*                                                                            *
+*
+* Copyright (C) 2001-2011, International Business Machines
+*                Corporation and others. All Rights Reserved.
+*
  ******************************************************************************
-*   file name:  ucln_cmn.h
+*   file name:  ucln.h
  *   encoding:   US-ASCII
  *   tab size:   8 (not used)
  *   indentation:4
@@ -18,6 +18,7 @@
  #define __UCLN_H__
  
  #include "unicode/utypes.h"
+#include "umutex.h"
  
  /** These are the functions used to register a library's memory cleanup
   * functions.  Each library should define a single library register function
@@ -81,9 +82,16 @@ U_CAPI void U_EXPORT2 ucln_registerCleanup(ECleanupLibraryType type,
  /**
   * Request cleanup for one specific library.
   * Not thread safe.
- * Calling this with UCLN_COMMON just calls u_cleanup();
   * @param type which library to cleanup
   */
  U_CAPI void U_EXPORT2 ucln_cleanupOne(ECleanupLibraryType type);
  
+/* ucln_cmn.c variables shared with uinit.c */
+U_CDECL_BEGIN
+
+extern UBool gICUInitialized;
+extern UMTX  gICUInitMutex;
+
+U_CDECL_END
+
  #endif
diff --git a/icu4c/source/common/ucln_cmn.c b/icu4c/source/common/ucln_cmn.c

index 498c15e29f86395f6eeb3ef4b327909fa7fb6a09..45358736bd25a8a9e9aafc8efc6256ccc4da1f5c 100644 (file)
--- a/icu4c/source/common/ucln_cmn.c
+++ b/icu4c/source/common/ucln_cmn.c
@@ -1,6 +1,6 @@
  /*
  ******************************************************************************
-* Copyright (C) 2001-2010, International Business Machines
+* Copyright (C) 2001-2011, International Business Machines
  *                Corporation and others. All Rights Reserved.
  ******************************************************************************
  *   file name:  ucln_cmn.c
@@ -15,7 +15,6 @@
  #include "unicode/utypes.h"
  #include "unicode/uclean.h"
  #include "utracimp.h"
-#include "ustr_imp.h"
  #include "ucln_cmn.h"
  #include "umutex.h"
  #include "ucln.h"
@@ -23,24 +22,43 @@
  #include "uassert.h"
  
  /**  Auto-client for UCLN_COMMON **/
-#define UCLN_TYPE UCLN_COMMON
+#define UCLN_TYPE_IS_COMMON
  #include "ucln_imp.h"
  
+U_CDECL_BEGIN
+
+UBool gICUInitialized = FALSE;
+UMTX  gICUInitMutex   = NULL;
+
+U_CDECL_END
+
  static cleanupFunc *gCommonCleanupFunctions[UCLN_COMMON_COUNT];
  static cleanupFunc *gLibCleanupFunctions[UCLN_COMMON];
  
+/************************************************
+ The cleanup order is important in this function.
+ Please be sure that you have read ucln.h
+ ************************************************/
+U_CAPI void U_EXPORT2
+u_cleanup(void)
+{
+    UTRACE_ENTRY_OC(UTRACE_U_CLEANUP);
+    umtx_lock(NULL);     /* Force a memory barrier, so that we are sure to see   */
+    umtx_unlock(NULL);   /*   all state left around by any other threads.        */
  
-/* Enables debugging information about when a library is cleaned up. */
-#ifndef UCLN_DEBUG_CLEANUP
-#define UCLN_DEBUG_CLEANUP 0
-#endif
-
+    ucln_lib_cleanup();
  
-#if defined(UCLN_DEBUG_CLEANUP)
-#include <stdio.h>
-#endif
+    umtx_destroy(&gICUInitMutex);
+    umtx_cleanup();
+    cmemory_cleanup();       /* undo any heap functions set by u_setMemoryFunctions(). */
+    gICUInitialized = FALSE;
+    UTRACE_EXIT();           /* Must be before utrace_cleanup(), which turns off tracing. */
+/*#if U_ENABLE_TRACING*/
+    utrace_cleanup();
+/*#endif*/
+}
  
-static void ucln_cleanup_internal(ECleanupLibraryType libType) 
+U_CAPI void U_EXPORT2 ucln_cleanupOne(ECleanupLibraryType libType) 
  {
      if (gLibCleanupFunctions[libType])
      {
@@ -49,22 +67,6 @@ static void ucln_cleanup_internal(ECleanupLibraryType libType)
      }
  }
  
-U_CAPI void U_EXPORT2 ucln_cleanupOne(ECleanupLibraryType libType)
-{
-    if(libType==UCLN_COMMON) {
-#if UCLN_DEBUG_CLEANUP
-        fprintf(stderr, "Cleaning up: UCLN_COMMON with u_cleanup, type %d\n", (int)libType);
-#endif
-        u_cleanup();
-    } else {
-#if UCLN_DEBUG_CLEANUP
-        fprintf(stderr, "Cleaning up: using ucln_cleanup_internal, type %d\n", (int)libType);
-#endif
-        ucln_cleanup_internal(libType);
-    }
-}
-
-
  U_CFUNC void
  ucln_common_registerCleanup(ECleanupCommonType type,
                              cleanupFunc *func)
@@ -95,7 +97,7 @@ U_CFUNC UBool ucln_lib_cleanup(void) {
      ECleanupCommonType commonFunc = UCLN_COMMON_START;
  
      for (libType++; libType<UCLN_COMMON; libType++) {
-        ucln_cleanup_internal(libType);
+        ucln_cleanupOne(libType);
      }
  
      for (commonFunc++; commonFunc<UCLN_COMMON_COUNT; commonFunc++) {
diff --git a/icu4c/source/common/ucln_imp.h b/icu4c/source/common/ucln_imp.h

index 9268729f7fdbd37f5225d195906948d462a113ea..2608fb2c7559d98c42c26f79ee7f3f2314e1b008 100644 (file)
--- a/icu4c/source/common/ucln_imp.h
+++ b/icu4c/source/common/ucln_imp.h
@@ -1,9 +1,9 @@
  /*
  ******************************************************************************
-*                                                                            *
-* Copyright (C) 2009, International Business Machines                   *
-*                Corporation and others. All Rights Reserved.                *
-*                                                                            *
+*
+* Copyright (C) 2009-2011, International Business Machines
+*                Corporation and others. All Rights Reserved.
+*
  ******************************************************************************
  *   file name:  ucln_imp.h
  *   encoding:   US-ASCII
@@ -56,6 +56,12 @@
   */
  /*static void ucln_unRegisterAutomaticCleanup();*/
  
+#ifdef UCLN_TYPE_IS_COMMON  /* ucln_cmn.c defines this before including this header */
+#   define UCLN_CLEAN_ME_UP u_cleanup()  /* automatic cleanup goes through u_cleanup() */
+#else
+#   define UCLN_CLEAN_ME_UP ucln_cleanupOne(UCLN_TYPE)  /* automatic cleanup of the one library named by UCLN_TYPE */
+#endif
+
  /* ------------ automatic cleanup: registration. Choose ONE ------- */
  #if defined(UCLN_AUTO_LOCAL)
  /* To use:
@@ -75,7 +81,7 @@ static UBool gAutoCleanRegistered = FALSE;
  
  static void ucln_atexit_handler()
  {
-    ucln_cleanupOne(UCLN_TYPE);
+    UCLN_CLEAN_ME_UP;
  }
  
  static void ucln_registerAutomaticCleanup()
@@ -101,7 +107,7 @@ U_CAPI void U_EXPORT2 UCLN_FINI (void);
  U_CAPI void U_EXPORT2 UCLN_FINI ()
  {
      /* This function must be defined, if UCLN_FINI is defined, else link error. */
-     ucln_cleanupOne(UCLN_TYPE);
+     UCLN_CLEAN_ME_UP;
  }
  #elif defined(__GNUC__)
  /* GCC - use __attribute((destructor)) */
@@ -109,7 +115,7 @@ static void ucln_destructor()   __attribute__((destructor)) ;
  
  static void ucln_destructor() 
  {
-    ucln_cleanupOne(UCLN_TYPE);
+    UCLN_CLEAN_ME_UP;
  }
  
  /* Windows: DllMain */
@@ -145,7 +151,7 @@ BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved)
          case DLL_PROCESS_DETACH:
              /* Here is the one we actually care about. */
  
-            ucln_cleanupOne(UCLN_TYPE);
+            UCLN_CLEAN_ME_UP;
  
              break;
  
diff --git a/icu4c/source/common/ucnvisci.c b/icu4c/source/common/ucnvisci.c

index 411085226c98e13f7dc58a6e36ffaca83e6037db..77a763d63144a172efdabe7d8429b37441684f4f 100644 (file)
--- a/icu4c/source/common/ucnvisci.c
+++ b/icu4c/source/common/ucnvisci.c
@@ -1,6 +1,6 @@
  /*
  **********************************************************************
-*   Copyright (C) 2000-2009, International Business Machines
+*   Copyright (C) 2000-2011, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  **********************************************************************
  *   file name:  ucnvisci.c
@@ -24,7 +24,6 @@
  #include "unicode/ucnv.h"
  #include "ucnv_cnv.h"
  #include "unicode/ucnv_cb.h"
-#include "unicode/uset.h"
  #include "cstring.h"
  
  #define UCNV_OPTIONS_VERSION_MASK 0xf
@@ -63,9 +62,6 @@
  #define PNJ_HA              0x0A39
  #define PNJ_RRA             0x0A5C
  
-static USet* PNJ_BINDI_TIPPI_SET= NULL;
-static USet* PNJ_CONSONANT_SET= NULL;
-
  typedef enum {
      DEVANAGARI =0,
      BENGALI,
@@ -151,24 +147,40 @@ static const LookupDataStruct lookupInitialData[]={
      { MALAYALAM,  MLM_MASK,  MLM }
  };
  
-static void initializeSets() {
-    /* TODO: Replace the following two lines with PNJ_CONSONANT_SET = uset_openEmpty(); */
-    PNJ_CONSONANT_SET = uset_open(0,0);
-    uset_clear(PNJ_CONSONANT_SET);
-
-    uset_addRange(PNJ_CONSONANT_SET, 0x0A15, 0x0A28);
-    uset_addRange(PNJ_CONSONANT_SET, 0x0A2A, 0x0A30);
-    uset_addRange(PNJ_CONSONANT_SET, 0x0A35, 0x0A36);
-    uset_addRange(PNJ_CONSONANT_SET, 0x0A38, 0x0A39);
-    
-    PNJ_BINDI_TIPPI_SET = uset_clone(PNJ_CONSONANT_SET);
-    uset_add(PNJ_BINDI_TIPPI_SET, 0x0A05);
-    uset_add(PNJ_BINDI_TIPPI_SET, 0x0A07);
-    uset_add(PNJ_BINDI_TIPPI_SET, 0x0A3F);
-    uset_addRange(PNJ_BINDI_TIPPI_SET, 0x0A41, 0x0A42);
-    
-    uset_compact(PNJ_CONSONANT_SET);
-    uset_compact(PNJ_BINDI_TIPPI_SET);
+/*
+ * Flags for the special handling of certain Gurmukhi characters, indexed by (c - 0xa00) for 0A00..0A4F.
+ * Bit 0 (value 1): PNJ consonant
+ * Bit 1 (value 2): PNJ Bindi Tippi (every consonant entry sets this bit too, hence the value 3)
+ */
+static const uint8_t pnjMap[80] = {
+    /* 0A00..0A0F */
+    0, 0, 0, 0, 0, 2, 0, 2,  0, 0, 0, 0, 0, 0, 0, 0,
+    /* 0A10..0A1F */
+    0, 0, 0, 0, 0, 3, 3, 3,  3, 3, 3, 3, 3, 3, 3, 3,
+    /* 0A20..0A2F */
+    3, 3, 3, 3, 3, 3, 3, 3,  3, 0, 3, 3, 3, 3, 3, 3,
+    /* 0A30..0A3F */
+    3, 0, 0, 0, 0, 3, 3, 0,  3, 3, 0, 0, 0, 0, 0, 2,
+    /* 0A40..0A4F */
+    0, 2, 2, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0
+};
+
+/* Tests bit 0 of the pnjMap entry for c; any c outside 0A00..0A4F yields FALSE. */
+static UBool
+isPNJConsonant(UChar32 c) {
+    if (0xa00 <= c && c < 0xa50) {
+        return (UBool)(pnjMap[c - 0xa00] & 1);
+    }
+    return FALSE;
+}
+
+/* Tests bit 1 of the pnjMap entry for c; any c outside 0A00..0A4F yields FALSE. */
+static UBool
+isPNJBindiTippi(UChar32 c) {
+    if (0xa00 <= c && c < 0xa50) {
+        return (UBool)(pnjMap[c - 0xa00] >> 1);
+    }
+    return FALSE;
  }
  
  static void _ISCIIOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode) {
@@ -176,9 +188,6 @@ static void _ISCIIOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *e
          return;
      }
  
-    /* Ensure that the sets used in special handling of certain Gurmukhi characters are initialized. */
-    initializeSets();
-    
      cnv->extraInfo = uprv_malloc(sizeof(UConverterDataISCII));
  
      if (cnv->extraInfo != NULL) {
@@ -225,14 +234,6 @@ static void _ISCIIClose(UConverter *cnv) {
          }
          cnv->extraInfo=NULL;
      }
-    if (PNJ_CONSONANT_SET != NULL) {
-        uset_close(PNJ_CONSONANT_SET);
-        PNJ_CONSONANT_SET = NULL;
-    }
-    if (PNJ_BINDI_TIPPI_SET != NULL) {
-        uset_close(PNJ_BINDI_TIPPI_SET);
-        PNJ_BINDI_TIPPI_SET = NULL;
-    }
  }
  
  static const char* _ISCIIgetName(const UConverter* cnv) {
@@ -1031,7 +1032,7 @@ static void UConverter_fromUnicode_ISCII_OFFSETS_LOGIC(
              converterData->contextCharFromUnicode = 0x00;
              break;
          }
-        if (converterData->currentDeltaFromUnicode == PNJ_DELTA && tempContextFromUnicode == PNJ_ADHAK && uset_contains(PNJ_CONSONANT_SET, (sourceChar + PNJ_DELTA))) {
+        if (converterData->currentDeltaFromUnicode == PNJ_DELTA && tempContextFromUnicode == PNJ_ADHAK && isPNJConsonant((sourceChar + PNJ_DELTA))) {
              /* If the previous codepoint is Adhak and the current codepoint is a consonant, the targetByteUnit should be C + Halant + C. */
              /* reset context char */
              converterData->contextCharFromUnicode = 0x0000;
@@ -1425,7 +1426,7 @@ static void UConverter_toUnicode_ISCII_OFFSETS_LOGIC(UConverterToUnicodeArgs *ar
  
              if (*toUnicodeStatus != missingCharMarker) {
                  /* Check to make sure that consonant clusters are handled correct for Gurmukhi script. */
-                if (data->currentDeltaToUnicode == PNJ_DELTA && data->prevToUnicodeStatus != 0 && uset_contains(PNJ_CONSONANT_SET, data->prevToUnicodeStatus) &&
+                if (data->currentDeltaToUnicode == PNJ_DELTA && data->prevToUnicodeStatus != 0 && isPNJConsonant(data->prevToUnicodeStatus) &&
                          (*toUnicodeStatus + PNJ_DELTA) == PNJ_SIGN_VIRAMA && (targetUniChar + PNJ_DELTA) == data->prevToUnicodeStatus) {
                      /* Consonant clusters C + HALANT + C should be encoded as ADHAK + C */
                      offset = (int)(source-args->source - 3);
@@ -1444,10 +1445,10 @@ static void UConverter_toUnicode_ISCII_OFFSETS_LOGIC(UConverterToUnicodeArgs *ar
                      /* Check to make sure that Bindi and Tippi are handled correctly for Gurmukhi script. 
                       * If 0xA2 is preceded by a codepoint in the PNJ_BINDI_TIPPI_SET then the target codepoint should be Tippi instead of Bindi.
                       */
-                    if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_BINDI && uset_contains(PNJ_BINDI_TIPPI_SET, (*toUnicodeStatus + PNJ_DELTA))) {
+                    if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_BINDI && isPNJBindiTippi((*toUnicodeStatus + PNJ_DELTA))) {
                          targetUniChar = PNJ_TIPPI - PNJ_DELTA;
                          WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,PNJ_DELTA,err);
-                    } else if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_SIGN_VIRAMA && uset_contains(PNJ_CONSONANT_SET, (*toUnicodeStatus + PNJ_DELTA))) {
+                    } else if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_SIGN_VIRAMA && isPNJConsonant((*toUnicodeStatus + PNJ_DELTA))) {
                          /* Store the current toUnicodeStatus code point for later handling of consonant cluster in Gurmukhi. */
                          data->prevToUnicodeStatus = *toUnicodeStatus + PNJ_DELTA;
                      } else {
diff --git a/icu4c/source/common/ucnvsel.cpp b/icu4c/source/common/ucnvsel.cpp

index cc626eecd2e5b0473f104e796c62f5350b922abb..03744df94d4201872ab9f1e3a3afa21eb140693c 100644 (file)
--- a/icu4c/source/common/ucnvsel.cpp
+++ b/icu4c/source/common/ucnvsel.cpp
@@ -1,7 +1,7 @@
  /*
  *******************************************************************************
  *
-*   Copyright (C) 2008-2009, International Business Machines
+*   Copyright (C) 2008-2011, International Business Machines
  *   Corporation, Google and others.  All Rights Reserved.
  *
  *******************************************************************************
@@ -26,6 +26,8 @@
  
  #include "unicode/ucnvsel.h"
  
+#if !UCONFIG_NO_CONVERSION
+
  #include <string.h>
  
  #include "unicode/uchar.h"
@@ -809,3 +811,5 @@ ucnvsel_selectForUTF8(const UConverterSelector* sel,
    }
    return selectForMask(sel, mask, status);
  }
+
+#endif  // !UCONFIG_NO_CONVERSION
diff --git a/icu4c/source/common/uhash.c b/icu4c/source/common/uhash.c

index 41c47162d93498302f3c9923b67d1dc89b63e86a..5dd6ba3441c8131e9e143f85d4d8e480de745594 100644 (file)
--- a/icu4c/source/common/uhash.c
+++ b/icu4c/source/common/uhash.c
@@ -1,6 +1,6 @@
  /*
  ******************************************************************************
-*   Copyright (C) 1997-2010, International Business Machines
+*   Copyright (C) 1997-2011, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  ******************************************************************************
  *   Date        Name        Description
@@ -15,6 +15,7 @@
  #include "cstring.h"
  #include "cmemory.h"
  #include "uassert.h"
+#include "ustr_imp.h"
  
  /* This hashtable is implemented as a double hash.  All elements are
   * stored in a single array with no secondary storage for collision
@@ -832,58 +833,26 @@ uhash_tokp(void* p) {
   * PUBLIC Key Hash Functions
   ********************************************************************/
  
-/*
-  Compute the hash by iterating sparsely over about 32 (up to 63)
-  characters spaced evenly through the string.  For each character,
-  multiply the previous hash value by a prime number and add the new
-  character in, like a linear congruential random number generator,
-  producing a pseudorandom deterministic value well distributed over
-  the output range. [LIU]
-*/
-
-#define STRING_HASH(TYPE, STR, STRLEN, DEREF) \
-    int32_t hash = 0;                         \
-    const TYPE *p = (const TYPE*) STR;        \
-    if (p != NULL) {                          \
-        int32_t len = (int32_t)(STRLEN);      \
-        int32_t inc = ((len - 32) / 32) + 1;  \
-        const TYPE *limit = p + len;          \
-        while (p<limit) {                     \
-            hash = (hash * 37) + DEREF;       \
-            p += inc;                         \
-        }                                     \
-    }                                         \
-    return hash
-
  U_CAPI int32_t U_EXPORT2
  uhash_hashUChars(const UHashTok key) {
-    STRING_HASH(UChar, key.pointer, u_strlen(p), *p);
-}
-
-/* Used by UnicodeString to compute its hashcode - Not public API. */
-U_CAPI int32_t U_EXPORT2
-uhash_hashUCharsN(const UChar *str, int32_t length) {
-    STRING_HASH(UChar, str, length, *p);
-}
-
-U_CAPI int32_t U_EXPORT2
-uhash_hashCharsN(const char *str, int32_t length) {
-    STRING_HASH(char, str, length, *p);
+    const UChar *s = (const UChar *)key.pointer;
+    return s == NULL ? 0 : ustr_hashUCharsN(s, u_strlen(s));
  }
  
  U_CAPI int32_t U_EXPORT2
  uhash_hashChars(const UHashTok key) {
-    STRING_HASH(uint8_t, key.pointer, uprv_strlen((char*)p), *p);
+    const char *s = (const char *)key.pointer;
+    return s == NULL ? 0 : ustr_hashCharsN(s, uprv_strlen(s));
  }
  
  U_CAPI int32_t U_EXPORT2
  uhash_hashIChars(const UHashTok key) {
-    STRING_HASH(uint8_t, key.pointer, uprv_strlen((char*)p), uprv_tolower(*p));
+    const char *s = (const char *)key.pointer;
+    return s == NULL ? 0 : ustr_hashICharsN(s, uprv_strlen(s));
  }
  
  U_CAPI UBool U_EXPORT2 
  uhash_equals(const UHashtable* hash1, const UHashtable* hash2){
-    
      int32_t count1, count2, pos, i;
  
      if(hash1==hash2){
@@ -1002,13 +971,3 @@ U_CAPI UBool U_EXPORT2
  uhash_compareLong(const UHashTok key1, const UHashTok key2) {
      return (UBool)(key1.integer == key2.integer);
  }
-
-/********************************************************************
- * PUBLIC Deleter Functions
- ********************************************************************/
-
-U_CAPI void U_EXPORT2
-uhash_freeBlock(void *obj) {
-    uprv_free(obj);
-}
-
diff --git a/icu4c/source/common/uhash.h b/icu4c/source/common/uhash.h

index 9b86e0801a984cbbb35de71528cd8d9ca0e11d42..bf3275c59015095417f85c91d1d624a2e59fe08e 100644 (file)
--- a/icu4c/source/common/uhash.h
+++ b/icu4c/source/common/uhash.h
@@ -1,6 +1,6 @@
  /*
  ******************************************************************************
-*   Copyright (C) 1997-2010, International Business Machines
+*   Copyright (C) 1997-2011, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  ******************************************************************************
  *   Date        Name        Description
@@ -14,6 +14,7 @@
  #define UHASH_H
  
  #include "unicode/utypes.h"
+#include "cmemory.h"
  
  /**
   * UHashtable stores key-value pairs and does moderately fast lookup
@@ -125,14 +126,8 @@ typedef UBool U_CALLCONV UKeyComparator(const UHashTok key1,
   */
  typedef UBool U_CALLCONV UValueComparator(const UHashTok val1,
                                            const UHashTok val2);
-/**
- * A function called by <TT>uhash_remove</TT>,
- * <TT>uhash_close</TT>, or <TT>uhash_put</TT> to delete
- * an existing key or value.
- * @param obj A key or value stored in a hashtable
- * @see uhash_deleteUObject
- */
-typedef void U_CALLCONV UObjectDeleter(void* obj);
+
+/* see cmemory.h for UObjectDeleter and uprv_deleteUObject() */
  
  /**
   * This specifies whether or not, and how, the hastable resizes itself.
@@ -579,13 +574,6 @@ uhash_hashUChars(const UHashTok key);
  U_CAPI int32_t U_EXPORT2 
  uhash_hashChars(const UHashTok key);
  
-/* Used by UnicodeString to compute its hashcode - Not public API. */
-U_CAPI int32_t U_EXPORT2 
-uhash_hashUCharsN(const UChar *key, int32_t length);
-
-U_CAPI int32_t U_EXPORT2 
-uhash_hashCharsN(const char *key, int32_t length);
-
  /**
   * Generate a case-insensitive hash code for a null-terminated char*
   * string.  If the string is not null-terminated do not use this
@@ -666,13 +654,6 @@ uhash_compareUnicodeString(const UHashTok key1, const UHashTok key2);
  U_CAPI UBool U_EXPORT2 
  uhash_compareCaselessUnicodeString(const UHashTok key1, const UHashTok key2);
  
-/**
- * Deleter function for UnicodeString* keys or values.
- * @param obj The object to be deleted
- */
-U_CAPI void U_EXPORT2 
-uhash_deleteUnicodeString(void *obj);
-
  /********************************************************************
   * int32_t Support Functions
   ********************************************************************/
@@ -705,20 +686,7 @@ uhash_compareLong(const UHashTok key1, const UHashTok key2);
  U_CAPI void U_EXPORT2 
  uhash_deleteHashtable(void *obj);
  
-/**
- * Deleter for UObject instances.
- * @param obj The object to be deleted
- */
-U_CAPI void U_EXPORT2 
-uhash_deleteUObject(void *obj);
-
-/**
- * Deleter for any key or value allocated using uprv_malloc.  Calls
- * uprv_free.
- * @param obj The object to be deleted
- */
-U_CAPI void U_EXPORT2 
-uhash_freeBlock(void *obj);
+/* Use uprv_free() itself as a deleter for any key or value allocated using uprv_malloc. */
  
  /**
   * Checks if the given hash tables are equal or not.
diff --git a/icu4c/source/common/uhash_us.cpp b/icu4c/source/common/uhash_us.cpp

index c4ca3ca76e5d452278aca46e4f6b4c40c4917954..71a41cbe29a2bef5c2352133d4820cc8294b7638 100644 (file)
--- a/icu4c/source/common/uhash_us.cpp
+++ b/icu4c/source/common/uhash_us.cpp
@@ -1,6 +1,6 @@
  /*
  ******************************************************************************
-*   Copyright (C) 1997-2010, International Business Machines
+*   Copyright (C) 1997-2011, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  ******************************************************************************
  *   Date        Name        Description
@@ -10,42 +10,7 @@
  ******************************************************************************
  */
  
-#include "uhash.h"
  #include "hash.h"
-#include "uvector.h"
-#include "unicode/unistr.h"
-#include "unicode/uchar.h"
-
-/********************************************************************
- * PUBLIC UnicodeString support functions for UHashtable
- ********************************************************************/
-
-U_CAPI int32_t U_EXPORT2
-uhash_hashUnicodeString(const UHashTok key) {
-    U_NAMESPACE_USE
-    const UnicodeString *str = (const UnicodeString*) key.pointer;
-    return (str == NULL) ? 0 : str->hashCode();
-}
-
-U_CAPI void U_EXPORT2
-uhash_deleteUnicodeString(void *obj) {
-    U_NAMESPACE_USE
-    delete (UnicodeString*) obj;
-}
-
-U_CAPI UBool U_EXPORT2
-uhash_compareUnicodeString(const UHashTok key1, const UHashTok key2) {
-    U_NAMESPACE_USE
-    const UnicodeString *str1 = (const UnicodeString*) key1.pointer;
-    const UnicodeString *str2 = (const UnicodeString*) key2.pointer;
-    if (str1 == str2) {
-        return TRUE;
-    }
-    if (str1 == NULL || str2 == NULL) {
-        return FALSE;
-    }
-    return *str1 == *str2;
-}
  
  /**
   * Deleter for Hashtable objects.
@@ -56,13 +21,4 @@ uhash_deleteHashtable(void *obj) {
      delete (Hashtable*) obj;
  }
  
-/**
- * Deleter for UObject instances.
- */
-U_CAPI void U_EXPORT2
-uhash_deleteUObject(void *obj) {
-    U_NAMESPACE_USE
-    delete (UObject*) obj;
-}
-
  //eof
diff --git a/icu4c/source/common/uinit.c b/icu4c/source/common/uinit.c

index 530d21307ec7f6b6110dffd9242ee7c3a4b99e22..bdbf9102e3bf41ead2f95f59cafb291f252f0e0b 100644 (file)
--- a/icu4c/source/common/uinit.c
+++ b/icu4c/source/common/uinit.c
@@ -1,7 +1,7 @@
  /*
  ******************************************************************************
  *                                                                            *
-* Copyright (C) 2001-2010, International Business Machines                   *
+* Copyright (C) 2001-2011, International Business Machines                   *
  *                Corporation and others. All Rights Reserved.                *
  *                                                                            *
  ******************************************************************************
@@ -19,40 +19,11 @@
  #include "unicode/uclean.h"
  #include "cmemory.h"
  #include "icuplugimp.h"
-#include "uassert.h"
  #include "ucln.h"
-#include "ucln_cmn.h"
  #include "ucnv_io.h"
  #include "umutex.h"
  #include "utracimp.h"
  
-static UBool gICUInitialized = FALSE;
-static UMTX  gICUInitMutex   = NULL;
-
-
-/************************************************
- The cleanup order is important in this function.
- Please be sure that you have read ucln.h
- ************************************************/
-U_CAPI void U_EXPORT2
-u_cleanup(void)
-{
-    UTRACE_ENTRY_OC(UTRACE_U_CLEANUP);
-    umtx_lock(NULL);     /* Force a memory barrier, so that we are sure to see   */
-    umtx_unlock(NULL);   /*   all state left around by any other threads.        */
-
-    ucln_lib_cleanup();
-
-    umtx_destroy(&gICUInitMutex);
-    umtx_cleanup();
-    cmemory_cleanup();       /* undo any heap functions set by u_setMemoryFunctions(). */
-    gICUInitialized = FALSE;
-    UTRACE_EXIT();           /* Must be before utrace_cleanup(), which turns off tracing. */
-/*#if U_ENABLE_TRACING*/
-    utrace_cleanup();
-/*#endif*/
-}
-
  /*
   * ICU Initialization Function. Need not be called.
   */
diff --git a/icu4c/source/common/unicode/ucnvsel.h b/icu4c/source/common/unicode/ucnvsel.h

index 0830003f558d597e5f5107631e4f924445096914..eb9588eb2dbef011a77c76c565e8fa8013ed7cbd 100644 (file)
--- a/icu4c/source/common/unicode/ucnvsel.h
+++ b/icu4c/source/common/unicode/ucnvsel.h
@@ -1,7 +1,7 @@
  /*
  *******************************************************************************
  *
-*   Copyright (C) 2008-2010, International Business Machines
+*   Copyright (C) 2008-2011, International Business Machines
  *   Corporation, Google and others.  All Rights Reserved.
  *
  *******************************************************************************
@@ -19,8 +19,11 @@
  #ifndef __ICU_UCNV_SEL_H__
  #define __ICU_UCNV_SEL_H__
  
-#include "unicode/uset.h"
  #include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "unicode/uset.h"
  #include "unicode/utf16.h"
  #include "unicode/uenum.h"
  #include "unicode/ucnv.h"
@@ -179,4 +182,6 @@ U_STABLE UEnumeration * U_EXPORT2
  ucnvsel_selectForUTF8(const UConverterSelector* sel,
                        const char *s, int32_t length, UErrorCode *status);
  
+#endif  /* !UCONFIG_NO_CONVERSION */
+
  #endif  /* __ICU_UCNV_SEL_H__ */
diff --git a/icu4c/source/common/unicode/uniset.h b/icu4c/source/common/unicode/uniset.h

index ea8d1d901e0006f0e730d4cf81ffdc79f7aba4a3..79c5d2615148525b2967f2f758f14b00fbc18670 100644 (file)
--- a/icu4c/source/common/unicode/uniset.h
+++ b/icu4c/source/common/unicode/uniset.h
@@ -24,6 +24,7 @@ U_NAMESPACE_BEGIN
  
  class BMPSet;
  class ParsePosition;
+class RBBIRuleScanner;
  class SymbolTable;
  class UnicodeSetStringSpan;
  class UVector;
@@ -1467,6 +1468,7 @@ private:
      virtual UBool matchesIndexValue(uint8_t v) const;
  
  private:
+    friend class RBBIRuleScanner;
  
      //----------------------------------------------------------------
      // Implementation: Clone as thawed (see ICU4J Freezable)
@@ -1478,10 +1480,16 @@ private:
      // Implementation: Pattern parsing
      //----------------------------------------------------------------
  
+    void applyPatternIgnoreSpace(const UnicodeString& pattern,
+                                 ParsePosition& pos,
+                                 const SymbolTable* symbols,
+                                 UErrorCode& status);
+
      void applyPattern(RuleCharacterIterator& chars,
                        const SymbolTable* symbols,
                        UnicodeString& rebuiltPat,
                        uint32_t options,
+                      UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
                        UErrorCode& ec);
  
      //----------------------------------------------------------------
diff --git a/icu4c/source/common/unicode/unistr.h b/icu4c/source/common/unicode/unistr.h

index 4507829b848c70094fb7e1343cf53dd58f84cb73..cf8b736ed8544acbf63040877e03f164b2cfb958 100644 (file)
--- a/icu4c/source/common/unicode/unistr.h
+++ b/icu4c/source/common/unicode/unistr.h
@@ -31,6 +31,7 @@
  #include "unicode/std_string.h"
  #include "unicode/stringpiece.h"
  #include "unicode/bytestream.h"
+#include "unicode/ucasemap.h"
  
  struct UConverter;          // unicode/ucnv.h
  class  StringThreadTest;
@@ -53,6 +54,21 @@ U_STABLE int32_t U_EXPORT2
  u_strlen(const UChar *s);
  #endif
  
+#ifndef U_STRING_CASE_MAPPER_DEFINED  /* guard so the typedef is declared only once per translation unit */
+#define U_STRING_CASE_MAPPER_DEFINED
+
+/**
+ * Internal string case mapping function type; UnicodeString::caseMap() takes a pointer to such a function.
+ * @internal
+ */
+typedef int32_t U_CALLCONV
+UStringCaseMapper(const UCaseMap *csm,
+                  UChar *dest, int32_t destCapacity,
+                  const UChar *src, int32_t srcLength,
+                  UErrorCode *pErrorCode);
+
+#endif  /* U_STRING_CASE_MAPPER_DEFINED */
+
  U_NAMESPACE_BEGIN
  
  class BreakIterator;        // unicode/brkiter.h
@@ -3355,12 +3371,13 @@ private:
                              int32_t **pBufferToDelete = 0,
                              UBool forceClone = FALSE);
  
-  // common function for case mappings
+  /**
+   * Common function for UnicodeString case mappings.
+   * The stringCaseMapper has the same type UStringCaseMapper
+   * as in ustr_imp.h for ustrcase_map().
+   */
    UnicodeString &
-  caseMap(BreakIterator *titleIter,
-          const char *locale,
-          uint32_t options,
-          int32_t toWhichCase);
+  caseMap(const UCaseMap *csm, UStringCaseMapper *stringCaseMapper);
  
    // ref counting
    void addRef(void);
diff --git a/icu4c/source/common/uniset.cpp b/icu4c/source/common/uniset.cpp

index ed50e1758c26d1a2be6fde0883b93707b5dcbfa2..9ff9ae58f2a4c26f43d040b17ade4bfcbf4a9bd8 100644 (file)
--- a/icu4c/source/common/uniset.cpp
+++ b/icu4c/source/common/uniset.cpp
@@ -1559,7 +1559,7 @@ UBool UnicodeSet::allocateStrings(UErrorCode &status) {
      if (U_FAILURE(status)) {
          return FALSE;
      }
-    strings = new UVector(uhash_deleteUnicodeString,
+    strings = new UVector(uprv_deleteUObject,
                            uhash_compareUnicodeString, 1, status);
      if (strings == NULL) { // Check for memory allocation error.
          status = U_MEMORY_ALLOCATION_ERROR;
diff --git a/icu4c/source/common/uniset_closure.cpp b/icu4c/source/common/uniset_closure.cpp

new file mode 100644 (file)

index 0000000..1419793
--- /dev/null
+++ b/icu4c/source/common/uniset_closure.cpp
@@ -0,0 +1,280 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2011, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  uniset_closure.cpp
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2011may30
+*   created by: Markus W. Scherer
+*
+*   UnicodeSet::closeOver() and related methods moved here from uniset_props.cpp
+*   to simplify dependencies.
+*   In particular, this depends on the BreakIterator, but the BreakIterator
+*   code also builds UnicodeSets from patterns and needs uniset_props.
+*/
+
+#include "unicode/brkiter.h"
+#include "unicode/locid.h"
+#include "unicode/parsepos.h"
+#include "unicode/uniset.h"
+#include "cmemory.h"
+#include "ruleiter.h"
+#include "ucase.h"
+#include "util.h"
+#include "uvector.h"
+
+// initial storage. Must be >= 0
+// *** same as in uniset.cpp ! ***
+#define START_EXTRA 16
+
+U_NAMESPACE_BEGIN
+
+// TODO memory debugging provided inside uniset.cpp
+// could be made available here but probably obsolete with use of modern
+// memory leak checker tools
+#define _dbgct(me)
+
+//----------------------------------------------------------------
+// Constructors &c
+//----------------------------------------------------------------
+
+UnicodeSet::UnicodeSet(const UnicodeString& pattern,  // set pattern text to parse
+                       uint32_t options,  // forwarded unchanged to applyPattern()
+                       const SymbolTable* symbols,  // forwarded unchanged to applyPattern()
+                       UErrorCode& status) :  // in/out: nothing is allocated if this is already a failure
+    len(0), capacity(START_EXTRA), list(0), bmpSet(0), buffer(0),
+    bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL),
+    fFlags(0)
+{
+    if(U_SUCCESS(status)){
+        list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);  // capacity is START_EXTRA entries here
+        /* a NULL list means the allocation failed */
+        if(list == NULL) {
+            status = U_MEMORY_ALLOCATION_ERROR;  
+        }else{
+            allocateStrings(status);
+            applyPattern(pattern, options, symbols, status);  // returns early if allocateStrings() set a failure
+        }
+    }
+    _dbgct(this);  // expands to nothing in this file
+}
+
+UnicodeSet::UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,  // pos is passed on to applyPattern()
+                       uint32_t options,  // forwarded unchanged to applyPattern()
+                       const SymbolTable* symbols,  // forwarded unchanged to applyPattern()
+                       UErrorCode& status) :  // in/out: nothing is allocated if this is already a failure
+    len(0), capacity(START_EXTRA), list(0), bmpSet(0), buffer(0),
+    bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL),
+    fFlags(0)
+{
+    if(U_SUCCESS(status)){
+        list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);  // capacity is START_EXTRA entries here
+        /* a NULL list means the allocation failed */
+        if(list == NULL) {
+            status = U_MEMORY_ALLOCATION_ERROR;   
+        }else{
+            allocateStrings(status);
+            applyPattern(pattern, pos, options, symbols, status);  // returns early if allocateStrings() set a failure
+        }
+    }
+    _dbgct(this);  // expands to nothing in this file
+}
+
+//----------------------------------------------------------------
+// Public API
+//----------------------------------------------------------------
+
+UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern,  // must be consumed completely, see the length check below
+                                     uint32_t options,
+                                     const SymbolTable* symbols,
+                                     UErrorCode& status) {
+    ParsePosition pos(0);  // parse from the start of pattern
+    applyPattern(pattern, pos, options, symbols, status);
+    if (U_FAILURE(status)) return *this;
+
+    int32_t i = pos.getIndex();  // parse position left behind by the call above
+
+    if (options & USET_IGNORE_SPACE) {
+        // Skip over trailing whitespace
+        ICU_Utility::skipWhitespace(pattern, i, TRUE);
+    }
+
+    if (i != pattern.length()) {  // text remains after the parsed set pattern
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+    }
+    return *this;
+}
+
+UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern,
+                              ParsePosition& pos,  // handed to the RuleCharacterIterator below
+                              uint32_t options,
+                              const SymbolTable* symbols,
+                              UErrorCode& status) {
+    if (U_FAILURE(status)) {
+        return *this;
+    }
+    if (isFrozen()) {  // modifying a frozen set is reported through status
+        status = U_NO_WRITE_PERMISSION;
+        return *this;
+    }
+    // Need to build the pattern in a temporary string because
+    // _applyPattern calls add() etc., which set pat to empty.
+    UnicodeString rebuiltPat;
+    RuleCharacterIterator chars(pattern, symbols, pos);
+    applyPattern(chars, symbols, rebuiltPat, options, &UnicodeSet::closeOver, status);  // closeOver is passed as the case-closure callback
+    if (U_FAILURE(status)) return *this;
+    if (chars.inVariable()) {
+        // syntaxError(chars, "Extra chars in variable value");
+        status = U_MALFORMED_SET;
+        return *this;
+    }
+    setPattern(rebuiltPat);  // only reached when parsing succeeded
+    return *this;
+}
+
+// USetAdder implementation: callbacks stored in the USetAdder that closeOver() builds
+// Does not use uset.h to reduce code dependencies
+static void U_CALLCONV
+_set_add(USet *set, UChar32 c) {  // adds the single code point c
+    ((UnicodeSet *)set)->add(c);  // the USet pointer is cast back to the UnicodeSet it came from
+}
+
+static void U_CALLCONV
+_set_addRange(USet *set, UChar32 start, UChar32 end) {  // USetAdder callback: adds the range start..end
+    ((UnicodeSet *)set)->add(start, end);  // the USet pointer is cast back to a UnicodeSet
+}
+
+static void U_CALLCONV
+_set_addString(USet *set, const UChar *str, int32_t length) {  // USetAdder callback: adds a string
+    ((UnicodeSet *)set)->add(UnicodeString((UBool)(length<0), str, length));  // NOTE(review): presumably the read-only-alias constructor, with length<0 flagging a NUL-terminated str - confirm in unistr.h
+}
+
+//----------------------------------------------------------------
+// Case folding API
+//----------------------------------------------------------------
+
+// Adds the result of a full case mapping (result and full, as produced by the ucase_toFull*() calls in closeOver()) to set.
+// str is caller-provided scratch storage: it is overwritten for string results, so no temporary UnicodeString is constructed here.
+static inline void
+addCaseMapping(UnicodeSet &set, int32_t result, const UChar *full, UnicodeString &str) {
+    if(result >= 0) {
+        if(result > UCASE_MAX_STRING_LENGTH) {  // too large to be a string length, so result is a code point
+            // add a single-code point case mapping
+            set.add(result);
+        } else {
+            // add a string case mapping from full with length result
+            str.setTo((UBool)FALSE, full, result);
+            set.add(str);
+        }
+    }
+    // result < 0: the code point mapped to itself, no need to add it
+    // see ucase.h
+}
+
+UnicodeSet& UnicodeSet::closeOver(int32_t attribute) {  // attribute: bit set of USET_CASE_INSENSITIVE / USET_ADD_CASE_MAPPINGS
+    if (isFrozen() || isBogus()) {  // such sets are returned unchanged; no error is reported
+        return *this;
+    }
+    if (attribute & (USET_CASE_INSENSITIVE | USET_ADD_CASE_MAPPINGS)) {  // any other attribute value is a no-op
+        const UCaseProps *csp = ucase_getSingleton();
+        {
+            UnicodeSet foldSet(*this);  // results accumulate in this copy and are assigned back at the end
+            UnicodeString str;  // scratch string reused for every string that gets added
+            USetAdder sa = {  // callbacks that add into foldSet
+                foldSet.toUSet(),
+                _set_add,
+                _set_addRange,
+                _set_addString,
+                NULL, // don't need remove()
+                NULL // don't need removeRange()
+            };
+
+            // start with input set to guarantee inclusion
+            // USET_CASE: remove strings because the strings will actually be reduced (folded);
+            //            therefore, start with no strings and add only those needed
+            if (attribute & USET_CASE_INSENSITIVE) {
+                foldSet.strings->removeAllElements();  // NOTE(review): no NULL check on foldSet.strings, unlike the check on strings below - confirm the copy always allocates it
+            }
+
+            int32_t n = getRangeCount();  // ranges are read from this set, not from foldSet
+            UChar32 result;
+            const UChar *full;
+            int32_t locCache = 0;
+
+            for (int32_t i=0; i<n; ++i) {
+                UChar32 start = getRangeStart(i);
+                UChar32 end   = getRangeEnd(i);
+
+                if (attribute & USET_CASE_INSENSITIVE) {  // takes precedence when both attribute bits are set
+                    // full case closure
+                    for (UChar32 cp=start; cp<=end; ++cp) {
+                        ucase_addCaseClosure(csp, cp, &sa);
+                    }
+                } else {
+                    // add case mappings
+                    // (does not add long s for regular s, or Kelvin for k, for example)
+                    for (UChar32 cp=start; cp<=end; ++cp) {
+                        result = ucase_toFullLower(csp, cp, NULL, NULL, &full, "", &locCache);  // NOTE(review): "" presumably selects root-locale mappings - confirm in ucase.h
+                        addCaseMapping(foldSet, result, full, str);
+
+                        result = ucase_toFullTitle(csp, cp, NULL, NULL, &full, "", &locCache);
+                        addCaseMapping(foldSet, result, full, str);
+
+                        result = ucase_toFullUpper(csp, cp, NULL, NULL, &full, "", &locCache);
+                        addCaseMapping(foldSet, result, full, str);
+
+                        result = ucase_toFullFolding(csp, cp, &full, 0);
+                        addCaseMapping(foldSet, result, full, str);
+                    }
+                }
+            }
+            if (strings != NULL && strings->size() > 0) {  // second pass: the multi-character strings of this set
+                if (attribute & USET_CASE_INSENSITIVE) {
+                    for (int32_t j=0; j<strings->size(); ++j) {
+                        str = *(const UnicodeString *) strings->elementAt(j);  // copy, so foldCase() does not touch the stored string
+                        str.foldCase();
+                        if(!ucase_addStringCaseClosure(csp, str.getBuffer(), str.length(), &sa)) {
+                            foldSet.add(str); // does not map to code points: add the folded string itself
+                        }
+                    }
+                } else {
+                    Locale root("");
+#if !UCONFIG_NO_BREAK_ITERATION
+                    UErrorCode status = U_ZERO_ERROR;  // local only: closeOver() has no error parameter, so a failure is not reported
+                    BreakIterator *bi = BreakIterator::createWordInstance(root, status);
+                    if (U_SUCCESS(status)) {  // on failure the whole loop is skipped, so no lower/upper/folded strings are added either
+#endif
+                        const UnicodeString *pStr;
+
+                        for (int32_t j=0; j<strings->size(); ++j) {
+                            pStr = (const UnicodeString *) strings->elementAt(j);
+                            (str = *pStr).toLower(root);  // each mapping starts again from the original string
+                            foldSet.add(str);
+#if !UCONFIG_NO_BREAK_ITERATION
+                            (str = *pStr).toTitle(bi, root);  // title-cased form is omitted when break iteration is configured out
+                            foldSet.add(str);
+#endif
+                            (str = *pStr).toUpper(root);
+                            foldSet.add(str);
+                            (str = *pStr).foldCase();
+                            foldSet.add(str);
+                        }
+#if !UCONFIG_NO_BREAK_ITERATION
+                    }
+                    delete bi;  // runs whether or not creation succeeded
+#endif
+                }
+            }
+            *this = foldSet;  // replace this set with the accumulated closure
+        }
+    }
+    return *this;
+}
+
+U_NAMESPACE_END
diff --git a/icu4c/source/common/uniset_props.cpp b/icu4c/source/common/uniset_props.cpp

index 95ed2640aa47a77c8fdcafb74b0ec96e267ef33c..c4e46657f78611251a8428d597431da337da8411 100644 (file)
--- a/icu4c/source/common/uniset_props.cpp
+++ b/icu4c/source/common/uniset_props.cpp
@@ -331,65 +331,15 @@ UnicodeSet::UnicodeSet(const UnicodeString& pattern,
      len(0), capacity(START_EXTRA), list(0), bmpSet(0), buffer(0),
      bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL),
      fFlags(0)
-{   
-    if(U_SUCCESS(status)){
-        list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
-        /* test for NULL */
-        if(list == NULL) {
-            status = U_MEMORY_ALLOCATION_ERROR;  
-        }else{
-            allocateStrings(status);
-            applyPattern(pattern, USET_IGNORE_SPACE, NULL, status);
-        }
-    }
-    _dbgct(this);
-}
-
-/**
- * Constructs a set from the given pattern, optionally ignoring
- * white space.  See the class description for the syntax of the
- * pattern language.
- * @param pattern a string specifying what characters are in the set
- * @param options bitmask for options to apply to the pattern.
- * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
- */
-UnicodeSet::UnicodeSet(const UnicodeString& pattern,
-                       uint32_t options,
-                       const SymbolTable* symbols,
-                       UErrorCode& status) :
-    len(0), capacity(START_EXTRA), list(0), bmpSet(0), buffer(0),
-    bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL),
-    fFlags(0)
-{   
-    if(U_SUCCESS(status)){
-        list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
-        /* test for NULL */
-        if(list == NULL) {
-            status = U_MEMORY_ALLOCATION_ERROR;  
-        }else{
-            allocateStrings(status);
-            applyPattern(pattern, options, symbols, status);
-        }
-    }
-    _dbgct(this);
-}
-
-UnicodeSet::UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
-                       uint32_t options,
-                       const SymbolTable* symbols,
-                       UErrorCode& status) :
-    len(0), capacity(START_EXTRA), list(0), bmpSet(0), buffer(0),
-    bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL),
-    fFlags(0)
  {
      if(U_SUCCESS(status)){
          list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
          /* test for NULL */
          if(list == NULL) {
-            status = U_MEMORY_ALLOCATION_ERROR;   
+            status = U_MEMORY_ALLOCATION_ERROR;  
          }else{
              allocateStrings(status);
-            applyPattern(pattern, pos, options, symbols, status);
+            applyPattern(pattern, status);
          }
      }
      _dbgct(this);
@@ -401,64 +351,46 @@ UnicodeSet::UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
  
  UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern,
                                       UErrorCode& status) {
-    return applyPattern(pattern, USET_IGNORE_SPACE, NULL, status);
-}
-
-
-/**
- * Modifies this set to represent the set specified by the given
- * pattern, optionally ignoring white space.  See the class
- * description for the syntax of the pattern language.
- * @param pattern a string specifying what characters are in the set
- * @param options bitmask for options to apply to the pattern.
- * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
- */
-UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern,
-                                     uint32_t options,
-                                     const SymbolTable* symbols,
-                                     UErrorCode& status) {
-    if (U_FAILURE(status) || isFrozen()) {
-        return *this;
-    }
-
+    // Equivalent to
+    //   return applyPattern(pattern, USET_IGNORE_SPACE, NULL, status);
+    // but without dependency on closeOver().
      ParsePosition pos(0);
-    applyPattern(pattern, pos, options, symbols, status);
+    applyPatternIgnoreSpace(pattern, pos, NULL, status);
      if (U_FAILURE(status)) return *this;
  
      int32_t i = pos.getIndex();
-
-    if (options & USET_IGNORE_SPACE) {
-        // Skip over trailing whitespace
-        ICU_Utility::skipWhitespace(pattern, i, TRUE);
-    }
-
+    // Skip over trailing whitespace
+    ICU_Utility::skipWhitespace(pattern, i, TRUE);
      if (i != pattern.length()) {
          status = U_ILLEGAL_ARGUMENT_ERROR;
      }
      return *this;
  }
  
-UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern,
-                              ParsePosition& pos,
-                              uint32_t options,
-                              const SymbolTable* symbols,
-                              UErrorCode& status) {
-    if (U_FAILURE(status) || isFrozen()) {
-        return *this;
+void
+UnicodeSet::applyPatternIgnoreSpace(const UnicodeString& pattern,
+                                    ParsePosition& pos,
+                                    const SymbolTable* symbols,
+                                    UErrorCode& status) {
+    if (U_FAILURE(status)) {
+        return;
+    }
+    if (isFrozen()) {
+        status = U_NO_WRITE_PERMISSION;
+        return;
      }
      // Need to build the pattern in a temporary string because
      // _applyPattern calls add() etc., which set pat to empty.
      UnicodeString rebuiltPat;
      RuleCharacterIterator chars(pattern, symbols, pos);
-    applyPattern(chars, symbols, rebuiltPat, options, status);
-    if (U_FAILURE(status)) return *this;
+    applyPattern(chars, symbols, rebuiltPat, USET_IGNORE_SPACE, NULL, status);
+    if (U_FAILURE(status)) return;
      if (chars.inVariable()) {
          // syntaxError(chars, "Extra chars in variable value");
          status = U_MALFORMED_SET;
-        return *this;
+        return;
      }
      setPattern(rebuiltPat);
-    return *this;
  }
  
  /**
@@ -511,6 +443,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
                                const SymbolTable* symbols,
                                UnicodeString& rebuiltPat,
                                uint32_t options,
+                              UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
                                UErrorCode& ec) {
      if (U_FAILURE(ec)) return;
  
@@ -647,7 +580,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
              }
              switch (setMode) {
              case 1:
-                nested->applyPattern(chars, symbols, patLocal, options, ec);
+                nested->applyPattern(chars, symbols, patLocal, options, caseClosure, ec);
                  break;
              case 2:
                  chars.skipIgnored(opts);
@@ -879,10 +812,10 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
       * patterns like /[^abc]/i work.
       */
      if ((options & USET_CASE_INSENSITIVE) != 0) {
-        closeOver(USET_CASE_INSENSITIVE);
+        (this->*caseClosure)(USET_CASE_INSENSITIVE);
      }
      else if ((options & USET_ADD_CASE_MAPPINGS) != 0) {
-        closeOver(USET_ADD_CASE_MAPPINGS);
+        (this->*caseClosure)(USET_ADD_CASE_MAPPINGS);
      }
      if (invert) {
          complement();
@@ -1365,126 +1298,4 @@ void UnicodeSet::applyPropertyPattern(RuleCharacterIterator& chars,
      rebuiltPat.append(pattern, 0, pos.getIndex());
  }
  
-//----------------------------------------------------------------
-// Case folding API
-//----------------------------------------------------------------
-
-// add the result of a full case mapping to the set
-// use str as a temporary string to avoid constructing one
-static inline void
-addCaseMapping(UnicodeSet &set, int32_t result, const UChar *full, UnicodeString &str) {
-    if(result >= 0) {
-        if(result > UCASE_MAX_STRING_LENGTH) {
-            // add a single-code point case mapping
-            set.add(result);
-        } else {
-            // add a string case mapping from full with length result
-            str.setTo((UBool)FALSE, full, result);
-            set.add(str);
-        }
-    }
-    // result < 0: the code point mapped to itself, no need to add it
-    // see ucase.h
-}
-
-UnicodeSet& UnicodeSet::closeOver(int32_t attribute) {
-    if (isFrozen() || isBogus()) {
-        return *this;
-    }
-    if (attribute & (USET_CASE_INSENSITIVE | USET_ADD_CASE_MAPPINGS)) {
-        const UCaseProps *csp = ucase_getSingleton();
-        {
-            UnicodeSet foldSet(*this);
-            UnicodeString str;
-            USetAdder sa = {
-                foldSet.toUSet(),
-                _set_add,
-                _set_addRange,
-                _set_addString,
-                NULL, // don't need remove()
-                NULL // don't need removeRange()
-            };
-
-            // start with input set to guarantee inclusion
-            // USET_CASE: remove strings because the strings will actually be reduced (folded);
-            //            therefore, start with no strings and add only those needed
-            if (attribute & USET_CASE_INSENSITIVE) {
-                foldSet.strings->removeAllElements();
-            }
-
-            int32_t n = getRangeCount();
-            UChar32 result;
-            const UChar *full;
-            int32_t locCache = 0;
-
-            for (int32_t i=0; i<n; ++i) {
-                UChar32 start = getRangeStart(i);
-                UChar32 end   = getRangeEnd(i);
-
-                if (attribute & USET_CASE_INSENSITIVE) {
-                    // full case closure
-                    for (UChar32 cp=start; cp<=end; ++cp) {
-                        ucase_addCaseClosure(csp, cp, &sa);
-                    }
-                } else {
-                    // add case mappings
-                    // (does not add long s for regular s, or Kelvin for k, for example)
-                    for (UChar32 cp=start; cp<=end; ++cp) {
-                        result = ucase_toFullLower(csp, cp, NULL, NULL, &full, "", &locCache);
-                        addCaseMapping(foldSet, result, full, str);
-
-                        result = ucase_toFullTitle(csp, cp, NULL, NULL, &full, "", &locCache);
-                        addCaseMapping(foldSet, result, full, str);
-
-                        result = ucase_toFullUpper(csp, cp, NULL, NULL, &full, "", &locCache);
-                        addCaseMapping(foldSet, result, full, str);
-
-                        result = ucase_toFullFolding(csp, cp, &full, 0);
-                        addCaseMapping(foldSet, result, full, str);
-                    }
-                }
-            }
-            if (strings != NULL && strings->size() > 0) {
-                if (attribute & USET_CASE_INSENSITIVE) {
-                    for (int32_t j=0; j<strings->size(); ++j) {
-                        str = *(const UnicodeString *) strings->elementAt(j);
-                        str.foldCase();
-                        if(!ucase_addStringCaseClosure(csp, str.getBuffer(), str.length(), &sa)) {
-                            foldSet.add(str); // does not map to code points: add the folded string itself
-                        }
-                    }
-                } else {
-                    Locale root("");
-#if !UCONFIG_NO_BREAK_ITERATION
-                    UErrorCode status = U_ZERO_ERROR;
-                    BreakIterator *bi = BreakIterator::createWordInstance(root, status);
-                    if (U_SUCCESS(status)) {
-#endif
-                        const UnicodeString *pStr;
-
-                        for (int32_t j=0; j<strings->size(); ++j) {
-                            pStr = (const UnicodeString *) strings->elementAt(j);
-                            (str = *pStr).toLower(root);
-                            foldSet.add(str);
-#if !UCONFIG_NO_BREAK_ITERATION
-                            (str = *pStr).toTitle(bi, root);
-                            foldSet.add(str);
-#endif
-                            (str = *pStr).toUpper(root);
-                            foldSet.add(str);
-                            (str = *pStr).foldCase();
-                            foldSet.add(str);
-                        }
-#if !UCONFIG_NO_BREAK_ITERATION
-                    }
-                    delete bi;
-#endif
-                }
-            }
-            *this = foldSet;
-        }
-    }
-    return *this;
-}
-
  U_NAMESPACE_END
diff --git a/icu4c/source/common/unistr.cpp b/icu4c/source/common/unistr.cpp

index 67e8f28992eb54e629b28dcec002b7a04fe5a74a..f181cd3f01836e38620de0521066b70233075a02 100644 (file)
--- a/icu4c/source/common/unistr.cpp
+++ b/icu4c/source/common/unistr.cpp
@@ -1482,7 +1482,7 @@ UnicodeString::doHashCode() const
  {
      /* Delegate hash computation to uhash.  This makes UnicodeString
       * hashing consistent with UChar* hashing.  */
-    int32_t hashCode = uhash_hashUCharsN(getArrayStart(), length());
+    int32_t hashCode = ustr_hashUCharsN(getArrayStart(), length());
      if (hashCode == kInvalidHashCode) {
          hashCode = kEmptyHashCode;
      }
@@ -1674,6 +1674,29 @@ UnicodeStringAppendable::getAppendBuffer(int32_t minCapacity,
  
  U_NAMESPACE_END
  
+U_NAMESPACE_USE
+
+U_CAPI int32_t U_EXPORT2
+uhash_hashUnicodeString(const UHashTok key) {  // key.pointer is read as a const UnicodeString*
+    const UnicodeString *str = (const UnicodeString*) key.pointer;
+    return (str == NULL) ? 0 : str->hashCode();  // a NULL key hashes to 0
+}
+
+// Moved here from uhash_us.cpp so that using a UVector of UnicodeString*
+// does not depend on hashtable code.
+U_CAPI UBool U_EXPORT2
+uhash_compareUnicodeString(const UHashTok key1, const UHashTok key2) {  // both key pointers are read as const UnicodeString*
+    const UnicodeString *str1 = (const UnicodeString*) key1.pointer;
+    const UnicodeString *str2 = (const UnicodeString*) key2.pointer;
+    if (str1 == str2) {  // same object, or both NULL
+        return TRUE;
+    }
+    if (str1 == NULL || str2 == NULL) {  // exactly one is NULL at this point
+        return FALSE;
+    }
+    return *str1 == *str2;  // otherwise compare the string contents
+}
+
  #ifdef U_STATIC_IMPLEMENTATION
  /*
  This should never be called. It is defined here to make sure that the
@@ -1683,7 +1706,6 @@ but defining it here makes sure that it is included with this object file.
  This makes sure that static library dependencies are kept to a minimum.
  */
  static void uprv_UnicodeStringDummy(void) {
-    U_NAMESPACE_USE
      delete [] (new UnicodeString[2]);
  }
  #endif
diff --git a/icu4c/source/common/unistr_case.cpp b/icu4c/source/common/unistr_case.cpp

index a048c04eff34db7adc88b7f1a46ba616cfc00e89..4dda68a1813639bdb3cd643124d269f531e18a1d 100644 (file)
--- a/icu4c/source/common/unistr_case.cpp
+++ b/icu4c/source/common/unistr_case.cpp
@@ -1,7 +1,7 @@
  /*
  *******************************************************************************
  *
-*   Copyright (C) 1999-2010, International Business Machines
+*   Copyright (C) 1999-2011, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *
  *******************************************************************************
@@ -18,13 +18,11 @@
  
  #include "unicode/utypes.h"
  #include "unicode/putil.h"
-#include "unicode/locid.h"
  #include "cstring.h"
  #include "cmemory.h"
  #include "unicode/ustring.h"
  #include "unicode/unistr.h"
  #include "unicode/uchar.h"
-#include "unicode/ubrk.h"
  #include "ustr_imp.h"
  #include "uhash.h"
  
@@ -84,23 +82,14 @@ UnicodeString::doCaseCompare(int32_t start,
  // Write implementation
  //========================================
  
-/*
- * Implement argument checking and buffer handling
- * for string case mapping as a common function.
- */
-
  UnicodeString &
-UnicodeString::caseMap(BreakIterator *titleIter,
-                       const char *locale,
-                       uint32_t options,
-                       int32_t toWhichCase) {
+UnicodeString::caseMap(const UCaseMap *csm,
+                       UStringCaseMapper *stringCaseMapper) {
    if(isEmpty() || !isWritable()) {
      // nothing to do
      return *this;
    }
  
-  const UCaseProps *csp=ucase_getSingleton();
-
    // We need to allocate a new buffer for the internal string case mapping function.
    // This is very similar to how doReplace() keeps the old array pointer
    // and deletes the old array itself after it is done.
@@ -135,28 +124,8 @@ UnicodeString::caseMap(BreakIterator *titleIter,
    int32_t newLength;
    do {
      errorCode = U_ZERO_ERROR;
-    if(toWhichCase==TO_LOWER) {
-      newLength = ustr_toLower(csp, getArrayStart(), getCapacity(),
-                               oldArray, oldLength,
-                               locale, &errorCode);
-    } else if(toWhichCase==TO_UPPER) {
-      newLength = ustr_toUpper(csp, getArrayStart(), getCapacity(),
-                               oldArray, oldLength,
-                               locale, &errorCode);
-    } else if(toWhichCase==TO_TITLE) {
-#if UCONFIG_NO_BREAK_ITERATION
-        errorCode=U_UNSUPPORTED_ERROR;
-#else
-      newLength = ustr_toTitle(csp, getArrayStart(), getCapacity(),
-                               oldArray, oldLength,
-                               (UBreakIterator *)titleIter, locale, options, &errorCode);
-#endif
-    } else {
-      newLength = ustr_foldCase(csp, getArrayStart(), getCapacity(),
-                                oldArray, oldLength,
-                                options,
-                                &errorCode);
-    }
+    newLength = stringCaseMapper(csm, getArrayStart(), getCapacity(),
+                                 oldArray, oldLength, &errorCode);
      setLength(newLength);
    } while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(newLength, newLength, FALSE));
  
@@ -169,49 +138,12 @@ UnicodeString::caseMap(BreakIterator *titleIter,
    return *this;
  }
  
-UnicodeString &
-UnicodeString::toLower() {
-  return caseMap(0, Locale::getDefault().getName(), 0, TO_LOWER);
-}
-
-UnicodeString &
-UnicodeString::toLower(const Locale &locale) {
-  return caseMap(0, locale.getName(), 0, TO_LOWER);
-}
-
-UnicodeString &
-UnicodeString::toUpper() {
-  return caseMap(0, Locale::getDefault().getName(), 0, TO_UPPER);
-}
-
-UnicodeString &
-UnicodeString::toUpper(const Locale &locale) {
-  return caseMap(0, locale.getName(), 0, TO_UPPER);
-}
-
-#if !UCONFIG_NO_BREAK_ITERATION
-
-UnicodeString &
-UnicodeString::toTitle(BreakIterator *titleIter) {
-  return caseMap(titleIter, Locale::getDefault().getName(), 0, TO_TITLE);
-}
-
-UnicodeString &
-UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale) {
-  return caseMap(titleIter, locale.getName(), 0, TO_TITLE);
-}
-
-UnicodeString &
-UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options) {
-  return caseMap(titleIter, locale.getName(), options, TO_TITLE);
-}
-
-#endif
-
  UnicodeString &
  UnicodeString::foldCase(uint32_t options) {
-    /* The Locale parameter isn't used. Use "" instead. */
-    return caseMap(0, "", options, FOLD_CASE);
+  UCaseMap csm=UCASEMAP_INITIALIZER;
+  csm.csp=ucase_getSingleton();
+  csm.options=options;
+  return caseMap(&csm, ustrcase_internalFold);
  }
  
  U_NAMESPACE_END
@@ -244,4 +176,3 @@ uhash_compareCaselessUnicodeString(const UHashTok key1, const UHashTok key2) {
      }
      return str1->caseCompare(*str2, U_FOLD_CASE_DEFAULT) == 0;
  }
-
diff --git a/icu4c/source/common/unistr_case_locale.cpp b/icu4c/source/common/unistr_case_locale.cpp

new file mode 100644 (file)

index 0000000..a8d5704
--- /dev/null
+++ b/icu4c/source/common/unistr_case_locale.cpp
@@ -0,0 +1,70 @@
+/*
+*******************************************************************************
+*   Copyright (C) 2011, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*   file name:  unistr_case_locale.cpp
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2011may31
+*   created by: Markus W. Scherer
+*
+*   Locale-sensitive case mapping functions (ones that call uloc_getDefault())
+*   were moved here to break dependency cycles among parts of the common library.
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/locid.h"
+#include "unicode/unistr.h"
+#include "cmemory.h"
+#include "ustr_imp.h"
+
+U_NAMESPACE_BEGIN
+
+//========================================
+// Write implementation
+//========================================
+
+/*
+ * Set csp (if still NULL) and locale on an empty UCaseMap, for UCaseMap-less API
+ * functions. Do this fast because it is called with every function call.
+ */
+static inline void
+setTempCaseMap(UCaseMap *csm, const char *locale) {
+    if(csm->csp==NULL) {  // fill in the case properties only if the caller has not set them
+        csm->csp=ucase_getSingleton();
+    }
+    if(locale!=NULL && locale[0]==0) {  // empty locale string: store it directly, no helper call
+        csm->locale[0]=0;
+    } else {  // NULL or non-empty locale: ustrcase_setTempCaseMapLocale() fills in csm->locale
+        ustrcase_setTempCaseMapLocale(csm, locale);
+    }
+}
+
+UnicodeString &
+UnicodeString::toLower() {
+  return toLower(Locale::getDefault());  // same as toLower(const Locale &) with the default Locale
+}
+
+UnicodeString &
+UnicodeString::toLower(const Locale &locale) {
+  UCaseMap csm=UCASEMAP_INITIALIZER;  // temporary case map, local to this call
+  setTempCaseMap(&csm, locale.getName());  // sets csm.csp and the locale taken from locale.getName()
+  return caseMap(&csm, ustrcase_internalToLower);  // caseMap() applies the mapper to this string and returns *this
+}
+
+UnicodeString &
+UnicodeString::toUpper() {
+  return toUpper(Locale::getDefault());  // same as toUpper(const Locale &) with the default Locale
+}
+
+UnicodeString &
+UnicodeString::toUpper(const Locale &locale) {
+  UCaseMap csm=UCASEMAP_INITIALIZER;  // temporary case map, local to this call
+  setTempCaseMap(&csm, locale.getName());  // sets csm.csp and the locale taken from locale.getName()
+  return caseMap(&csm, ustrcase_internalToUpper);  // caseMap() applies the mapper to this string and returns *this
+}
+
+U_NAMESPACE_END
diff --git a/icu4c/source/common/unistr_titlecase_brkiter.cpp b/icu4c/source/common/unistr_titlecase_brkiter.cpp

new file mode 100644 (file)

index 0000000..9b60e2a
--- /dev/null
+++ b/icu4c/source/common/unistr_titlecase_brkiter.cpp
@@ -0,0 +1,90 @@
+/*
+*******************************************************************************
+*   Copyright (C) 2011, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*   file name:  unistr_titlecase_brkiter.cpp
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:2
+*
+*   created on: 2011may30
+*   created by: Markus W. Scherer
+*
+*   Titlecasing functions that are based on BreakIterator
+*   were moved here to break dependency cycles among parts of the common library.
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+#include "unicode/brkiter.h"
+#include "unicode/ubrk.h"
+#include "unicode/unistr.h"
+#include "unicode/ustring.h"
+#include "cmemory.h"
+#include "ustr_imp.h"
+
+static int32_t U_CALLCONV  // UStringCaseMapper-shaped wrapper that UnicodeString::toTitle() hands to caseMap()
+unistr_case_internalToTitle(const UCaseMap *csm,
+                            UChar *dest, int32_t destCapacity,
+                            const UChar *src, int32_t srcLength,
+                            UErrorCode *pErrorCode) {
+  ubrk_setText(csm->iter, src, srcLength, pErrorCode);  // attach the iterator to src on each call; caseMap() may call the mapper more than once
+  return ustrcase_internalToTitle(csm, dest, destCapacity, src, srcLength, pErrorCode);  // checks *pErrorCode before it uses the iterator
+}
+
+/*
+ * Set csp (if still NULL) and locale on an empty UCaseMap, for UCaseMap-less API
+ * functions. Do this fast because it is called with every function call.
+ */
+static inline void
+setTempCaseMap(UCaseMap *csm, const char *locale) {
+    if(csm->csp==NULL) {  // fill in the case properties only if the caller has not set them
+        csm->csp=ucase_getSingleton();
+    }
+    if(locale!=NULL && locale[0]==0) {  // empty locale string: store it directly, no helper call
+        csm->locale[0]=0;
+    } else {  // NULL or non-empty locale: ustrcase_setTempCaseMapLocale() fills in csm->locale
+        ustrcase_setTempCaseMapLocale(csm, locale);
+    }
+}
+
+U_NAMESPACE_BEGIN
+
+UnicodeString &
+UnicodeString::toTitle(BreakIterator *titleIter) {
+  return toTitle(titleIter, Locale::getDefault(), 0);  // default Locale, options=0
+}
+
+UnicodeString &
+UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale) {
+  return toTitle(titleIter, locale, 0);  // same as the three-argument overload with options=0
+}
+
+UnicodeString &
+UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options) {
+  UCaseMap csm=UCASEMAP_INITIALIZER;  // temporary case map, local to this call
+  csm.options=options;
+  setTempCaseMap(&csm, locale.getName());  // sets csm.csp and the locale taken from locale.getName()
+  BreakIterator *bi=titleIter;
+  if(bi==NULL) {  // no iterator supplied: create a word break iterator for locale
+    UErrorCode errorCode=U_ZERO_ERROR;
+    bi=BreakIterator::createWordInstance(locale, errorCode);
+    if(U_FAILURE(errorCode)) {  // creation failed: this string becomes bogus and no case mapping is done
+      setToBogus();
+      return *this;
+    }
+  }
+  csm.iter=reinterpret_cast<UBreakIterator *>(bi);  // the case map carries the iterator as a UBreakIterator pointer
+  caseMap(&csm, unistr_case_internalToTitle);  // the wrapper attaches the iterator to the text, then titlecases
+  if(titleIter==NULL) {  // delete only the iterator created above; a caller-supplied one is left alone
+    delete bi;
+  }
+  return *this;
+}
+
+U_NAMESPACE_END
+
+#endif  // !UCONFIG_NO_BREAK_ITERATION
diff --git a/icu4c/source/common/uobject.cpp b/icu4c/source/common/uobject.cpp

index 515e943e10a42b9085e68210f7a3cbfa41a1e972..bf31c573ce27f9b5263423e83f2c0bd5df4093b5 100644 (file)
--- a/icu4c/source/common/uobject.cpp
+++ b/icu4c/source/common/uobject.cpp
@@ -1,7 +1,7 @@
  /*
  ******************************************************************************
  *
-*   Copyright (C) 2002-2008, International Business Machines
+*   Copyright (C) 2002-2011, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *
  ******************************************************************************
@@ -111,4 +111,9 @@ UObject::~UObject() {}
  
  U_NAMESPACE_END
  
+U_NAMESPACE_USE
  
+U_CAPI void U_EXPORT2
+uprv_deleteUObject(void *obj) {  // deletes obj through a UObject pointer
+    delete static_cast<UObject *>(obj);  // void * to object pointer: static_cast is the matching named cast
+}
diff --git a/icu4c/source/common/uprops.cpp b/icu4c/source/common/uprops.cpp

index 81818b75859e8b1274d4d86edd5c05e4413eec55..3d57a861a5ee711f246bb694b47aeb32307ad1c4 100644 (file)
--- a/icu4c/source/common/uprops.cpp
+++ b/icu4c/source/common/uprops.cpp
@@ -1,7 +1,7 @@
  /*
  *******************************************************************************
  *
-*   Copyright (C) 2002-2010, International Business Machines
+*   Copyright (C) 2002-2011, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *
  *******************************************************************************
@@ -288,32 +288,6 @@ u_hasBinaryProperty(UChar32 c, UProperty which) {
      }
  }
  
-#if !UCONFIG_NO_NORMALIZATION
-
-U_CAPI uint8_t U_EXPORT2
-u_getCombiningClass(UChar32 c) {
-    UErrorCode errorCode=U_ZERO_ERROR;
-    const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
-    if(U_SUCCESS(errorCode)) {
-        return impl->getCC(impl->getNorm16(c));
-    } else {
-        return 0;
-    }
-}
-
-static uint16_t
-getFCD16(UChar32 c) {
-    UErrorCode errorCode=U_ZERO_ERROR;
-    const UTrie2 *trie=Normalizer2Factory::getFCDTrie(errorCode);
-    if(U_SUCCESS(errorCode)) {
-        return UTRIE2_GET16(trie, c);
-    } else {
-        return 0;
-    }
-}
-
-#endif
-
  struct IntProperty;
  
  typedef int32_t IntPropertyGetValue(const IntProperty &prop, UChar32 c, UProperty which);
@@ -427,7 +401,7 @@ static int32_t getLeadCombiningClass(const IntProperty &, UChar32, UProperty) {
  }
  #else
  static int32_t getLeadCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
-    return getFCD16(c)>>8;
+    return unorm_getFCD16Simple(c)>>8;
  }
  #endif
  
@@ -437,7 +411,7 @@ static int32_t getTrailCombiningClass(const IntProperty &, UChar32, UProperty) {
  }
  #else
  static int32_t getTrailCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
-    return getFCD16(c)&0xff;
+    return unorm_getFCD16Simple(c)&0xff;
  }
  #endif
  
diff --git a/icu4c/source/common/uscript.c b/icu4c/source/common/uscript.c

index 2a7000431f566c430965a64b72f4e155b97ae667..6193c80baffed68ab8a4ed60cffd9c9503d34688 100644 (file)
--- a/icu4c/source/common/uscript.c
+++ b/icu4c/source/common/uscript.c
@@ -1,6 +1,6 @@
  /*
  **********************************************************************
-*   Copyright (C) 1997-2010, International Business Machines
+*   Copyright (C) 1997-2011, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  **********************************************************************
  *
@@ -95,16 +95,3 @@ uscript_getCode(const char* nameOrAbbrOrLocale,
      }
      return numFilled;
  }
-
-U_CAPI const char*  U_EXPORT2
-uscript_getName(UScriptCode scriptCode){
-    return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode,
-                                  U_LONG_PROPERTY_NAME);
-}
-
-U_CAPI const char*  U_EXPORT2
-uscript_getShortName(UScriptCode scriptCode){
-    return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode,
-                                  U_SHORT_PROPERTY_NAME);
-}
-
diff --git a/icu4c/source/common/uset.cpp b/icu4c/source/common/uset.cpp

index 74ddba37c152d86667428e2e85c370d77509f9f0..5648a115d496226bac15b0c97e424f4b5147275b 100644 (file)
--- a/icu4c/source/common/uset.cpp
+++ b/icu4c/source/common/uset.cpp
@@ -149,11 +149,6 @@ uset_clear(USet* set) {
      ((UnicodeSet*) set)->UnicodeSet::clear();
  }
  
-U_CAPI void U_EXPORT2
-uset_closeOver(USet* set, int32_t attributes) {
-    ((UnicodeSet*) set)->UnicodeSet::closeOver(attributes);
-}
-
  U_CAPI void U_EXPORT2
  uset_removeAllStrings(USet* set) {
      ((UnicodeSet*) set)->UnicodeSet::removeAllStrings();
diff --git a/icu4c/source/common/uset_props.cpp b/icu4c/source/common/uset_props.cpp

index fb607390a2dd79e97abcc2249294f0e952b2b0f5..ed81d8b39dc1c5ce05dc65bc49be5602301a1c41 100644 (file)
--- a/icu4c/source/common/uset_props.cpp
+++ b/icu4c/source/common/uset_props.cpp
@@ -1,7 +1,7 @@
  /*
  *******************************************************************************
  *
-*   Copyright (C) 2002-2006, International Business Machines
+*   Copyright (C) 2002-2011, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *
  *******************************************************************************
@@ -38,7 +38,7 @@ uset_openPattern(const UChar* pattern, int32_t patternLength,
          *ec = U_MEMORY_ALLOCATION_ERROR;
          return 0;
      }
-    
+
      if (U_FAILURE(*ec)) {
          delete set;
          set = NULL;
@@ -58,7 +58,7 @@ uset_openPatternOptions(const UChar* pattern, int32_t patternLength,
          *ec = U_MEMORY_ALLOCATION_ERROR;
          return 0;
      }
-    
+
      if (U_FAILURE(*ec)) {
          delete set;
          set = NULL;
@@ -134,3 +134,8 @@ uset_toPattern(const USet* set,
      ((const UnicodeSet*) set)->toPattern(pat, escapeUnprintable);
      return pat.extract(result, resultCapacity, *ec);
  }
+
+U_CAPI void U_EXPORT2
+uset_closeOver(USet* set, int32_t attributes) {
+    ((UnicodeSet*) set)->UnicodeSet::closeOver(attributes);  // treats the USet as a UnicodeSet and calls UnicodeSet::closeOver() by its qualified name
+}
diff --git a/icu4c/source/common/ustr_imp.h b/icu4c/source/common/ustr_imp.h

index 43291d733dc2d1d1c0ae7ef86bfddfefcbc8b3d4..ee54d332ef04590796e0f7f2ea66aeedc8a3da28 100644 (file)
--- a/icu4c/source/common/ustr_imp.h
+++ b/icu4c/source/common/ustr_imp.h
@@ -19,7 +19,7 @@
  #include "unicode/uiter.h"
  #include "ucase.h"
  
-/** Simple declaration for u_strToTitle() to avoid including unicode/ubrk.h. */
+/** Simple declaration to avoid including unicode/ubrk.h. */
  #ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
  #   define UBRK_TYPEDEF_UBREAK_ITERATOR
      typedef struct UBreakIterator UBreakIterator;
@@ -105,48 +105,108 @@ typedef struct UCaseMap UCaseMap;
  #   define UCASEMAP_INITIALIZER { NULL, NULL, { 0 }, 0, 0 }
  #endif
  
-enum {
-    TO_LOWER,
-    TO_UPPER,
-    TO_TITLE,
-    FOLD_CASE
-};
+U_CFUNC void
+ustrcase_setTempCaseMapLocale(UCaseMap *csm, const char *locale);
  
-U_CFUNC int32_t
-ustr_toLower(const UCaseProps *csp,
-             UChar *dest, int32_t destCapacity,
-             const UChar *src, int32_t srcLength,
-             const char *locale,
-             UErrorCode *pErrorCode);
+#ifndef U_STRING_CASE_MAPPER_DEFINED
+#define U_STRING_CASE_MAPPER_DEFINED
  
-U_CFUNC int32_t
-ustr_toUpper(const UCaseProps *csp,
-             UChar *dest, int32_t destCapacity,
-             const UChar *src, int32_t srcLength,
-             const char *locale,
-             UErrorCode *pErrorCode);
+/**
+ * String case mapping function type, used by ustrcase_map().
+ * All error checking must be done.
+ * The UCaseMap must be fully initialized, with locale and/or iter set as needed.
+ * src and dest must not overlap.
+ */
+typedef int32_t U_CALLCONV
+UStringCaseMapper(const UCaseMap *csm,
+                  UChar *dest, int32_t destCapacity,
+                  const UChar *src, int32_t srcLength,
+                  UErrorCode *pErrorCode);
+
+#endif
+
+/** Implements UStringCaseMapper. */
+U_CFUNC int32_t U_CALLCONV
+ustrcase_internalToLower(const UCaseMap *csm,
+                         UChar *dest, int32_t destCapacity,
+                         const UChar *src, int32_t srcLength,
+                         UErrorCode *pErrorCode);
+
+/** Implements UStringCaseMapper. */
+U_CFUNC int32_t U_CALLCONV
+ustrcase_internalToUpper(const UCaseMap *csm,
+                         UChar *dest, int32_t destCapacity,
+                         const UChar *src, int32_t srcLength,
+                         UErrorCode *pErrorCode);
  
  #if !UCONFIG_NO_BREAK_ITERATION
  
+/** Implements UStringCaseMapper. */
+U_CFUNC int32_t U_CALLCONV
+ustrcase_internalToTitle(const UCaseMap *csm,
+                         UChar *dest, int32_t destCapacity,
+                         const UChar *src, int32_t srcLength,
+                         UErrorCode *pErrorCode);
+
+#endif
+
+/** Implements UStringCaseMapper. */
+U_CFUNC int32_t U_CALLCONV
+ustrcase_internalFold(const UCaseMap *csm,
+                      UChar *dest, int32_t destCapacity,
+                      const UChar *src, int32_t srcLength,
+                      UErrorCode *pErrorCode);
+
+/**
+ * Implements argument checking and buffer handling
+ * for string case mapping as a common function.
+ */
  U_CFUNC int32_t
-ustr_toTitle(const UCaseProps *csp,
+ustrcase_map(const UCaseMap *csm,
               UChar *dest, int32_t destCapacity,
               const UChar *src, int32_t srcLength,
-             UBreakIterator *titleIter,
-             const char *locale, uint32_t options,
+             UStringCaseMapper *stringCaseMapper,
               UErrorCode *pErrorCode);
  
-#endif
+/**
+ * UTF-8 string case mapping function type, used by ucasemap_mapUTF8().
+ * UTF-8 version of UStringCaseMapper.
+ * All error checking must be done.
+ * The UCaseMap must be fully initialized, with locale and/or iter set as needed.
+ * src and dest must not overlap.
+ */
+typedef int32_t U_CALLCONV
+UTF8CaseMapper(const UCaseMap *csm,
+               uint8_t *dest, int32_t destCapacity,
+               const uint8_t *src, int32_t srcLength,
+               UErrorCode *pErrorCode);
+
+/** Implements UTF8CaseMapper. */
+U_CFUNC int32_t U_CALLCONV
+ucasemap_internalUTF8ToTitle(const UCaseMap *csm,
+         uint8_t *dest, int32_t destCapacity,
+         const uint8_t *src, int32_t srcLength,
+         UErrorCode *pErrorCode);
  
  /**
- * Internal case folding function.
+ * Implements argument checking and buffer handling
+ * for UTF-8 string case mapping as a common function.
   */
  U_CFUNC int32_t
-ustr_foldCase(const UCaseProps *csp,
-              UChar *dest, int32_t destCapacity,
-              const UChar *src, int32_t srcLength,
-              uint32_t options,
-              UErrorCode *pErrorCode);
+ucasemap_mapUTF8(const UCaseMap *csm,
+                 uint8_t *dest, int32_t destCapacity,
+                 const uint8_t *src, int32_t srcLength,
+                 UTF8CaseMapper *stringCaseMapper,
+                 UErrorCode *pErrorCode);
+
+U_CAPI int32_t U_EXPORT2 
+ustr_hashUCharsN(const UChar *str, int32_t length);
+
+U_CAPI int32_t U_EXPORT2 
+ustr_hashCharsN(const char *str, int32_t length);
+
+U_CAPI int32_t U_EXPORT2
+ustr_hashICharsN(const char *str, int32_t length);
  
  /**
   * NUL-terminate a UChar * string if possible.
diff --git a/icu4c/source/common/ustr_titlecase_brkiter.cpp b/icu4c/source/common/ustr_titlecase_brkiter.cpp

new file mode 100644 (file)

index 0000000..4a2352e
--- /dev/null
+++ b/icu4c/source/common/ustr_titlecase_brkiter.cpp
@@ -0,0 +1,91 @@
+/*
+*******************************************************************************
+*   Copyright (C) 2011, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*   file name:  ustr_titlecase_brkiter.cpp
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2011may30
+*   created by: Markus W. Scherer
+*
+*   Titlecasing functions that are based on BreakIterator
+*   were moved here to break dependency cycles among parts of the common library.
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+#include "unicode/brkiter.h"
+#include "unicode/ubrk.h"
+#include "unicode/ucasemap.h"
+#include "cmemory.h"
+#include "ucase.h"
+#include "ustr_imp.h"
+
+/* file-local helper */
+
+/*
+ * Set csp (if still NULL) and locale on an empty UCaseMap, for UCaseMap-less API
+ * functions. Do this fast because it is called with every function call.
+ * Same helper as in unistr_case_locale.cpp and unistr_titlecase_brkiter.cpp, duplicated to keep it inline.
+ */
+static inline void
+setTempCaseMap(UCaseMap *csm, const char *locale) {
+    if(csm->csp==NULL) {  // fill in the case properties only if the caller has not set them
+        csm->csp=ucase_getSingleton();
+    }
+    if(locale!=NULL && locale[0]==0) {  // empty locale string: store it directly, no helper call
+        csm->locale[0]=0;
+    } else {  // NULL or non-empty locale: ustrcase_setTempCaseMapLocale() fills in csm->locale
+        ustrcase_setTempCaseMapLocale(csm, locale);
+    }
+}
+
+/* public API functions */
+
+U_CAPI int32_t U_EXPORT2
+u_strToTitle(UChar *dest, int32_t destCapacity,
+             const UChar *src, int32_t srcLength,
+             UBreakIterator *titleIter,
+             const char *locale,
+             UErrorCode *pErrorCode) {
+    UCaseMap csm=UCASEMAP_INITIALIZER;  // temporary case map, local to this call
+    setTempCaseMap(&csm, locale);  // sets csm.csp and csm.locale; locale may be NULL
+    if(titleIter!=NULL) {  // caller's iterator: store it in csm and attach it to src
+        ubrk_setText(csm.iter=titleIter, src, srcLength, pErrorCode);
+    } else {  // none supplied: open a word break iterator for csm.locale
+        csm.iter=ubrk_open(UBRK_WORD, csm.locale, src, srcLength, pErrorCode);
+    }
+    int32_t length=ustrcase_map(  // the iterator above is set up before ustrcase_map() checks the arguments
+        &csm,
+        dest, destCapacity,
+        src, srcLength,
+        ustrcase_internalToTitle, pErrorCode);
+    if(titleIter==NULL && csm.iter!=NULL) {  // close only the iterator opened here, never the caller's
+        ubrk_close(csm.iter);
+    }
+    return length;
+}
+
+U_CAPI int32_t U_EXPORT2
+ucasemap_toTitle(UCaseMap *csm,
+                 UChar *dest, int32_t destCapacity,
+                 const UChar *src, int32_t srcLength,
+                 UErrorCode *pErrorCode) {
+    if(csm->iter!=NULL) {  // csm already has an iterator: attach it to the new text
+        ubrk_setText(csm->iter, src, srcLength, pErrorCode);
+    } else {  // no iterator yet: open a word break iterator and store it in csm; it is not closed here
+        csm->iter=ubrk_open(UBRK_WORD, csm->locale, src, srcLength, pErrorCode);
+    }
+    return ustrcase_map(  // argument checking and buffer handling happen in ustrcase_map()
+        csm,
+        dest, destCapacity,
+        src, srcLength,
+        ustrcase_internalToTitle, pErrorCode);
+}
+
+#endif  // !UCONFIG_NO_BREAK_ITERATION
diff --git a/icu4c/source/common/ustrcase.cpp b/icu4c/source/common/ustrcase.cpp

index 15740527db5e38b7c57fa00ad0b3e5799e2b9455..17468c9d493d5ebffd93c06f3cb62cc05976c456 100644 (file)
--- a/icu4c/source/common/ustrcase.cpp
+++ b/icu4c/source/common/ustrcase.cpp
@@ -19,7 +19,7 @@
  */
  
  #include "unicode/utypes.h"
-#include "unicode/uloc.h"
+#include "unicode/brkiter.h"
  #include "unicode/ustring.h"
  #include "unicode/ucasemap.h"
  #include "unicode/ubrk.h"
@@ -29,9 +29,11 @@
  
  #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
  
+U_NAMESPACE_USE
+
  /* string casing ------------------------------------------------------------ */
  
-/* append a full case mapping result, see UCASE_MAX_STRING_LENGTH */
+/* Appends a full case mapping result, see UCASE_MAX_STRING_LENGTH. */
  static inline int32_t
  appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity,
               int32_t result, const UChar *s) {
@@ -155,81 +157,31 @@ _caseMap(const UCaseMap *csm, UCaseMapFull *map,
      return destIndex;
  }
  
-static void
-setTempCaseMapLocale(UCaseMap *csm, const char *locale, UErrorCode * /*pErrorCode*/) {
-    /*
-     * We could call ucasemap_setLocale(), but here we really only care about
-     * the initial language subtag, we need not return the real string via
-     * ucasemap_getLocale(), and we don't care about only getting "x" from
-     * "x-some-thing" etc.
-     *
-     * We ignore locales with a longer-than-3 initial subtag.
-     *
-     * We also do not fill in the locCache because it is rarely used,
-     * and not worth setting unless we reuse it for many case mapping operations.
-     * (That's why UCaseMap was created.)
-     */
-    int i;
-    char c;
-
-    /* the internal functions require locale!=NULL */
-    if(locale==NULL) {
-        locale=uloc_getDefault();
-    }
-    for(i=0; i<4 && (c=locale[i])!=0 && c!='-' && c!='_'; ++i) {
-        csm->locale[i]=c;
-    }
-    if(i<=3) {
-        csm->locale[i]=0;  /* Up to 3 non-separator characters. */
-    } else {
-        csm->locale[0]=0;  /* Longer-than-3 initial subtag: Ignore. */
-    }
-}
-
-/*
- * Set parameters on an empty UCaseMap, for UCaseMap-less API functions.
- * Do this fast because it is called with every function call.
- */
-static inline void
-setTempCaseMap(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) {
-    if(csm->csp==NULL) {
-        csm->csp=ucase_getSingleton();
-    }
-    if(locale!=NULL && locale[0]==0) {
-        csm->locale[0]=0;
-    } else {
-        setTempCaseMapLocale(csm, locale, pErrorCode);
-    }
-}
-
  #if !UCONFIG_NO_BREAK_ITERATION
  
-/*
- * Internal titlecasing function.
- */
-static int32_t
-_toTitle(UCaseMap *csm,
-         UChar *dest, int32_t destCapacity,
-         const UChar *src, UCaseContext *csc,
-         int32_t srcLength,
-         UErrorCode *pErrorCode) {
+U_CFUNC int32_t U_CALLCONV
+ustrcase_internalToTitle(const UCaseMap *csm,
+                         UChar *dest, int32_t destCapacity,
+                         const UChar *src, int32_t srcLength,
+                         UErrorCode *pErrorCode) {
      const UChar *s;
      UChar32 c;
      int32_t prev, titleStart, titleLimit, idx, destIndex, length;
      UBool isFirstIndex;
  
-    if(csm->iter!=NULL) {
-        ubrk_setText(csm->iter, src, srcLength, pErrorCode);
-    } else {
-        csm->iter=ubrk_open(UBRK_WORD, csm->locale,
-                            src, srcLength,
-                            pErrorCode);
-    }
      if(U_FAILURE(*pErrorCode)) {
          return 0;
      }
  
+    // Use the C++ abstract base class to minimize dependencies.
+    // TODO: Change UCaseMap.iter to store a BreakIterator directly.
+    BreakIterator *bi=reinterpret_cast<BreakIterator *>(csm->iter);
+
      /* set up local variables */
+    int32_t locCache=csm->locCache;
+    UCaseContext csc=UCASECONTEXT_INITIALIZER;
+    csc.p=(void *)src;
+    csc.limit=srcLength;
      destIndex=0;
      prev=0;
      isFirstIndex=TRUE;
@@ -239,9 +191,9 @@ _toTitle(UCaseMap *csm,
          /* find next index where to titlecase */
          if(isFirstIndex) {
              isFirstIndex=FALSE;
-            idx=ubrk_first(csm->iter);
+            idx=bi->first();
          } else {
-            idx=ubrk_next(csm->iter);
+            idx=bi->next();
          }
          if(idx==UBRK_DONE || idx>srcLength) {
              idx=srcLength;
@@ -291,14 +243,14 @@ _toTitle(UCaseMap *csm,
  
              if(titleStart<titleLimit) {
                  /* titlecase c which is from [titleStart..titleLimit[ */
-                csc->cpStart=titleStart;
-                csc->cpLimit=titleLimit;
-                c=ucase_toFullTitle(csm->csp, c, utf16_caseContextIterator, csc, &s, csm->locale, &csm->locCache);
+                csc.cpStart=titleStart;
+                csc.cpLimit=titleLimit;
+                c=ucase_toFullTitle(csm->csp, c, utf16_caseContextIterator, &csc, &s, csm->locale, &locCache);
                  destIndex=appendResult(dest, destIndex, destCapacity, c, s); 
  
                  /* Special case Dutch IJ titlecasing */
                  if ( titleStart+1 < idx && 
-                     ucase_getCaseLocale(csm->locale,&csm->locCache) == UCASE_LOC_DUTCH &&
+                     ucase_getCaseLocale(csm->locale,&locCache) == UCASE_LOC_DUTCH &&
                       ( src[titleStart] == (UChar32) 0x0049 || src[titleStart] == (UChar32) 0x0069 ) &&
                       ( src[titleStart+1] == (UChar32) 0x004A || src[titleStart+1] == (UChar32) 0x006A )) { 
                              c=(UChar32) 0x004A;
@@ -314,7 +266,7 @@ _toTitle(UCaseMap *csm,
                              _caseMap(
                                  csm, ucase_toFullLower,
                                  dest+destIndex, destCapacity-destIndex,
-                                src, csc,
+                                src, &csc,
                                  titleLimit, idx,
                                  pErrorCode);
                      } else {
@@ -338,83 +290,41 @@ _toTitle(UCaseMap *csm,
      return destIndex;
  }
  
-#endif
+#endif  // !UCONFIG_NO_BREAK_ITERATION
  
  /* functions available in the common library (for unistr_case.cpp) */
  
-U_CFUNC int32_t
-ustr_toLower(const UCaseProps *csp,
-             UChar *dest, int32_t destCapacity,
-             const UChar *src, int32_t srcLength,
-             const char *locale,
-             UErrorCode *pErrorCode) {
-    UCaseMap csm=UCASEMAP_INITIALIZER;
+U_CFUNC int32_t U_CALLCONV
+ustrcase_internalToLower(const UCaseMap *csm,
+                         UChar *dest, int32_t destCapacity,
+                         const UChar *src, int32_t srcLength,
+                         UErrorCode *pErrorCode) {
      UCaseContext csc=UCASECONTEXT_INITIALIZER;
-
-    csm.csp=csp;
-    setTempCaseMap(&csm, locale, pErrorCode);
      csc.p=(void *)src;
      csc.limit=srcLength;
-
-    return _caseMap(&csm, ucase_toFullLower,
-                    dest, destCapacity,
-                    src, &csc, 0, srcLength,
-                    pErrorCode);
+    return _caseMap(
+        csm, ucase_toFullLower,
+        dest, destCapacity,
+        src, &csc, 0, srcLength,
+        pErrorCode);
  }
  
-U_CFUNC int32_t
-ustr_toUpper(const UCaseProps *csp,
-             UChar *dest, int32_t destCapacity,
-             const UChar *src, int32_t srcLength,
-             const char *locale,
-             UErrorCode *pErrorCode) {
-    UCaseMap csm=UCASEMAP_INITIALIZER;
+U_CFUNC int32_t U_CALLCONV
+ustrcase_internalToUpper(const UCaseMap *csm,
+                         UChar *dest, int32_t destCapacity,
+                         const UChar *src, int32_t srcLength,
+                         UErrorCode *pErrorCode) {
      UCaseContext csc=UCASECONTEXT_INITIALIZER;
-
-    csm.csp=csp;
-    setTempCaseMap(&csm, locale, pErrorCode);
      csc.p=(void *)src;
      csc.limit=srcLength;
-
-    return _caseMap(&csm, ucase_toFullUpper,
-                    dest, destCapacity,
-                    src, &csc, 0, srcLength,
-                    pErrorCode);
+    return _caseMap(
+        csm, ucase_toFullUpper,
+        dest, destCapacity,
+        src, &csc, 0, srcLength,
+        pErrorCode);
  }
  
-#if !UCONFIG_NO_BREAK_ITERATION
-
-U_CFUNC int32_t
-ustr_toTitle(const UCaseProps *csp,
-             UChar *dest, int32_t destCapacity,
-             const UChar *src, int32_t srcLength,
-             UBreakIterator *titleIter,
-             const char *locale, uint32_t options,
-             UErrorCode *pErrorCode) {
-    UCaseMap csm=UCASEMAP_INITIALIZER;
-    UCaseContext csc=UCASECONTEXT_INITIALIZER;
-    int32_t length;
-
-    csm.csp=csp;
-    csm.iter=titleIter;
-    csm.options=options;
-    setTempCaseMap(&csm, locale, pErrorCode);
-    csc.p=(void *)src;
-    csc.limit=srcLength;
-
-    length=_toTitle(&csm,
-                    dest, destCapacity,
-                    src, &csc, srcLength,
-                    pErrorCode);
-    if(titleIter==NULL && csm.iter!=NULL) {
-        ubrk_close(csm.iter);
-    }
-    return length;
-}
-
-#endif
-
-U_CFUNC int32_t
+static int32_t
  ustr_foldCase(const UCaseProps *csp,
                UChar *dest, int32_t destCapacity,
                const UChar *src, int32_t srcLength,
@@ -444,26 +354,27 @@ ustr_foldCase(const UCaseProps *csp,
      return destIndex;
  }
  
-/*
- * Implement argument checking and buffer handling
- * for string case mapping as a common function.
- */
-
-/* common internal function for public API functions */
+U_CFUNC int32_t U_CALLCONV  // case-folding entry point with the UStringCaseMapper signature
+ustrcase_internalFold(const UCaseMap *csm,
+                      UChar *dest, int32_t destCapacity,
+                      const UChar *src, int32_t srcLength,
+                      UErrorCode *pErrorCode) {
+    return ustr_foldCase(csm->csp, dest, destCapacity, src, srcLength, csm->options, pErrorCode);  // reads only csp and options from csm
+}
  
-static int32_t
-caseMap(const UCaseMap *csm,
-        UChar *dest, int32_t destCapacity,
-        const UChar *src, int32_t srcLength,
-        int32_t toWhichCase,
-        UErrorCode *pErrorCode) {
+U_CFUNC int32_t
+ustrcase_map(const UCaseMap *csm,
+             UChar *dest, int32_t destCapacity,
+             const UChar *src, int32_t srcLength,
+             UStringCaseMapper *stringCaseMapper,
+             UErrorCode *pErrorCode) {
      UChar buffer[300];
      UChar *temp;
  
      int32_t destLength;
  
      /* check argument values */
-    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+    if(U_FAILURE(*pErrorCode)) {
          return 0;
      }
      if( destCapacity<0 ||
@@ -501,40 +412,7 @@ caseMap(const UCaseMap *csm,
          temp=dest;
      }
  
-    destLength=0;
-
-    if(toWhichCase==FOLD_CASE) {
-        destLength=ustr_foldCase(csm->csp, temp, destCapacity, src, srcLength,
-                                 csm->options, pErrorCode);
-    } else {
-        UCaseContext csc=UCASECONTEXT_INITIALIZER;
-
-        csc.p=(void *)src;
-        csc.limit=srcLength;
-
-        if(toWhichCase==TO_LOWER) {
-            destLength=_caseMap(csm, ucase_toFullLower,
-                                temp, destCapacity,
-                                src, &csc,
-                                0, srcLength,
-                                pErrorCode);
-        } else if(toWhichCase==TO_UPPER) {
-            destLength=_caseMap(csm, ucase_toFullUpper,
-                                temp, destCapacity,
-                                src, &csc,
-                                0, srcLength,
-                                pErrorCode);
-        } else /* if(toWhichCase==TO_TITLE) */ {
-#if UCONFIG_NO_BREAK_ITERATION
-            *pErrorCode=U_UNSUPPORTED_ERROR;
-#else
-            /* UCaseMap is actually non-const in toTitle() APIs. */
-            destLength=_toTitle((UCaseMap *)csm, temp, destCapacity,
-                                src, &csc, srcLength,
-                                pErrorCode);
-#endif
-        }
-    }
+    destLength=stringCaseMapper(csm, temp, destCapacity, src, srcLength, pErrorCode);
      if(temp!=dest) {
          /* copy the result string to the destination buffer */
          if(destLength>0) {
@@ -553,68 +431,6 @@ caseMap(const UCaseMap *csm,
  
  /* public API functions */
  
-U_CAPI int32_t U_EXPORT2
-u_strToLower(UChar *dest, int32_t destCapacity,
-             const UChar *src, int32_t srcLength,
-             const char *locale,
-             UErrorCode *pErrorCode) {
-    UCaseMap csm=UCASEMAP_INITIALIZER;
-    setTempCaseMap(&csm, locale, pErrorCode);
-    return caseMap(&csm,
-                   dest, destCapacity,
-                   src, srcLength,
-                   TO_LOWER, pErrorCode);
-}
-
-U_CAPI int32_t U_EXPORT2
-u_strToUpper(UChar *dest, int32_t destCapacity,
-             const UChar *src, int32_t srcLength,
-             const char *locale,
-             UErrorCode *pErrorCode) {
-    UCaseMap csm=UCASEMAP_INITIALIZER;
-    setTempCaseMap(&csm, locale, pErrorCode);
-    return caseMap(&csm,
-                   dest, destCapacity,
-                   src, srcLength,
-                   TO_UPPER, pErrorCode);
-}
-
-#if !UCONFIG_NO_BREAK_ITERATION
-
-U_CAPI int32_t U_EXPORT2
-u_strToTitle(UChar *dest, int32_t destCapacity,
-             const UChar *src, int32_t srcLength,
-             UBreakIterator *titleIter,
-             const char *locale,
-             UErrorCode *pErrorCode) {
-    UCaseMap csm=UCASEMAP_INITIALIZER;
-    int32_t length;
-
-    csm.iter=titleIter;
-    setTempCaseMap(&csm, locale, pErrorCode);
-    length=caseMap(&csm,
-                   dest, destCapacity,
-                   src, srcLength,
-                   TO_TITLE, pErrorCode);
-    if(titleIter==NULL && csm.iter!=NULL) {
-        ubrk_close(csm.iter);
-    }
-    return length;
-}
-
-U_CAPI int32_t U_EXPORT2
-ucasemap_toTitle(UCaseMap *csm,
-                 UChar *dest, int32_t destCapacity,
-                 const UChar *src, int32_t srcLength,
-                 UErrorCode *pErrorCode) {
-    return caseMap(csm,
-                   dest, destCapacity,
-                   src, srcLength,
-                   TO_TITLE, pErrorCode);
-}
-
-#endif
-
  U_CAPI int32_t U_EXPORT2
  u_strFoldCase(UChar *dest, int32_t destCapacity,
                const UChar *src, int32_t srcLength,
@@ -623,10 +439,11 @@ u_strFoldCase(UChar *dest, int32_t destCapacity,
      UCaseMap csm=UCASEMAP_INITIALIZER;
      csm.csp=ucase_getSingleton();
      csm.options=options;
-    return caseMap(&csm,
-                   dest, destCapacity,
-                   src, srcLength,
-                   FOLD_CASE, pErrorCode);
+    return ustrcase_map(
+        &csm,
+        dest, destCapacity,
+        src, srcLength,
+        ustrcase_internalFold, pErrorCode);
  }
  
  /* case-insensitive string comparisons -------------------------------------- */
diff --git a/icu4c/source/common/ustrcase_locale.cpp b/icu4c/source/common/ustrcase_locale.cpp

new file mode 100644 (file)

index 0000000..5707c5a
--- /dev/null
+++ b/icu4c/source/common/ustrcase_locale.cpp
@@ -0,0 +1,110 @@
+/*
+*******************************************************************************
+*   Copyright (C) 2011, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*   file name:  ustrcase_locale.cpp
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2011may31
+*   created by: Markus W. Scherer
+*
+*   Locale-sensitive case mapping functions (ones that call uloc_getDefault())
+*   were moved here to break dependency cycles among parts of the common library.
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/ucasemap.h"
+#include "unicode/uloc.h"
+#include "unicode/ustring.h"
+#include "ucase.h"
+#include "ustr_imp.h"
+
+U_CFUNC void
+ustrcase_setTempCaseMapLocale(UCaseMap *csm, const char *locale) {
+    /*
+     * Stores only the initial language subtag of the locale ID in csm->locale.
+     *
+     * Calling ucasemap_setLocale() would also work, but nothing here needs
+     * the full locale string to be retrievable via ucasemap_getLocale(),
+     * and a truncated subtag such as "x" from "x-some-thing" is acceptable.
+     * An initial subtag longer than 3 characters is ignored:
+     * csm->locale becomes the empty string.
+     *
+     * The locCache is not filled in either. It is rarely used, and setting it
+     * only pays off when it is reused for many case mapping operations
+     * (which is why UCaseMap was created).
+     */
+    if(locale==NULL) {
+        // The internal functions require locale!=NULL,
+        // so substitute the default locale.
+        //
+        // uprv_getDefaultLocaleID() is not used here: it does not see changes
+        // made via uloc_setDefault(), and it does not cache the locale ID,
+        // which would make frequent calls inefficient.
+        //
+        // uloc_getDefault() unfortunately has many dependencies, although only
+        // a small set of language subtags matters here and the locale ID
+        // need not be canonicalized.
+        // Best is to not call case mapping functions with a NULL locale ID.
+        locale=uloc_getDefault();
+    }
+    int32_t subtagLength=0;
+    while(subtagLength<4) {
+        const char ch=locale[subtagLength];
+        if(ch==0 || ch=='-' || ch=='_') {
+            break;  // End of the initial subtag.
+        }
+        csm->locale[subtagLength++]=ch;
+    }
+    // Up to 3 characters: terminate after them. A 4th one: store "" instead.
+    csm->locale[subtagLength<=3 ? subtagLength : 0]=0;
+}
+
+/*
+ * Prepares an empty, temporary UCaseMap for the UCaseMap-less API functions.
+ * Kept cheap because it runs on every such function call.
+ */
+static inline void
+setTempCaseMap(UCaseMap *csm, const char *locale) {
+    if(csm->csp==NULL) {
+        csm->csp=ucase_getSingleton();  // Caller did not supply case properties.
+    }
+    if(locale==NULL || locale[0]!=0) {
+        ustrcase_setTempCaseMapLocale(csm, locale);  // NULL or non-empty locale ID.
+    } else {
+        csm->locale[0]=0;  // Empty locale ID: skip the subtag parsing.
+    }
+}
+
+/* public API functions */
+
+/* Maps src to lowercase into dest for the given locale, via a temporary case map. */
+U_CAPI int32_t U_EXPORT2
+u_strToLower(UChar *dest, int32_t destCapacity,
+             const UChar *src, int32_t srcLength,
+             const char *locale, UErrorCode *pErrorCode) {
+    /* Stack-allocated case map that is used only for this call. */
+    UCaseMap tempMap=UCASEMAP_INITIALIZER;
+    setTempCaseMap(&tempMap, locale);
+    /* ustrcase_map() checks the arguments and then runs the lowercasing mapper. */
+    const int32_t length=ustrcase_map(&tempMap, dest, destCapacity, src, srcLength,
+                                      ustrcase_internalToLower, pErrorCode);
+    return length;
+}
+
+/* Maps src to uppercase into dest for the given locale, via a temporary case map. */
+U_CAPI int32_t U_EXPORT2
+u_strToUpper(UChar *dest, int32_t destCapacity,
+             const UChar *src, int32_t srcLength,
+             const char *locale, UErrorCode *pErrorCode) {
+    /* Stack-allocated case map that is used only for this call. */
+    UCaseMap tempMap=UCASEMAP_INITIALIZER;
+    setTempCaseMap(&tempMap, locale);
+    /* ustrcase_map() checks the arguments and then runs the uppercasing mapper. */
+    const int32_t length=ustrcase_map(&tempMap, dest, destCapacity, src, srcLength,
+                                      ustrcase_internalToUpper, pErrorCode);
+    return length;
+}
diff --git a/icu4c/source/common/ustring.cpp b/icu4c/source/common/ustring.cpp

index 2d181aa72b7708eb55cdcc3757cc85bd1c5ff5de..297b095d2d765f89405ff1759afb3449db85e4f9 100644 (file)
--- a/icu4c/source/common/ustring.cpp
+++ b/icu4c/source/common/ustring.cpp
@@ -1463,3 +1463,47 @@ u_terminateWChars(wchar_t *dest, int32_t destCapacity, int32_t length, UErrorCod
      __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
      return length;
  }
+
+// Compute the hash code for a string -------------------------------------- ***
+
+// Moved here from uhash.c so that UnicodeString::hashCode() does not depend
+// on UHashtable code.
+
+/*
+  Compute the hash by iterating sparsely over about 32 (up to 63) characters
+  spaced evenly through the string.  For each character, multiply the previous
+  hash value by a prime number and add the new character in, like a linear
+  congruential random number generator, producing a pseudorandom deterministic
+  value well distributed over the output range. [LIU]  The hash is accumulated
+  as uint32_t because overflow of a signed int32_t would be undefined behavior.
+*/
+
+#define STRING_HASH(TYPE, STR, STRLEN, DEREF) \
+    uint32_t hash = 0;                        \
+    const TYPE *p = (const TYPE*)(STR);       \
+    if (p != NULL) {                          \
+        int32_t len = (int32_t)(STRLEN);      \
+        int32_t inc = ((len - 32) / 32) + 1;  \
+        const TYPE *limit = p + len;          \
+        while (p<limit) {                     \
+            hash = (hash * 37) + (DEREF);     \
+            p += inc;                         \
+        }                                     \
+    }                                         \
+    return (int32_t)hash
+
+/* Hashes length UChars at str; 0 if str is NULL. Used by UnicodeString to compute its hashcode - Not public API. */
+U_CAPI int32_t U_EXPORT2
+ustr_hashUCharsN(const UChar *str, int32_t length) {
+    STRING_HASH(UChar, str, length, *p);  /* the macro expands to the whole body, including the return */
+}
+
+U_CAPI int32_t U_EXPORT2  /* Hashes length chars at str, each read as a uint8_t; 0 if str is NULL. */
+ustr_hashCharsN(const char *str, int32_t length) {
+    STRING_HASH(uint8_t, str, length, *p);  /* the macro expands to the whole body, including the return */
+}
+
+U_CAPI int32_t U_EXPORT2  /* Hashes length chars at str after uprv_tolower() on each one; 0 if str is NULL. */
+ustr_hashICharsN(const char *str, int32_t length) {
+    STRING_HASH(char, str, length, (uint8_t)uprv_tolower(*p));  /* expands to the whole body, including the return */
+}
diff --git a/icu4c/source/common/util.cpp b/icu4c/source/common/util.cpp

index c5395ffa896eb1c2744a19da25ef38dc7dbf4792..e67ef4642d19484671c059f8a7f380227e8f2a8d 100644 (file)
--- a/icu4c/source/common/util.cpp
+++ b/icu4c/source/common/util.cpp
@@ -9,7 +9,6 @@
  */
  
  #include "unicode/unimatch.h"
-#include "unicode/uniset.h"
  #include "patternprops.h"
  #include "util.h"
  
@@ -407,22 +406,3 @@ void ICU_Utility::appendToRule(UnicodeString& rule,
  }
  
  U_NAMESPACE_END
-
-U_CAPI U_NAMESPACE_QUALIFIER UnicodeSet* U_EXPORT2
-uprv_openPatternWhiteSpaceSet(UErrorCode* ec) {
-    if(U_FAILURE(*ec)) {
-        return NULL;
-    }
-    // create a set with the Pattern_White_Space characters,
-    // without a pattern string for fewer code dependencies
-    U_NAMESPACE_QUALIFIER UnicodeSet *set=new U_NAMESPACE_QUALIFIER UnicodeSet(9, 0xd);
-    // Check for new failure.
-    if (set == NULL) {
-        *ec = U_MEMORY_ALLOCATION_ERROR;
-        return NULL;
-    }
-    set->UnicodeSet::add(0x20).add(0x85).add(0x200e, 0x200f).add(0x2028, 0x2029);
-    return set;
-}
-
-//eof
diff --git a/icu4c/source/common/util.h b/icu4c/source/common/util.h

index ac88bd043176ed61da943443538ea297260be10e..7cb2c5a2e74c08c3bcf0f9d48c6b34e8cd3af7fa 100644 (file)
--- a/icu4c/source/common/util.h
+++ b/icu4c/source/common/util.h
@@ -23,7 +23,6 @@
  U_NAMESPACE_BEGIN
  
  class UnicodeMatcher;
-class UnicodeSet;
  
  class U_COMMON_API ICU_Utility /* not : public UObject because all methods are static */ {
   public:
@@ -236,15 +235,5 @@ private:
  
  U_NAMESPACE_END
  
-/**
- * Returns a new set with the Pattern_White_Space characters.
- * The caller must close/delete the result.
- * Stable set of characters, won't change.
- * See UAX #31 Identifier and Pattern Syntax: http://www.unicode.org/reports/tr31/
- * @internal
- */
-U_CAPI U_NAMESPACE_QUALIFIER UnicodeSet* U_EXPORT2
-uprv_openPatternWhiteSpaceSet(UErrorCode* ec);
-
  #endif
  //eof
diff --git a/icu4c/source/common/uts46.cpp b/icu4c/source/common/uts46.cpp

index e08ee3566e46ba0512880a45daf1eb62532febfd..59792e5e6aae4a71076884f80395f5abdf1cfd02 100644 (file)
--- a/icu4c/source/common/uts46.cpp
+++ b/icu4c/source/common/uts46.cpp
@@ -1,6 +1,6 @@
  /*
  *******************************************************************************
-*   Copyright (C) 2010, International Business Machines
+*   Copyright (C) 2010-2011, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *******************************************************************************
  *   file name:  uts46.cpp
@@ -22,6 +22,7 @@
  #include "cmemory.h"
  #include "cstring.h"
  #include "punycode.h"
+#include "ubidi_props.h"
  #include "ustr_imp.h"
  
  #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
@@ -1102,6 +1103,7 @@ isASCIIOkBiDi(const char *s, int32_t length) {
  
  UBool
  UTS46::isLabelOkContextJ(const UChar *label, int32_t labelLength) const {
+    const UBiDiProps *bdp=ubidi_getSingleton();
      // [IDNA2008-Tables]
      // 200C..200D  ; CONTEXTJ    # ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
      for(int32_t i=0; i<labelLength; ++i) {
@@ -1123,7 +1125,7 @@ UTS46::isLabelOkContextJ(const UChar *label, int32_t labelLength) const {
              }
              // check precontext (Joining_Type:{L,D})(Joining_Type:T)*
              for(;;) {
-                UJoiningType type=(UJoiningType)u_getIntPropertyValue(c, UCHAR_JOINING_TYPE);
+                UJoiningType type=ubidi_getJoiningType(bdp, c);
                  if(type==U_JT_TRANSPARENT) {
                      if(j==0) {
                          return FALSE;
@@ -1141,7 +1143,7 @@ UTS46::isLabelOkContextJ(const UChar *label, int32_t labelLength) const {
                      return FALSE;
                  }
                  U16_NEXT_UNSAFE(label, j, c);
-                UJoiningType type=(UJoiningType)u_getIntPropertyValue(c, UCHAR_JOINING_TYPE);
+                UJoiningType type=ubidi_getJoiningType(bdp, c);
                  if(type==U_JT_TRANSPARENT) {
                      // just skip this character
                  } else if(type==U_JT_RIGHT_JOINING || type==U_JT_DUAL_JOINING) {
diff --git a/icu4c/source/i18n/alphaindex.cpp b/icu4c/source/i18n/alphaindex.cpp

index 870aee01fa0034986d70d5a96bb2ceb3e3680337..f8d5ffc3db4143f5426eaa2961aeb510c650e8cf 100644 (file)
--- a/icu4c/source/i18n/alphaindex.cpp
+++ b/icu4c/source/i18n/alphaindex.cpp
@@ -32,7 +32,7 @@
  #include "uvector.h"
  
  #include <string>
-#include <iostream>
+//#include <iostream>
  U_NAMESPACE_BEGIN
  
  UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(AlphabeticIndex)
@@ -47,15 +47,6 @@ sortCollateComparator(const void *context, const void *left, const void *right);
  static int32_t U_CALLCONV
  recordCompareFn(const void *context, const void *left, const void *right);
  
-//
-//  UHash support function, delete a UnicodeSet
-//     TODO:  move this function into uhash.
-//
-static void U_CALLCONV
-uhash_deleteUnicodeSet(void *obj) {
-    delete static_cast<UnicodeSet *>(obj);
-}
-
  //  UVector<Bucket *> support function, delete a Bucket.
  static void U_CALLCONV
  alphaIndex_deleteBucket(void *obj) {
@@ -183,7 +174,7 @@ void AlphabeticIndex::buildIndex(UErrorCode &status) {
      // that are the same according to the collator
  
      UVector preferenceSorting(status);   // Vector of UnicodeStrings; owned by the vector.
-    preferenceSorting.setDeleter(uhash_deleteUnicodeString);
+    preferenceSorting.setDeleter(uprv_deleteUObject);
      appendUnicodeSetToUVector(preferenceSorting, *initialLabels_, status);
      preferenceSorting.sortWithUComparator(PreferenceComparator, &status, status);
  
@@ -236,7 +227,7 @@ void AlphabeticIndex::buildIndex(UErrorCode &status) {
      const int32_t size = labelSet.size() - 1;
      if (size > maxLabelCount_) {
          UVector *newLabels = new UVector(status);
-        newLabels->setDeleter(uhash_deleteUnicodeString);
+        newLabels->setDeleter(uprv_deleteUObject);
          int32_t count = 0;
          int32_t old = -1;
          for (int32_t srcIndex=0; srcIndex<labels_->size(); srcIndex++) {
@@ -580,13 +571,13 @@ void AlphabeticIndex::init(UErrorCode &status) {
                                          uhash_compareUnicodeString, // key Comparator,
                                          NULL,                       // value Comparator
                                          &status);
-    uhash_setKeyDeleter(alreadyIn_, uhash_deleteUnicodeString);
-    uhash_setValueDeleter(alreadyIn_, uhash_deleteUnicodeSet);
+    uhash_setKeyDeleter(alreadyIn_, uprv_deleteUObject);
+    uhash_setValueDeleter(alreadyIn_, uprv_deleteUObject);
  
      bucketList_            = new UVector(status);
      bucketList_->setDeleter(alphaIndex_deleteBucket);
      labels_                = new UVector(status);
-    labels_->setDeleter(uhash_deleteUnicodeString);
+    labels_->setDeleter(uprv_deleteUObject);
      labels_->setComparer(uhash_compareUnicodeString);
      inputRecords_          = new UVector(status);
      inputRecords_->setDeleter(alphaIndex_deleteRecord);
@@ -839,7 +830,7 @@ UVector *AlphabeticIndex::firstStringsInScript(Collator *ruleBasedCollator, UErr
      }
  
      UVector *dest = new UVector(status);
-    dest->setDeleter(uhash_deleteUnicodeString);
+    dest->setDeleter(uprv_deleteUObject);
      for (uint32_t i = 0; i < sizeof(results) / sizeof(results[0]); ++i) {
          if (results[i].length() > 0) {
              dest->addElement(results[i].clone(), status);
@@ -876,7 +867,7 @@ UVector *AlphabeticIndex::firstStringsInScript(UErrorCode &status) {
          return NULL;
      }
      UVector *dest = new UVector(status);
-    dest->setDeleter(uhash_deleteUnicodeString);
+    dest->setDeleter(uprv_deleteUObject);
      if (dest == NULL && U_SUCCESS(status)) {
          status = U_MEMORY_ALLOCATION_ERROR;
      }
diff --git a/icu4c/source/i18n/calendar.cpp b/icu4c/source/i18n/calendar.cpp

index d41180a5e0fb9dbd4fe8cf35144dbff633bc3246..b6a904aff02dd320b2a384097df7affdf3d414e8 100644 (file)
--- a/icu4c/source/i18n/calendar.cpp
+++ b/icu4c/source/i18n/calendar.cpp
@@ -437,7 +437,7 @@ protected:
          } else {
              ret->append((UChar)0x40); // '@' is a variant character
              ret->append(UNICODE_STRING("calendar=", 9));
-            ret->append(UnicodeString(gCalTypes[getCalendarTypeForLocale(loc.getName())]));
+            ret->append(UnicodeString(gCalTypes[getCalendarTypeForLocale(loc.getName())], -1, US_INV));
          }
          return ret;
      }
diff --git a/icu4c/source/i18n/currpinf.cpp b/icu4c/source/i18n/currpinf.cpp

index d3c95b1f7e019e99de424812ea58ef7a34d539d4..a0d37f13a227db1cf68935e392c7ac80da7fdf51 100644 (file)
--- a/icu4c/source/i18n/currpinf.cpp
+++ b/icu4c/source/i18n/currpinf.cpp
@@ -308,7 +308,7 @@ CurrencyPluralInfo::setupCurrencyPluralPattern(const Locale& loc, UErrorCode& st
                      std::cout << "pluralCount: " << pluralCount << "; pattern: " << result_1 << "\n";
  #endif
  
-                    fPluralCountToCurrencyUnitPattern->put(UnicodeString(pluralCount), pattern, status);
+                    fPluralCountToCurrencyUnitPattern->put(UnicodeString(pluralCount, -1, US_INV), pattern, status);
                  }
              }
          }
diff --git a/icu4c/source/i18n/decContext.c b/icu4c/source/i18n/decContext.c

index 513e0215971e4fd82e39a1ef8c12272d3620e986..3b162f26995c7ea8006829d2be98cf99cb7ef621 100644 (file)
--- a/icu4c/source/i18n/decContext.c
+++ b/icu4c/source/i18n/decContext.c
@@ -1,7 +1,7 @@
  /* ------------------------------------------------------------------ */
  /* Decimal Context module                                             */
  /* ------------------------------------------------------------------ */
-/* Copyright (c) IBM Corporation, 2000-2010.  All rights reserved.    */
+/* Copyright (c) IBM Corporation, 2000-2011.  All rights reserved.    */
  /*                                                                    */
  /* This software is made available under the terms of the             */
  /* ICU License -- ICU 1.8.1 and later.                                */
@@ -25,10 +25,12 @@
  #include "decContext.h"       /* context and base types  */
  #include "decNumberLocal.h"   /* decNumber local types, etc.  */
  
+#if 0  /* ICU: No need to test endianness at runtime. */
  /* compile-time endian tester [assumes sizeof(Int)>1] */
  static  const  Int mfcone=1;                 /* constant 1  */
  static  const  Flag *mfctop=(Flag *)&mfcone; /* -> top byte  */
  #define LITEND *mfctop             /* named flag; 1=little-endian  */
+#endif
  
  /* ------------------------------------------------------------------ */
  /* round-for-reround digits                                           */
@@ -210,7 +212,9 @@ U_CAPI decContext * U_EXPORT2 uprv_decContextSetRounding(decContext *context,
  /* ------------------------------------------------------------------ */
  U_CAPI decContext *  U_EXPORT2 uprv_decContextSetStatus(decContext *context, uInt status) {
    context->status|=status;
+#if 0  /* ICU: Do not raise signals. */
    if (status & context->traps) raise(SIGFPE);
+#endif
    return context;} /* decContextSetStatus  */
  
  /* ------------------------------------------------------------------ */
@@ -374,6 +378,7 @@ U_CAPI const char * U_EXPORT2 uprv_decContextStatusToString(const decContext *co
  /*                                                                    */
  /* No error is possible.                                              */
  /* ------------------------------------------------------------------ */
+#if 0  /* ICU: Unused function. Anyway, do not call printf(). */
  U_CAPI Int  U_EXPORT2 uprv_decContextTestEndian(Flag quiet) {
    Int res=0;                  /* optimist  */
    uInt dle=(uInt)DECLITEND;   /* unsign  */
@@ -391,6 +396,7 @@ U_CAPI Int  U_EXPORT2 uprv_decContextTestEndian(Flag quiet) {
      }
    return res;
    } /* decContextTestEndian  */
+#endif
  
  /* ------------------------------------------------------------------ */
  /* decContextTestSavedStatus -- test bits in saved status             */
diff --git a/icu4c/source/i18n/decimfmt.cpp b/icu4c/source/i18n/decimfmt.cpp

index 45acd03d73be7933af96ec81a69235c5dab04d85..1e2a9a09639ff9528d8db9a6df890490838a1553 100644 (file)
--- a/icu4c/source/i18n/decimfmt.cpp
+++ b/icu4c/source/i18n/decimfmt.cpp
@@ -433,7 +433,7 @@ DecimalFormat::construct(UErrorCode&             status,
          // For most locale, the patterns are probably the same for all
          // plural count. If not, the right pattern need to be re-applied
          // during format.
-        fCurrencyPluralInfo->getCurrencyPluralPattern("other", currencyPluralPatternForOther);
+        fCurrencyPluralInfo->getCurrencyPluralPattern(UNICODE_STRING("other", 5), currencyPluralPatternForOther);
          patternUsed = &currencyPluralPatternForOther;
          // TODO: not needed?
          setCurrencyForSymbols();
@@ -509,7 +509,7 @@ DecimalFormat::setupCurrencyAffixPatterns(UErrorCode& status) {
                                                      *fPosPrefixPattern,
                                                      *fPosSuffixPattern,
                                                      UCURR_SYMBOL_NAME);
-        fAffixPatternsForCurrency->put("default", affixPtn, status);
+        fAffixPatternsForCurrency->put(UNICODE_STRING("default", 7), affixPtn, status);
      }
  
      // save the unique currency plural patterns of this locale.
@@ -556,14 +556,13 @@ DecimalFormat::setupCurrencyAffixes(const UnicodeString& pattern,
              const PluralRules* pluralRules = fCurrencyPluralInfo->getPluralRules();
              StringEnumeration* keywords = pluralRules->getKeywords(status);
              if (U_SUCCESS(status)) {
-                const char* pluralCountCh;
-                while ((pluralCountCh = keywords->next(NULL, status)) != NULL) {
+                const UnicodeString* pluralCount;
+                while ((pluralCount = keywords->snext(status)) != NULL) {
                      if ( U_SUCCESS(status) ) {
-                        UnicodeString pluralCount = UnicodeString(pluralCountCh);
-                        expandAffixAdjustWidth(&pluralCount);
+                        expandAffixAdjustWidth(pluralCount);
                          AffixesForCurrency* affix = new AffixesForCurrency(
                              fNegativePrefix, fNegativeSuffix, fPositivePrefix, fPositiveSuffix);
-                        fAffixesForCurrency->put(pluralCount, affix, status);
+                        fAffixesForCurrency->put(*pluralCount, affix, status);
                      }
                  }
              }
@@ -584,16 +583,15 @@ DecimalFormat::setupCurrencyAffixes(const UnicodeString& pattern,
              const PluralRules* pluralRules = fCurrencyPluralInfo->getPluralRules();
              StringEnumeration* keywords = pluralRules->getKeywords(status);
              if (U_SUCCESS(status)) {
-                const char* pluralCountCh;
-                while ((pluralCountCh = keywords->next(NULL, status)) != NULL) {
+                const UnicodeString* pluralCount;
+                while ((pluralCount = keywords->snext(status)) != NULL) {
                      if ( U_SUCCESS(status) ) {
-                        UnicodeString pluralCount = UnicodeString(pluralCountCh);
                          UnicodeString ptn;
-                        fCurrencyPluralInfo->getCurrencyPluralPattern(pluralCount, ptn);
-                        applyPatternInternally(pluralCount, ptn, false, parseErr, status);
+                        fCurrencyPluralInfo->getCurrencyPluralPattern(*pluralCount, ptn);
+                        applyPatternInternally(*pluralCount, ptn, false, parseErr, status);
                          AffixesForCurrency* affix = new AffixesForCurrency(
                              fNegativePrefix, fNegativeSuffix, fPositivePrefix, fPositiveSuffix);
-                        fPluralAffixesForCurrency->put(pluralCount, affix, status);
+                        fPluralAffixesForCurrency->put(*pluralCount, affix, status);
                      }
                  }
              }
@@ -3281,17 +3279,13 @@ void DecimalFormat::expandAffix(const UnicodeString& pattern,
                          // For other cases, pluralCount == null,
                          // and plural names are not needed.
                          int32_t len;
-                        // TODO: num of char in plural count
-                        char pluralCountChar[10];
-                        if (pluralCount->length() >= 10) {
-                            break;
-                        }
-                        pluralCount->extract(0, pluralCount->length(), pluralCountChar);
+                        CharString pluralCountChar;
+                        pluralCountChar.appendInvariantChars(*pluralCount, ec);
                          UBool isChoiceFormat;
                          const UChar* s = ucurr_getPluralName(currencyUChars,
                              fSymbols != NULL ? fSymbols->getLocale().getName() :
                              Locale::getDefault().getName(), &isChoiceFormat,
-                            pluralCountChar, &len, &ec);
+                            pluralCountChar.data(), &len, &ec);
                          affix += UnicodeString(s, len);
                          handler.addAttribute(kCurrencyField, beginIdx, affix.length());
                      } else if(intl) {
diff --git a/icu4c/source/i18n/dtitvfmt.cpp b/icu4c/source/i18n/dtitvfmt.cpp

index 492fff73d0801fbec570f8f14c244b03ebc6e19e..6a5e84aeb7d773d270a657f5af5d9d1ffff54fce 100644 (file)
--- a/icu4c/source/i18n/dtitvfmt.cpp
+++ b/icu4c/source/i18n/dtitvfmt.cpp
@@ -1349,7 +1349,8 @@ DateIntervalFormat::adjustFieldWidth(const UnicodeString& inputSkeleton,
      DateIntervalInfo::parseSkeleton(inputSkeleton, inputSkeletonFieldWidth);
      DateIntervalInfo::parseSkeleton(bestMatchSkeleton, bestMatchSkeletonFieldWidth);
      if ( differenceInfo == 2 ) {
-        adjustedPtn.findAndReplace("v", "z");
+        adjustedPtn.findAndReplace(UnicodeString((UChar)0x76 /* v */),
+                                   UnicodeString((UChar)0x7a /* z */));
      }
  
      UBool inQuote = false;
diff --git a/icu4c/source/i18n/dtitvinf.cpp b/icu4c/source/i18n/dtitvinf.cpp

index 2cdcd2fc9de5dd6a98164e99ee57d2b0a7b6424e..f5a57dd35ab2715b6468cec9473c88ca10b60409 100644 (file)
--- a/icu4c/source/i18n/dtitvinf.cpp
+++ b/icu4c/source/i18n/dtitvinf.cpp
@@ -267,42 +267,39 @@ DateIntervalInfo::initializeData(const Locale& locale, UErrorCode& err)
          int32_t size = ures_getSize(itvDtPtnResource);
          int32_t index;
          for ( index = 0; index < size; ++index ) {
-            UResourceBundle* oneRes = ures_getByIndex(itvDtPtnResource, index, 
-                                                     NULL, &status);
+            LocalUResourceBundlePointer oneRes(ures_getByIndex(itvDtPtnResource, index, 
+                                                     NULL, &status));
              if ( U_SUCCESS(status) ) {
-                const char* skeleton = ures_getKey(oneRes);
-                if ( skeleton == NULL || 
-                     skeletonSet.geti(UnicodeString(skeleton)) == 1 ) {
-                    ures_close(oneRes);
+                const char* skeleton = ures_getKey(oneRes.getAlias());
+                if (skeleton == NULL) {
                      continue;
                  }
-                skeletonSet.puti(UnicodeString(skeleton), 1, status);
+                UnicodeString skeletonUniStr(skeleton, -1, US_INV);
+                if ( skeletonSet.geti(skeletonUniStr) == 1 ) {
+                    continue;
+                }
+                skeletonSet.puti(skeletonUniStr, 1, status);
                  if ( uprv_strcmp(skeleton, gFallbackPatternTag) == 0 ) {
-                    ures_close(oneRes);
                      continue;  // fallback
                  }
-    
-                UResourceBundle* intervalPatterns = ures_getByKey(
-                                     itvDtPtnResource, skeleton, NULL, &status);
-    
+
+                LocalUResourceBundlePointer intervalPatterns(ures_getByKey(
+                                     itvDtPtnResource, skeleton, NULL, &status));
+
                  if ( U_FAILURE(status) ) {
-                    ures_close(intervalPatterns);
-                    ures_close(oneRes);
                      break;
                  }
                  if ( intervalPatterns == NULL ) {
-                    ures_close(intervalPatterns);
-                    ures_close(oneRes);
                      continue;
                  }
-    
+
                  const UChar* pattern;
                  const char* key;
                  int32_t ptLength;
-                int32_t ptnNum = ures_getSize(intervalPatterns);
+                int32_t ptnNum = ures_getSize(intervalPatterns.getAlias());
                  int32_t ptnIndex;
                  for ( ptnIndex = 0; ptnIndex < ptnNum; ++ptnIndex ) {
-                    pattern = ures_getNextString(intervalPatterns, &ptLength, &key,
+                    pattern = ures_getNextString(intervalPatterns.getAlias(), &ptLength, &key,
                                                   &status);
                      if ( U_FAILURE(status) ) {
                          break;
@@ -323,12 +320,10 @@ DateIntervalInfo::initializeData(const Locale& locale, UErrorCode& err)
                          calendarField = UCAL_MINUTE;
                      }
                      if ( calendarField != UCAL_FIELD_COUNT ) {
-                        setIntervalPatternInternally(skeleton, calendarField, pattern,status);
+                        setIntervalPatternInternally(skeletonUniStr, calendarField, pattern,status);
                      }
                  }
-                ures_close(intervalPatterns);
              }
-            ures_close(oneRes);
          }
      }
      ures_close(itvDtPtnResource);
diff --git a/icu4c/source/i18n/locdspnm.cpp b/icu4c/source/i18n/locdspnm.cpp

index 83ce4ec4a61d6df477cd0adf718f20e50ca4d439..d4f4121cd159c90aaef2f7f2c78ae67eaa50d677 100644 (file)
--- a/icu4c/source/i18n/locdspnm.cpp
+++ b/icu4c/source/i18n/locdspnm.cpp
@@ -1,7 +1,7 @@
  /*
  *******************************************************************************
-* Copyright (C) 2010, International Business Machines Corporation and         *
-* others. All Rights Reserved.                                                *
+* Copyright (C) 2010-2011, International Business Machines Corporation and
+* others. All Rights Reserved.
  *******************************************************************************
  */
  
@@ -418,7 +418,7 @@ LocaleDisplayNamesImpl::localeDisplayName(const Locale& locale,
      while ((key = e->next((int32_t *)0, status)) != NULL) {
        locale.getKeywordValue(key, value, ULOC_KEYWORD_AND_VALUES_CAPACITY, status);
        appendWithSep(resultRemainder, keyDisplayName(key, temp))
-          .append("=")
+          .append((UChar)0x3d /* = */)
            .append(keyValueDisplayName(key, value, temp2));
      }
      delete e;
diff --git a/icu4c/source/i18n/msgfmt.cpp b/icu4c/source/i18n/msgfmt.cpp

index b1644e51ddb55633cca52017bf44fc5a28598f9d..4ed4526ada8f967b4dde07c07331f9beebbe74d5 100644 (file)
--- a/icu4c/source/i18n/msgfmt.cpp
+++ b/icu4c/source/i18n/msgfmt.cpp
@@ -539,7 +539,7 @@ void MessageFormat::setArgStartFormat(int32_t argStart,
              delete formatter;
              return;
          }
-        uhash_setValueDeleter(cachedFormatters, uhash_deleteUObject);
+        uhash_setValueDeleter(cachedFormatters, uprv_deleteUObject);
      }
      if (formatter == NULL) {
          formatter = new DummyFormat();
@@ -841,7 +841,7 @@ MessageFormat::getFormatNames(UErrorCode& status) {
          status = U_MEMORY_ALLOCATION_ERROR;
          return NULL;
      }
-    fFormatNames->setDeleter(uhash_deleteUObject);
+    fFormatNames->setDeleter(uprv_deleteUObject);
  
      for (int32_t partIndex = 0; (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) {
          fFormatNames->addElement(new UnicodeString(getArgName(partIndex)), status);
@@ -1201,7 +1201,7 @@ void MessageFormat::copyObjects(const MessageFormat& that, UErrorCode& ec) {
              if (U_FAILURE(ec)) {
                  return;
              }
-            uhash_setValueDeleter(cachedFormatters, uhash_deleteUObject);
+            uhash_setValueDeleter(cachedFormatters, uprv_deleteUObject);
          }
  
          const int32_t count = uhash_count(that.cachedFormatters);
diff --git a/icu4c/source/i18n/plurrule.cpp b/icu4c/source/i18n/plurrule.cpp

index bd342b468743a1e48a71b42ea964dd1a196bd3f6..3b8c114f47e7956ae16a4a97db404e81938b9d2e 100644 (file)
--- a/icu4c/source/i18n/plurrule.cpp
+++ b/icu4c/source/i18n/plurrule.cpp
@@ -25,7 +25,6 @@
  #include "plurrule_impl.h"
  #include "putilimp.h"
  #include "ucln_in.h"
-#include "uhash.h"
  #include "ustrfmt.h"
  #include "locutil.h"
  
@@ -1381,7 +1380,7 @@ PluralKeywordEnumeration::PluralKeywordEnumeration(RuleChain *header, UErrorCode
      if (U_FAILURE(status)) {
          return;
      }
-    fKeywordNames.setDeleter(uhash_deleteUObject);
+    fKeywordNames.setDeleter(uprv_deleteUObject);
      UBool  addKeywordOther=TRUE;
      RuleChain *node=header;
      while(node!=NULL) {
diff --git a/icu4c/source/i18n/rbt_data.cpp b/icu4c/source/i18n/rbt_data.cpp

index beef99231f9c1d98436f2b9a51f783fe1f2ffa70..bdb7ead8c3b2d29d855548ac58b3482c431b912d 100644 (file)
--- a/icu4c/source/i18n/rbt_data.cpp
+++ b/icu4c/source/i18n/rbt_data.cpp
@@ -1,6 +1,6 @@
  /*
  **********************************************************************
-*   Copyright (C) 1999-2008, International Business Machines
+*   Copyright (C) 1999-2011, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  **********************************************************************
  *   Date        Name        Description
@@ -28,7 +28,7 @@ TransliterationRuleData::TransliterationRuleData(UErrorCode& status)
      if (U_FAILURE(status)) {
          return;
      }
-    variableNames.setValueDeleter(uhash_deleteUnicodeString);
+    variableNames.setValueDeleter(uprv_deleteUObject);
      variables = 0;
      variablesLength = 0;
  }
@@ -41,7 +41,7 @@ TransliterationRuleData::TransliterationRuleData(const TransliterationRuleData&
  {
      UErrorCode status = U_ZERO_ERROR;
      int32_t i = 0;
-    variableNames.setValueDeleter(uhash_deleteUnicodeString);
+    variableNames.setValueDeleter(uprv_deleteUObject);
      int32_t pos = -1;
      const UHashElement *e;
      while ((e = other.variableNames.nextElement(pos)) != 0) {
diff --git a/icu4c/source/i18n/rbt_pars.cpp b/icu4c/source/i18n/rbt_pars.cpp

index d05a0cc925b5c923040e43a067168e5c3d3d10fb..54132e01eb442fdc96a13c7154ead23174d85371 100644 (file)
--- a/icu4c/source/i18n/rbt_pars.cpp
+++ b/icu4c/source/i18n/rbt_pars.cpp
@@ -825,11 +825,11 @@ idBlockVector(statusReturn),
  variablesVector(statusReturn),
  segmentObjects(statusReturn)
  {
-    idBlockVector.setDeleter(uhash_deleteUnicodeString);
+    idBlockVector.setDeleter(uprv_deleteUObject);
      curData = NULL;
      compoundFilter = NULL;
      parseData = NULL;
-    variableNames.setValueDeleter(uhash_deleteUnicodeString);
+    variableNames.setValueDeleter(uprv_deleteUObject);
  }
  
  /**
diff --git a/icu4c/source/i18n/smpdtfmt.cpp b/icu4c/source/i18n/smpdtfmt.cpp

index e42c5bab8844ee00829d124dd94d080472fd87a7..2c0d6caef59230f6f507eabb4a7cdebf2f33f21b 100644 (file)
--- a/icu4c/source/i18n/smpdtfmt.cpp
+++ b/icu4c/source/i18n/smpdtfmt.cpp
@@ -3058,7 +3058,7 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
              // next step. Otherwise, all time zone names starting with GMT/UT/UTC
              // (for example, "UTT") will fail.
              if (gmtLen > 0 && ((text.length() - start) == gmtLen)) {
-                TimeZone *tz = TimeZone::createTimeZone(UnicodeString("Etc/GMT"));
+                TimeZone *tz = TimeZone::createTimeZone(UNICODE_STRING("Etc/GMT", 7));
                  cal.adoptTimeZone(tz);
                  return start + gmtLen;
              }
@@ -3115,7 +3115,7 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
              // Step 5
              // If we saw standalone GMT zero pattern, then use GMT.
              if (gmtLen > 0) {
-                TimeZone *tz = TimeZone::createTimeZone(UnicodeString("Etc/GMT"));
+                TimeZone *tz = TimeZone::createTimeZone(UNICODE_STRING("Etc/GMT", 7));
                  cal.adoptTimeZone(tz);
                  return start + gmtLen;
              }
diff --git a/icu4c/source/i18n/smpdtfst.cpp b/icu4c/source/i18n/smpdtfst.cpp

index ecae3e7b775477146b7dd55bde9be4e406c053e8..2e138b84fb8a6be6fb3b1c23a31163bf52af27ad 100644 (file)
--- a/icu4c/source/i18n/smpdtfst.cpp
+++ b/icu4c/source/i18n/smpdtfst.cpp
@@ -33,27 +33,27 @@ SimpleDateFormatStaticSets::SimpleDateFormatStaticSets(UErrorCode *status)
    fTimeIgnorables(NULL),
    fOtherIgnorables(NULL)
  {
-    fDateIgnorables  = new UnicodeSet("[-,./[:whitespace:]]", *status);
-    fTimeIgnorables  = new UnicodeSet("[-.:[:whitespace:]]",  *status);
-    fOtherIgnorables = new UnicodeSet("[:whitespace:]",       *status);
-    
+    fDateIgnorables  = new UnicodeSet(UNICODE_STRING("[-,./[:whitespace:]]", 20), *status);
+    fTimeIgnorables  = new UnicodeSet(UNICODE_STRING("[-.:[:whitespace:]]", 19),  *status);
+    fOtherIgnorables = new UnicodeSet(UNICODE_STRING("[:whitespace:]", 14),       *status);
+
      // Check for null pointers
      if (fDateIgnorables == NULL || fTimeIgnorables == NULL || fOtherIgnorables == NULL) {
          goto ExitConstrDeleteAll;
      }
-    
+
      // Freeze all the sets
      fDateIgnorables->freeze();
      fTimeIgnorables->freeze();
      fOtherIgnorables->freeze();
-       
+
      return; // If we reached this point, everything is fine so just exit
-    
+
  ExitConstrDeleteAll: // Remove all sets and return error
      delete fDateIgnorables;  fDateIgnorables = NULL;
      delete fTimeIgnorables;  fTimeIgnorables = NULL;
      delete fOtherIgnorables; fOtherIgnorables = NULL;
-    
+
      *status = U_MEMORY_ALLOCATION_ERROR;
  }
  
diff --git a/icu4c/source/i18n/tblcoll.cpp b/icu4c/source/i18n/tblcoll.cpp

index e2e62b4fcef443e70d4754c3c8a08c5d0301f49a..04344e07019d06d58583bcf1591c252e3c8de412 100644 (file)
--- a/icu4c/source/i18n/tblcoll.cpp
+++ b/icu4c/source/i18n/tblcoll.cpp
@@ -70,6 +70,7 @@
  #include "cmemory.h"
  #include "cstring.h"
  #include "putilimp.h"
+#include "ustr_imp.h"
  
  /* public RuleBasedCollator constructor ---------------------------------- */
  
@@ -644,7 +645,7 @@ int32_t RuleBasedCollator::hashCode() const
  {
      int32_t length;
      const UChar *rules = ucol_getRules(ucollator, &length);
-    return uhash_hashUCharsN(rules, length);
+    return ustr_hashUCharsN(rules, length);
  }
  
  /**
diff --git a/icu4c/source/i18n/tmutfmt.cpp b/icu4c/source/i18n/tmutfmt.cpp

index 979f68029f004ebd220438e0865f9c82327b1588..dbd3ec6e4a754d38df2e8277a7024e57d58ce4cf 100644 (file)
--- a/icu4c/source/i18n/tmutfmt.cpp
+++ b/icu4c/source/i18n/tmutfmt.cpp
@@ -11,6 +11,7 @@
  
  #if !UCONFIG_NO_FORMATTING
  
+#include "charstr.h"
  #include "cmemory.h"
  #include "cstring.h"
  #include "hash.h"
@@ -495,12 +496,13 @@ TimeUnitFormat::readFromCurrentLocale(UTimeUnitFormatStyle style, const char* ke
                    if (fNumberFormat != NULL) {
                      messageFormat->setFormat(0, *fNumberFormat);
                    }
-                  MessageFormat** formatters = (MessageFormat**)countToPatterns->get(pluralCount);
+                  UnicodeString pluralCountUniStr(pluralCount, -1, US_INV);
+                  MessageFormat** formatters = (MessageFormat**)countToPatterns->get(pluralCountUniStr);
                    if (formatters == NULL) {
                      formatters = (MessageFormat**)uprv_malloc(UTMUTFMT_FORMAT_STYLE_COUNT*sizeof(MessageFormat*));
                      formatters[UTMUTFMT_FULL_STYLE] = NULL;
                      formatters[UTMUTFMT_ABBREVIATED_STYLE] = NULL;
-                    countToPatterns->put(pluralCount, formatters, err);
+                    countToPatterns->put(pluralCountUniStr, formatters, err);
                      if (U_FAILURE(err)) {
                          uprv_free(formatters);
                      }
@@ -557,8 +559,8 @@ TimeUnitFormat::checkConsistency(UTimeUnitFormatStyle style, const char* key, UE
      //
      StringEnumeration* keywords = fPluralRules->getKeywords(err);
      if (U_SUCCESS(err)) {
-        const char* pluralCount;
-        while ((pluralCount = keywords->next(NULL, err)) != NULL) {
+        const UnicodeString* pluralCount;
+        while ((pluralCount = keywords->snext(err)) != NULL) {
              if ( U_SUCCESS(err) ) {
                  for (int32_t i = 0; i < TimeUnit::UTIMEUNIT_FIELD_COUNT; ++i) {
                      // for each time unit, 
@@ -572,13 +574,15 @@ TimeUnitFormat::checkConsistency(UTimeUnitFormatStyle style, const char* key, UE
                          }
                          fTimeUnitToCountToPatterns[i] = countToPatterns;
                      }
-                    MessageFormat** formatters = (MessageFormat**)countToPatterns->get(pluralCount);
+                    MessageFormat** formatters = (MessageFormat**)countToPatterns->get(*pluralCount);
                      if( formatters == NULL || formatters[style] == NULL ) {
                          // look through parents
                          const char* localeName = fLocale.getName();
+                        CharString pluralCountChars;
+                        pluralCountChars.appendInvariantChars(*pluralCount, err);
                          searchInLocaleChain(style, key, localeName,
                                              (TimeUnit::UTimeUnitFields)i, 
-                                            pluralCount, pluralCount, 
+                                            *pluralCount, pluralCountChars.data(), 
                                              countToPatterns, err);
                      }
                  }
@@ -601,7 +605,7 @@ TimeUnitFormat::checkConsistency(UTimeUnitFormatStyle style, const char* key, UE
  void 
  TimeUnitFormat::searchInLocaleChain(UTimeUnitFormatStyle style, const char* key, const char* localeName,
                                  TimeUnit::UTimeUnitFields srcTimeUnitField,
-                                const char* srcPluralCount,
+                                const UnicodeString& srcPluralCount,
                                  const char* searchPluralCount, 
                                  Hashtable* countToPatterns,
                                  UErrorCode& err) {
diff --git a/icu4c/source/i18n/transreg.cpp b/icu4c/source/i18n/transreg.cpp

index 8e1d4f431fd265731686b6cc673fbb0089ff9dc0..90f0adafbeb9c0c61c494dd33bd1182ae82efc82 100644 (file)
--- a/icu4c/source/i18n/transreg.cpp
+++ b/icu4c/source/i18n/transreg.cpp
@@ -1,6 +1,6 @@
  /*
  **********************************************************************
-*   Copyright (c) 2001-2010, International Business Machines
+*   Copyright (c) 2001-2011, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  **********************************************************************
  *   Date        Name        Description
@@ -517,7 +517,7 @@ TransliteratorRegistry::TransliteratorRegistry(UErrorCode& status) :
      availableIDs(status)
  {
      registry.setValueDeleter(deleteEntry);
-    availableIDs.setDeleter(uhash_deleteUnicodeString);
+    availableIDs.setDeleter(uprv_deleteUObject);
      availableIDs.setComparer(uhash_compareCaselessUnicodeString);
      specDAG.setValueDeleter(uhash_deleteHashtable);
  }
@@ -936,12 +936,12 @@ void TransliteratorRegistry::registerSTV(const UnicodeString& source,
          if (U_FAILURE(status) || targets == 0) {
              return;
          }
-        targets->setValueDeleter(uhash_deleteUObject);
+        targets->setValueDeleter(uprv_deleteUObject);
          specDAG.put(source, targets, status);
      }
      UVector *variants = (UVector*) targets->get(target);
      if (variants == 0) {
-        variants = new UVector(uhash_deleteUnicodeString,
+        variants = new UVector(uprv_deleteUObject,
                                 uhash_compareCaselessUnicodeString, status);
          if (variants == 0) {
              return;
diff --git a/icu4c/source/i18n/tridpars.cpp b/icu4c/source/i18n/tridpars.cpp

index cf9fd3fb5b6eefd67aa0f24797d9677637f8ea80..56cec9521d22279df529d10fb3215d8ca6123346 100644 (file)
--- a/icu4c/source/i18n/tridpars.cpp
+++ b/icu4c/source/i18n/tridpars.cpp
@@ -1,6 +1,6 @@
  /*
  **********************************************************************
-*   Copyright (c) 2002-2009, International Business Machines Corporation
+*   Copyright (c) 2002-2011, International Business Machines Corporation
  *   and others.  All Rights Reserved.
  **********************************************************************
  *   Date        Name        Description
@@ -907,7 +907,7 @@ void TransliteratorIDParser::init(UErrorCode &status) {
         status = U_MEMORY_ALLOCATION_ERROR;
         return;
      }
-    special_inverses->setValueDeleter(uhash_deleteUnicodeString);
+    special_inverses->setValueDeleter(uprv_deleteUObject);
  
      umtx_lock(&LOCK);
      if (SPECIAL_INVERSES == NULL) {
diff --git a/icu4c/source/i18n/tzfmt.cpp b/icu4c/source/i18n/tzfmt.cpp

index ff36195bbde49818e7bb3b3bc262ff450a01366c..571d609a7f54e2d39ac97884e2548b56fd0fff7e 100644 (file)
--- a/icu4c/source/i18n/tzfmt.cpp
+++ b/icu4c/source/i18n/tzfmt.cpp
@@ -369,7 +369,7 @@ TimeZoneFormatDelegate::TimeZoneFormatDelegate(const Locale& locale, UErrorCode&
              if (!gTimeZoneFormatCacheInitialized) {
                  gTimeZoneFormatCache = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &status);
                  if (U_SUCCESS(status)) {
-                    uhash_setKeyDeleter(gTimeZoneFormatCache, uhash_freeBlock);
+                    uhash_setKeyDeleter(gTimeZoneFormatCache, uprv_free);
                      uhash_setValueDeleter(gTimeZoneFormatCache, deleteTimeZoneFormatCacheEntry);
                      gTimeZoneFormatCacheInitialized = TRUE;
                      ucln_i18n_registerCleanup(UCLN_I18N_TIMEZONEFORMAT, timeZoneFormat_cleanup);
diff --git a/icu4c/source/i18n/tzgnames.cpp b/icu4c/source/i18n/tzgnames.cpp

index 3abcc36091b8dc20a3840f920047559ca4c7fdc9..9ace791007ecc80e3ac512c7462f7ba4b08e4104 100644 (file)
--- a/icu4c/source/i18n/tzgnames.cpp
+++ b/icu4c/source/i18n/tzgnames.cpp
@@ -69,7 +69,7 @@ hashPartialLocationKey(const UHashTok key) {
          .append(p->mzID)
          .append((UChar)0x23)
          .append((UChar)(p->isLong ? 0x4C : 0x53));
-    return uhash_hashUCharsN(str.getBuffer(), str.length());
+    return str.hashCode();
  }
  
  /**
@@ -209,7 +209,7 @@ GNameSearchHandler::handleMatch(int32_t matchLength, const CharacterNode *node,
              if ((nameinfo->type & fTypes) != 0) {
                  // matches a requested type
                  if (fResults == NULL) {
-                    fResults = new UVector(uhash_freeBlock, NULL, status);
+                    fResults = new UVector(uprv_free, NULL, status);
                      if (fResults == NULL) {
                          status = U_MEMORY_ALLOCATION_ERROR;
                      }
@@ -350,7 +350,7 @@ TimeZoneGenericNames::initialize(const Locale& locale, UErrorCode& status) {
          cleanup();
          return;
      }
-    uhash_setKeyDeleter(fPartialLocationNamesMap, uhash_freeBlock);
+    uhash_setKeyDeleter(fPartialLocationNamesMap, uprv_free);
      // no value deleter
  
      // target region
diff --git a/icu4c/source/i18n/tznames.cpp b/icu4c/source/i18n/tznames.cpp

index 276807871a7bb4403ff0faa709ab5d8c4e8d8f55..402db4daf77669cb6d9d61534b1d979f55575de6 100644 (file)
--- a/icu4c/source/i18n/tznames.cpp
+++ b/icu4c/source/i18n/tznames.cpp
@@ -132,7 +132,7 @@ TimeZoneNamesDelegate::TimeZoneNamesDelegate(const Locale& locale, UErrorCode& s
              if (!gTimeZoneNamesCacheInitialized) {
                  gTimeZoneNamesCache = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &status);
                  if (U_SUCCESS(status)) {
-                    uhash_setKeyDeleter(gTimeZoneNamesCache, uhash_freeBlock);
+                    uhash_setKeyDeleter(gTimeZoneNamesCache, uprv_free);
                      uhash_setValueDeleter(gTimeZoneNamesCache, deleteTimeZoneNamesCacheEntry);
                      gTimeZoneNamesCacheInitialized = TRUE;
                      ucln_i18n_registerCleanup(UCLN_I18N_TIMEZONENAMES, timeZoneNames_cleanup);
@@ -277,7 +277,8 @@ TimeZoneNames::getExemplarLocationName(const UnicodeString& tzID, UnicodeString&
      int32_t sep = tzID.lastIndexOf((UChar)0x2F /* '/' */);
      if (sep > 0 && sep + 1 < tzID.length()) {
          name.setTo(tzID, sep + 1);
-        name.findAndReplace("_", " ");
+        name.findAndReplace(UnicodeString((UChar)0x5f /* _ */),
+                            UnicodeString((UChar)0x20 /* space */));
      } else {
          name.setToBogus();
      }
diff --git a/icu4c/source/i18n/tznames_impl.cpp b/icu4c/source/i18n/tznames_impl.cpp

index 55aa8cc8a75f412cb5d66704b8301ba020b4965e..d2682ea375732224fce8c65705d1fd9f25a0c123 100644 (file)
--- a/icu4c/source/i18n/tznames_impl.cpp
+++ b/icu4c/source/i18n/tznames_impl.cpp
@@ -866,7 +866,7 @@ ZNameSearchHandler::handleMatch(int32_t matchLength, const CharacterNode *node,
              if ((nameinfo->type & fTypes) != 0) {
                  // matches a requested type
                  if (fResults == NULL) {
-                    fResults = new UVector(uhash_freeBlock, NULL, status);
+                    fResults = new UVector(uprv_free, NULL, status);
                      if (fResults == NULL) {
                          status = U_MEMORY_ALLOCATION_ERROR;
                      }
@@ -1090,7 +1090,7 @@ TimeZoneNamesImpl::getMetaZoneID(const UnicodeString& tzID, UDate date, UnicodeS
  
  UnicodeString&
  TimeZoneNamesImpl::getReferenceZoneID(const UnicodeString& mzID, const char* region, UnicodeString& tzID) const {
-    ZoneMeta::getZoneIdByMetazone(mzID, UnicodeString(region), tzID);
+    ZoneMeta::getZoneIdByMetazone(mzID, UnicodeString(region, -1, US_INV), tzID);
      return tzID;
  }
  
diff --git a/icu4c/source/i18n/ucol_bld.cpp b/icu4c/source/i18n/ucol_bld.cpp

index e6c5048d61ca9877b64c3445612e6ba1ed0187f9..cf7aed33d7835e970f2fda19cfdeac21e68e7b78 100644 (file)
--- a/icu4c/source/i18n/ucol_bld.cpp
+++ b/icu4c/source/i18n/ucol_bld.cpp
@@ -1397,7 +1397,7 @@ static const char* ReorderingTokenNames[] = {
  
  static void toUpper(const char* src, char* dst, uint32_t length) {
     for (uint32_t i = 0; *src != '\0' && i < length - 1; ++src, ++dst, ++i) {
-       *dst = toupper(*src);
+       *dst = uprv_toupper(*src);
     }
     *dst = '\0';
  }
diff --git a/icu4c/source/i18n/ucol_elm.cpp b/icu4c/source/i18n/ucol_elm.cpp

index bb686eed6f39e83a3597564770ad041c2b48f337..0488324b3eefa89b680422b01621ff8f42f77967 100644 (file)
--- a/icu4c/source/i18n/ucol_elm.cpp
+++ b/icu4c/source/i18n/ucol_elm.cpp
@@ -1,7 +1,7 @@
  /*
  *******************************************************************************
  *
-*   Copyright (C) 2001-2010, International Business Machines
+*   Copyright (C) 2001-2011, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *
  *******************************************************************************
@@ -157,7 +157,7 @@ uprv_uca_initTempTable(UCATableHeader *image, UColOptionSet *opts, const UCollat
      if (U_FAILURE(*status)) {
          goto allocation_failure;
      }
-    uhash_setValueDeleter(t->prefixLookup, uhash_freeBlock);
+    uhash_setValueDeleter(t->prefixLookup, uprv_free);
  
      t->contractions = uprv_cnttab_open(t->mapping, status);
      if (U_FAILURE(*status)) {
diff --git a/icu4c/source/i18n/ucol_tok.cpp b/icu4c/source/i18n/ucol_tok.cpp

index 377f7584592e5bbaf69d78796df278c70b36e9ca..b70c3145330d6571b27957ca2672ca0eeef86707 100644 (file)
--- a/icu4c/source/i18n/ucol_tok.cpp
+++ b/icu4c/source/i18n/ucol_tok.cpp
@@ -135,12 +135,6 @@ U_CDECL_END
  #endif
  
  
-/*static inline void U_CALLCONV
-uhash_freeBlockWrapper(void *obj) {
-    uhash_freeBlock(obj);
-}*/
-
-
  typedef struct {
      uint32_t startCE;
      uint32_t startContCE;
@@ -2367,7 +2361,7 @@ void ucol_tok_initTokenList(
      if(U_FAILURE(*status)) {
          return;
      }
-    uhash_setValueDeleter(src->tailored, uhash_freeBlock);
+    uhash_setValueDeleter(src->tailored, uprv_free);
  
      src->opts = (UColOptionSet *)uprv_malloc(sizeof(UColOptionSet));
      /* test for NULL */
diff --git a/icu4c/source/i18n/unicode/tmutfmt.h b/icu4c/source/i18n/unicode/tmutfmt.h

index 217176b315ff174446b6b4e576a96506d2b08cbd..35f5dc6c6c2d538e1d415bb1bc471bc8bf6799f9 100644 (file)
--- a/icu4c/source/i18n/unicode/tmutfmt.h
+++ b/icu4c/source/i18n/unicode/tmutfmt.h
@@ -243,7 +243,7 @@ private:
  
      // fill in fTimeUnitToCountToPatterns from locale fall-back chain
      void searchInLocaleChain(UTimeUnitFormatStyle style, const char* key, const char* localeName,
-                             TimeUnit::UTimeUnitFields field, const char*,
+                             TimeUnit::UTimeUnitFields field, const UnicodeString&,
                               const char*, Hashtable*, UErrorCode&);
  
      // initialize hash table
diff --git a/icu4c/source/i18n/uspoof_conf.cpp b/icu4c/source/i18n/uspoof_conf.cpp

index 199e0c4d7699630449b8d6d994729d0ac5537023..c7a8c62ae50a4e94b257887664e8e1cd02fbedc7 100644 (file)
--- a/icu4c/source/i18n/uspoof_conf.cpp
+++ b/icu4c/source/i18n/uspoof_conf.cpp
@@ -233,19 +233,21 @@ void ConfusabledataBuilder::build(const char * confusables, int32_t confusablesL
      //   Capture Group 8:  A syntactically invalid line.  Anything that didn't match before.
      // Example Line from the confusables.txt source file:
      //   "1D702 ;      006E 0329 ;     SL      # MATHEMATICAL ITALIC SMALL ETA ... "
-    fParseLine = uregex_openC(
+    UnicodeString pattern(
          "(?m)^[ \\t]*([0-9A-Fa-f]+)[ \\t]+;"      // Match the source char
          "[ \\t]*([0-9A-Fa-f]+"                    // Match the replacement char(s)
             "(?:[ \\t]+[0-9A-Fa-f]+)*)[ \\t]*;"    //     (continued)
          "\\s*(?:(SL)|(SA)|(ML)|(MA))"             // Match the table type
          "[ \\t]*(?:#.*?)?$"                       // Match any trailing #comment
          "|^([ \\t]*(?:#.*?)?)$"       // OR match empty lines or lines with only a #comment
-        "|^(.*?)$",                   // OR match any line, which catches illegal lines.
-        0, NULL, &status);
+        "|^(.*?)$", -1, US_INV);      // OR match any line, which catches illegal lines.
+    // TODO: Why are we using the regex C API here? C++ would just take UnicodeString...
+    fParseLine = uregex_open(pattern.getBuffer(), pattern.length(), 0, NULL, &status);
  
      // Regular expression for parsing a hex number out of a space-separated list of them.
      //   Capture group 1 gets the number, with spaces removed.
-    fParseHexNum = uregex_openC("\\s*([0-9A-F]+)", 0, NULL, &status);
+    pattern = UNICODE_STRING_SIMPLE("\\s*([0-9A-F]+)");
+    fParseHexNum = uregex_open(pattern.getBuffer(), pattern.length(), 0, NULL, &status);
  
      // Zap any Byte Order Mark at the start of input.  Changing it to a space is benign
      //   given the syntax of the input.
diff --git a/icu4c/source/i18n/uspoof_wsconf.cpp b/icu4c/source/i18n/uspoof_wsconf.cpp

index de6fa0652d5639bead67fecd1dae3af26e4a74b6..2417512de2be19c7760235d71e94f167b03bf69c 100644 (file)
--- a/icu4c/source/i18n/uspoof_wsconf.cpp
+++ b/icu4c/source/i18n/uspoof_wsconf.cpp
@@ -52,7 +52,6 @@ U_NAMESPACE_USE
  // The expression will match _all_ lines, including erroneous lines.
  // The result of the parse is returned via the contents of the (match) groups.
  static const char *parseExp = 
-        
          "(?m)"                                         // Multi-line mode
          "^([ \\t]*(?:#.*?)?)$"                         // A blank or comment line.  Matches Group 1.
          "|^(?:"                                        //   OR
@@ -115,7 +114,8 @@ void buildWSConfusableData(SpoofImpl *spImpl, const char * confusablesWS,
  
      anyCaseTrie = utrie2_open(0, 0, &status);
      lowerCaseTrie = utrie2_open(0, 0, &status);
-    
+
+    UnicodeString pattern(parseExp, -1, US_INV);
  
      // The scriptSets vector provides a mapping from TRIE values to the set of scripts.
      //
@@ -150,10 +150,8 @@ void buildWSConfusableData(SpoofImpl *spImpl, const char * confusablesWS,
      }
      u_strFromUTF8(input, inputLen+1, NULL, confusablesWS, confusablesWSLen, &status);
  
+    parseRegexp = uregex_open(pattern.getBuffer(), pattern.length(), 0, NULL, &status);
  
-
-    parseRegexp = uregex_openC(parseExp, 0, NULL, &status);
-    
      // Zap any Byte Order Mark at the start of input.  Changing it to a space is benign
      //   given the syntax of the input.
      if (*input == 0xfeff) {
diff --git a/icu4c/source/i18n/vtzone.cpp b/icu4c/source/i18n/vtzone.cpp

index 51a6a81ef46c7ed03300acd0343bf0c60ef6fa07..7d5eb0197b593e6e4f511e9a71f6e4ee9031f605 100644 (file)
--- a/icu4c/source/i18n/vtzone.cpp
+++ b/icu4c/source/i18n/vtzone.cpp
@@ -1,6 +1,6 @@
  /*
  *******************************************************************************
-* Copyright (C) 2007-2010, International Business Machines Corporation and
+* Copyright (C) 2007-2011, International Business Machines Corporation and
  * others. All Rights Reserved.
  *******************************************************************************
  */
@@ -18,7 +18,6 @@
  #include "cmemory.h"
  #include "uvector.h"
  #include "gregoimp.h"
-#include "uhash.h"
  
  U_NAMESPACE_BEGIN
  
@@ -962,7 +961,7 @@ VTimeZone::VTimeZone(const VTimeZone& source)
      if (source.vtzlines != NULL) {
          UErrorCode status = U_ZERO_ERROR;
          int32_t size = source.vtzlines->size();
-        vtzlines = new UVector(uhash_deleteUnicodeString, uhash_compareUnicodeString, size, status);
+        vtzlines = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, size, status);
          if (U_SUCCESS(status)) {
              for (int32_t i = 0; i < size; i++) {
                  UnicodeString *line = (UnicodeString*)source.vtzlines->elementAt(i);
@@ -1007,7 +1006,7 @@ VTimeZone::operator=(const VTimeZone& right) {
          if (right.vtzlines != NULL) {
              UErrorCode status = U_ZERO_ERROR;
              int32_t size = right.vtzlines->size();
-            vtzlines = new UVector(uhash_deleteUnicodeString, uhash_compareUnicodeString, size, status);
+            vtzlines = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, size, status);
              if (U_SUCCESS(status)) {
                  for (int32_t i = 0; i < size; i++) {
                      UnicodeString *line = (UnicodeString*)right.vtzlines->elementAt(i);
@@ -1242,7 +1241,7 @@ VTimeZone::getTimeZoneRules(const InitialTimeZoneRule*& initial,
  
  void
  VTimeZone::load(VTZReader& reader, UErrorCode& status) {
-    vtzlines = new UVector(uhash_deleteUnicodeString, uhash_compareUnicodeString, DEFAULT_VTIMEZONE_LINES, status);
+    vtzlines = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, DEFAULT_VTIMEZONE_LINES, status);
      if (U_FAILURE(status)) {
          return;
      }
@@ -1378,7 +1377,7 @@ VTimeZone::parse(UErrorCode& status) {
       // Set the deleter to remove TimeZoneRule vectors to avoid memory leaks due to unowned TimeZoneRules.
      rules->setDeleter(deleteTimeZoneRule);
      
-    dates = new UVector(uhash_deleteUnicodeString, uhash_compareUnicodeString, status);
+    dates = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status);
      if (U_FAILURE(status)) {
          goto cleanupParse;
      }
@@ -1741,7 +1740,7 @@ VTimeZone::write(VTZWriter& writer, UErrorCode& status) const {
      } else {
          UVector *customProps = NULL;
          if (olsonzid.length() > 0 && icutzver.length() > 0) {
-            customProps = new UVector(uhash_deleteUnicodeString, uhash_compareUnicodeString, status);
+            customProps = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status);
              if (U_FAILURE(status)) {
                  return;
              }
@@ -1769,7 +1768,7 @@ VTimeZone::write(UDate start, VTZWriter& writer, UErrorCode& status) /*const*/ {
      }
      InitialTimeZoneRule *initial = NULL;
      UVector *transitionRules = NULL;
-    UVector customProps(uhash_deleteUnicodeString, uhash_compareUnicodeString, status);
+    UVector customProps(uprv_deleteUObject, uhash_compareUnicodeString, status);
      UnicodeString tzid;
  
      // Extract rules applicable to dates after the start time
@@ -1833,7 +1832,7 @@ VTimeZone::writeSimple(UDate time, VTZWriter& writer, UErrorCode& status) /*cons
          return;
      }
  
-    UVector customProps(uhash_deleteUnicodeString, uhash_compareUnicodeString, status);
+    UVector customProps(uprv_deleteUObject, uhash_compareUnicodeString, status);
      UnicodeString tzid;
  
      // Extract simple rules
diff --git a/icu4c/source/i18n/zonemeta.cpp b/icu4c/source/i18n/zonemeta.cpp

index 01de17cf06262d3b9fed798b5bd7d0d636beb030..79f8172ac7b0e0675aaab8bfa5caeefdc20c32ca 100644 (file)
--- a/icu4c/source/i18n/zonemeta.cpp
+++ b/icu4c/source/i18n/zonemeta.cpp
@@ -763,7 +763,7 @@ ZoneMeta::initAvailableMetaZoneIDs () {
              if (!gMetaZoneIDsInitialized) {
                  UErrorCode status = U_ZERO_ERROR;
                  UHashtable *metaZoneIDTable = uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, &status);
-                uhash_setKeyDeleter(metaZoneIDTable, uhash_deleteUnicodeString);
+                uhash_setKeyDeleter(metaZoneIDTable, uprv_deleteUObject);
                  // No valueDeleter, because the vector maintain the value objects
                  UVector *metaZoneIDs = NULL;
                  if (U_SUCCESS(status)) {
@@ -775,7 +775,7 @@ ZoneMeta::initAvailableMetaZoneIDs () {
                      uhash_close(metaZoneIDTable);
                  }
                  if (U_SUCCESS(status)) {
-                    metaZoneIDs->setDeleter(uhash_freeBlock);
+                    metaZoneIDs->setDeleter(uprv_free);
  
                      UResourceBundle *rb = ures_openDirect(NULL, gMetaZones, &status);
                      UResourceBundle *bundle = ures_getByKey(rb, gMapTimezonesTag, NULL, &status);
diff --git a/icu4c/source/test/depstest/dependencies.py b/icu4c/source/test/depstest/dependencies.py

new file mode 100755 (executable)

index 0000000..448f685
--- /dev/null
+++ b/icu4c/source/test/depstest/dependencies.py
@@ -0,0 +1,194 @@
+#! /usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2011, International Business Machines
+# Corporation and others. All Rights Reserved.
+#
+# file name: dependencies.py
+#
+# created on: 2011may26
+
+"""Reader module for dependency data for the ICU dependency tester.
+
+Reads dependencies.txt and makes the data available.
+
+Attributes:
+  files: Set of "library/filename.o" files mentioned in the dependencies file.
+  items: Map from library or group names to item maps.
+    Each item has a "type" ("library" or "group" or "system_symbols").
+    A library or group item can have an optional set of "files" (as in the files attribute).
+    Each item can have an optional set of "deps" (libraries & groups).
+    A group item also has a "library" name unless it is a group of system symbols.
+    The one "system_symbols" item and its groups have sets of "system_symbols"
+    with standard-library system symbol names.
+  libraries: Set of library names mentioned in the dependencies file.
+"""
+__author__ = "Markus W. Scherer"
+
+# TODO: Support binary items.
+# .txt syntax:   binary: tools/genrb
+# item contents: {"type": "binary"} with optional files & deps
+# A binary must not be used as a dependency for anything else.
+
+import sys
+
+# Public results of Load(); their structure is described in the module docstring.
+files = set()
+items = {}
+libraries = set()
+
+# Count of lines read so far (incremented in _RemoveComment()); used in error messages.
+_line_number = 0
+# Names of groups that were mentioned as dependencies but not yet defined with "group:".
+_groups_to_be_defined = set()
+
+def _CheckLibraryName(name):
+  global _line_number
+  if not name:
+    sys.exit("Error:%d: \"library: \" without name" % _line_number)
+  if name.endswith(".o"):
+    sys.exit("Error:%d: invalid library name %s"  % (_line_number, name))
+
+def _CheckGroupName(name):
+  global _line_number
+  if not name:
+    sys.exit("Error:%d: \"group: \" without name" % _line_number)
+  if "/" in name or name.endswith(".o"):
+    sys.exit("Error:%d: invalid group name %s"  % (_line_number, name))
+
+def _CheckFileName(name):
+  global _line_number
+  if "/" in name or not name.endswith(".o"):
+    sys.exit("Error:%d: invalid file name %s"  % (_line_number, name))
+
+def _RemoveComment(line):
+  global _line_number
+  _line_number = _line_number + 1
+  index = line.find("#")  # Remove trailing comment.
+  if index >= 0: line = line[:index]
+  return line.rstrip()  # Remove trailing newlines etc.
+
+def _ReadLine(f):
+  while True:
+    line = _RemoveComment(f.next())
+    if line: return line
+
+def _ReadFiles(deps_file, item, library_name):
+  global files
+  item_files = item.get("files")
+  while True:
+    line = _ReadLine(deps_file)
+    if not line: continue
+    if not line.startswith("    "): return line
+    if item_files == None: item_files = item["files"] = set()
+    for file_name in line.split():
+      _CheckFileName(file_name)
+      file_name = library_name + "/" + file_name
+      if file_name in files:
+        sys.exit("Error:%d: file %s listed in multiple groups" % (_line_number, file_name))
+      files.add(file_name)
+      item_files.add(file_name)
+
+def _IsLibrary(item): return item and item["type"] == "library"
+
+def _IsLibraryGroup(item): return item and "library" in item
+
+def _ReadDeps(deps_file, item, library_name):
+  global items, _line_number, _groups_to_be_defined
+  item_deps = item.get("deps")
+  while True:
+    line = _ReadLine(deps_file)
+    if not line: continue
+    if not line.startswith("    "): return line
+    if item_deps == None: item_deps = item["deps"] = set()
+    for dep in line.split():
+      _CheckGroupName(dep)
+      dep_item = items.get(dep)
+      if item["type"] == "system_symbols" and (_IsLibraryGroup(dep_item) or _IsLibrary(dep_item)):
+        sys.exit(("Error:%d: system_symbols depend on previously defined " +
+                  "library or library group %s") % (_line_number, dep))
+      if dep_item == None:
+        # Add this dependency as a new group.
+        items[dep] = {"type": "group"}
+        if library_name: items[dep]["library"] = library_name
+        _groups_to_be_defined.add(dep)
+      item_deps.add(dep)
+
+def _AddSystemSymbol(item, symbol):
+  exports = item.get("system_symbols")
+  if exports == None: exports = item["system_symbols"] = set()
+  exports.add(symbol)
+
+def _ReadSystemSymbols(deps_file, item):
+  global _line_number
+  while True:
+    line = _ReadLine(deps_file)
+    if not line: continue
+    if not line.startswith("    "): return line
+    line = line.lstrip()
+    if '"' in line:
+      # One double-quote-enclosed symbol on the line, allows spaces in a symbol name.
+      symbol = line[1:-1]
+      if line.startswith('"') and line.endswith('"') and '"' not in symbol:
+        _AddSystemSymbol(item, symbol)
+      else:
+        sys.exit("Error:%d: invalid quoted symbol name %s" % (_line_number, line))
+    else:
+      # One or more space-separate symbols.
+      for symbol in line.split(): _AddSystemSymbol(item, symbol)
+
+def Load():
+  """Reads "dependencies.txt" and populates the module attributes."""
+  global items, libraries, _line_number, _groups_to_be_defined
+  deps_file = open("dependencies.txt")
+  try:
+    line = None
+    current_type = None
+    while True:
+      while not line: line = _RemoveComment(deps_file.next())
+
+      if line.startswith("library: "):
+        current_type = "library"
+        name = line[9:].lstrip()
+        _CheckLibraryName(name)
+        if name in items:
+          sys.exit("Error:%d: library definition using duplicate name %s" % (_line_number, name))
+        libraries.add(name)
+        item = items[name] = {"type": "library"}
+        line = _ReadFiles(deps_file, item, name)
+      elif line.startswith("group: "):
+        current_type = "group"
+        name = line[7:].lstrip()
+        _CheckGroupName(name)
+        if name not in items:
+          sys.exit("Error:%d: group %s defined before mentioned as a dependency" %
+                   (_line_number, name))
+        if name not in _groups_to_be_defined:
+          sys.exit("Error:%d: group definition using duplicate name %s" % (_line_number, name))
+        _groups_to_be_defined.remove(name)
+        item = items[name]
+        library_name = item.get("library")
+        if library_name:
+          line = _ReadFiles(deps_file, item, library_name)
+        else:
+          line = _ReadSystemSymbols(deps_file, item)
+      elif line == "  deps":
+        if current_type == "library":
+          line = _ReadDeps(deps_file, items[name], name)
+        elif current_type == "group":
+          item = items[name]
+          line = _ReadDeps(deps_file, item, item.get("library"))
+        elif current_type == "system_symbols":
+          item = items[current_type]
+          line = _ReadDeps(deps_file, item, None)
+        else:
+          sys.exit("Error:%d: deps before any library or group" % _line_number)
+      elif line == "system_symbols:":
+        current_type = "system_symbols"
+        if current_type in items:
+          sys.exit("Error:%d: duplicate entry for system_symbols" % _line_number)
+        item = items[current_type] = {"type": current_type}
+        line = _ReadSystemSymbols(deps_file, item)
+      else:
+        sys.exit("Syntax error:%d: %s" % (_line_number, line))
+  except StopIteration:
+    pass
+  if _groups_to_be_defined:
+    sys.exit("Error: some groups mentioned in dependencies are undefined: %s" % _groups_to_be_defined)
diff --git a/icu4c/source/test/depstest/dependencies.txt b/icu4c/source/test/depstest/dependencies.txt

new file mode 100644 (file)

index 0000000..3e87fdf
--- /dev/null
+++ b/icu4c/source/test/depstest/dependencies.txt
@@ -0,0 +1,893 @@
+# Copyright (C) 2011, International Business Machines
+# Corporation and others. All Rights Reserved.
+#
+# file name: dependencies.txt
+#
+# created on: 2011may26
+# created by: Markus W. Scherer
+
+# Standard library symbols used by ICU --------------------------------------- #
+
+system_symbols:
+  deps
+    # C
+    PIC system_debug errno_perror malloc_functions c_strings c_string_formatting
+    floating_point trigonometry
+    stdlib_qsort
+    pthread system_locale
+    stdio_input stdio_output file_io readlink_function dir_io mmap_functions dlfcn
+    # C++
+    cplusplus iostream
+
+group: PIC
+    # Position-Independent Code (-fPIC) requires a Global Offset Table.
+    _GLOBAL_OFFSET_TABLE_
+
+group: system_debug
+    __assert_fail __stack_chk_fail
+
+group: errno_perror
+    perror  # putil.cpp uprv_dl_open() calls perror("dlopen")
+
+group: malloc_functions
+    free malloc realloc
+
+group: c_strings
+    isspace
+    __ctype_b_loc  # for <ctype.h>
+    # We must not use tolower and toupper because they are system-locale-sensitive (Turkish i).
+    strlen strchr strrchr strstr strcmp strncmp strcpy strncpy strcat strncat
+    memcmp memcpy memmove memset
+    # Additional symbols in an optimized build.
+    __strcpy_chk __strncpy_chk __strcat_chk __strncat_chk
+    __rawmemchr __memcpy_chk __memmove_chk
+
+group: c_string_formatting
+    atoi atol strtod strtol strtoul
+    sprintf
+    # Additional symbols in an optimized build.
+    __sprintf_chk
+
+group: floating_point
+    floor ceil modf fmod log pow sqrt
+
+group: trigonometry
+    acos asin atan atan2 cos sin tan
+    # Additional symbols in an optimized build.
+    sincos
+
+group: stdlib_qsort
+    qsort
+
+group: pthread
+    pthread_mutex_init pthread_mutex_destroy pthread_mutex_lock pthread_mutex_unlock
+
+group: system_locale
+    getenv
+    nl_langinfo setlocale
+    gettimeofday localtime_r tzname tzset __timezone
+
+group: stdio_input
+    fopen fclose fgets fread fseek ftell rewind feof fileno
+    # Additional symbols in an optimized build.
+    __fgets_chk __fread_chk
+
+group: stdio_output
+    fflush fwrite
+
+group: file_io
+    open close stat
+    # Additional symbols in an optimized build.
+    __xstat
+
+group: readlink_function
+    readlink  # putil.cpp uprv_tzname() calls this in a hack to get the time zone name
+
+group: dir_io
+    opendir closedir readdir  # for a hack to get the time zone name
+
+group: mmap_functions  # for memory-mapped data loading
+    mmap munmap
+
+group: dlfcn
+    dlopen dlclose dlsym  # called by putil.o only for icuplug.o
+
+group: cplusplus
+    __dynamic_cast
+    # The compiler generates references to the global operator delete
+    # even when no code actually uses it.
+    # ICU must not _use_ the global operator delete.
+    "operator delete(void*)"
+    # ICU also must not use the global operator new.
+    # "operator new[](unsigned long)"
+    # _Unwind_Resume is related to exceptions:
+    # "A call to this routine is inserted as the end of a landing pad that performs cleanup,
+    # but does not resume normal execution. It causes unwinding to proceed further."
+    # (Linux Standard Base Specification 1.3)
+    # It is listed here even though ICU does not actually use (nor handle) exceptions.
+    _Unwind_Resume
+
+group: iostream
+    "std::basic_ios<char, std::char_traits<char> >::clear(std::_Ios_Iostate)"
+    "std::basic_ios<char, std::char_traits<char> >::eof() const"
+    "std::basic_ios<char, std::char_traits<char> >::fail() const"
+    "std::basic_ostream<char, std::char_traits<char> >& std::operator<< <std::char_traits<char> >(std::basic_ostream<char, std::char_traits<char> >&, char const*)"
+    std::istream::get()
+    std::istream::putback(char)
+    # Additional symbols in an optimized build.
+    "std::basic_ostream<char, std::char_traits<char> >& std::__ostream_insert<char, std::char_traits<char> >(std::basic_ostream<char, std::char_traits<char> >&, char const*, long)"
+
+# ICU common library --------------------------------------------------------- #
+
+library: stubdata
+    stubdata.o  # Exports icudt48_dat.
+
+library: common
+    # All files in the common library are listed in its dependencies.
+  deps
+    # Libraries and groups that the common library depends on.
+    date_interval
+    breakiterator
+    uts46 filterednormalizer2 normalizer2 canonical_iterator
+    normlzr unormcmp unorm_it unorm
+    idna2003 stringprep
+    stringenumeration
+    unistr_core unistr_props unistr_case unistr_case_locale unistr_titlecase_brkiter unistr_cnv
+    uniset_core uniset_props uniset_closure usetiter uset uset_props
+    uiter
+    ucasemap ucasemap_titlecase_brkiter script_runs
+    uprops ubidi_props ucase uscript
+    ubidi ushape
+    resourcebundle service_registration resbund_cnv ures_cnv icudataver ucat
+    loclikely
+    conversion converter_selector ucnv_set ucnvdisp
+    messagepattern
+    icu_utility icu_utility_with_props
+    ustr_wcs
+    ucharstriebuilder ucharstrieiterator
+    bytestriebuilder bytestrieiterator
+    hashtable uhash uvector uvector32 uvector64 ulist
+    propsvec utrie2 utrie2_builder
+    sort
+    uinit utypes errorcode
+    icuplug
+    platform
+
+group: date_interval  # class DateInterval
+    dtintrv.o
+  deps
+    platform
+
+group: breakiterator
+    # We could try to split off a breakiterator_builder group,
+    # but we still need uniset_props for code like in the ThaiBreakEngine constructor
+    # which does
+    #   fThaiWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Thai:]&[:LineBreak=SA:]]"), status)
+    brkiter.o brkeng.o ubrk.o
+    rbbi.o rbbinode.o rbbiscan.o rbbisetb.o rbbistbl.o rbbitblb.o
+    rbbidata.o rbbirb.o
+    triedict.o dictbe.o
+  deps
+    resourcebundle service_registration
+    schriter utext uniset_core uniset_props
+    uhash ustack utrie
+    uvector32  # for triedict.o
+
+group: unormcmp  # unorm_compare()
+    unormcmp.o
+  deps
+    filterednormalizer2
+    uniset_props  # for uniset_getUnicode32Instance()
+    ucase
+
+group: unorm_it  # UNormIterator
+    unorm_it.o
+  deps
+    unorm uiter
+
+group: unorm  # old normalization C API
+    unorm.o
+  deps
+    filterednormalizer2
+    uniset_props  # for uniset_getUnicode32Instance()
+    uiter
+
+group: normlzr  # old Normalizer C++ class
+    normlzr.o
+  deps
+    filterednormalizer2
+    uniset_props  # for uniset_getUnicode32Instance()
+    schriter
+
+group: uts46
+    uts46.o
+  deps
+    normalizer2 punycode
+    uchar  # for u_charType() (via U_GET_GC_MASK(c))
+    ubidi_props  # for u_charDirection() & ubidi_getJoiningType()
+    unistr_core
+    stringpiece bytestream
+
+group: filterednormalizer2
+    filterednormalizer2.o
+  deps
+    normalizer2
+
+group: idna2003
+    uidna.o
+  deps
+    stringprep punycode
+
+group: stringprep
+    usprep.o
+  deps
+    unorm  # could change to use filterednormalizer2 directly for Unicode 3.2 normalization
+    normalizer2
+    ubidi_props
+
+group: canonical_iterator
+    caniter.o
+  deps
+    normalizer2 usetiter
+
+group: normalizer2
+    normalizer2.o
+    normalizer2impl.o
+  deps
+    uniset_core
+    unistr_core
+    utrie2_builder  # for building CanonIterData & FCD
+    uvector  # for building CanonIterData
+    uhash  # for the instance cache
+    udata
+
+group: punycode
+    punycode.o
+  deps
+    platform
+
+group: uset_props
+    uset_props.o
+  deps
+    uniset_closure uniset_props uniset_core
+
+group: uset
+    uset.o
+  deps
+    uniset_core
+
+group: uniset_closure
+    uniset_closure.o
+  deps
+    uniset_core unistr_case_locale unistr_titlecase_brkiter
+
+group: uniset_props
+    uniset_props.o ruleiter.o
+  deps
+    uniset_core uprops unistr_case
+    parsepos
+    resourcebundle
+    propname unames
+
+group: parsepos
+    parsepos.o
+  deps
+    platform
+
+group: usetiter  # UnicodeSetIterator
+    usetiter.o
+  deps
+    uniset_core
+
+group: uniset_core
+    unifilt.o unifunct.o
+    uniset.o bmpset.o unisetspan.o
+  deps
+    patternprops
+    unistr_core icu_utility
+    uvector
+
+group: icu_utility_with_props
+    util_props.o
+  deps
+    icu_utility uchar ucase
+
+group: icu_utility
+    util.o
+  deps
+    unistr_core patternprops
+
+group: utext
+    utext.o
+  deps
+    unistr_core ucase
+
+group: stringenumeration
+    ustrenum.o uenum.o
+  deps
+    unistr_core
+
+group: schriter
+    schriter.o
+    # The UCharCharacterIterator implements virtual void getText(UnicodeString& result)
+    # so it depends on UnicodeString, therefore it makes little sense to split
+    # schriter and uchriter into separate groups.
+    uchriter.o
+  deps
+    chariter unistr_core
+
+group: chariter
+    chariter.o
+  deps
+    platform
+
+group: uiter
+    uiter.o
+  deps
+    platform
+
+group: unistr_cnv
+    unistr_cnv.o
+  deps
+    conversion unistr_core
+
+group: unistr_core
+    unistr.o
+  deps
+    ustrtrns appendable
+
+group: uscript
+    uscript.o  # uscript_getCode() accepts a locale ID and loads its script code data
+  deps
+    propname resourcebundle
+
+group: uprops
+    uprops.o
+  deps
+    normalizer2
+    uchar
+    ubidi_props
+    unistr_case ustring_case  # only for case folding
+    ucase
+
+group: propname
+    propname.o
+  deps
+    bytestrie
+
+group: unames
+    unames.o
+  deps
+    uchar udata
+
+group: script_runs
+    usc_impl.o
+  deps
+    uchar
+
+group: uchar
+    uchar.o
+  deps
+    utrie2
+
+group: messagepattern  # for MessageFormat and tools
+    messagepattern.o
+  deps
+    patternprops unistr_core
+
+group: patternprops
+    patternprops.o
+  deps
+    PIC
+
+group: ushape
+    ushape.o
+  deps
+    ubidi_props
+
+group: ubidi
+    ubidi.o ubidiln.o ubidiwrt.o
+  deps
+    ubidi_props
+    uchar  # for doWriteReverse() which uses IS_COMBINING(u_charType(c))
+
+group: ubidi_props
+    ubidi_props.o
+  deps
+    utrie2
+
+group: unistr_props
+    unistr_props.o
+  deps
+    unistr_core uchar
+
+group: unistr_case_locale
+    unistr_case_locale.o
+  deps
+    unistr_case ustring_case_locale
+
+group: unistr_case
+    unistr_case.o
+  deps
+    unistr_core
+    ustring_case
+
+group: unistr_titlecase_brkiter
+    unistr_titlecase_brkiter.o
+  deps
+    ustr_titlecase_brkiter
+
+group: ustr_titlecase_brkiter
+    ustr_titlecase_brkiter.o
+  deps
+    breakiterator
+    ustring_case_locale ucase
+
+group: ucasemap_titlecase_brkiter
+    ucasemap_titlecase_brkiter.o
+  deps
+    ucasemap breakiterator utext
+
+group: ucasemap
+    ucasemap.o
+  deps
+    ustring_case
+    resourcebundle  # uloc_getName() etc.
+
+group: ustring_case_locale
+    ustrcase_locale.o
+  deps
+    ustring_case
+    resourcebundle  # for uloc_getDefault()
+
+group: ustring_case
+    ustrcase.o
+  deps
+    ucase
+
+group: ucase
+    ucase.o
+  deps
+    utrie2
+
+group: uinit
+    uinit.o
+  deps
+    ucnv_io icuplug
+
+group: converter_selector
+    ucnvsel.o
+  deps
+    conversion propsvec utrie2_builder uset ucnv_set
+
+group: ucnvdisp  # ucnv_getDisplayName()
+    ucnvdisp.o
+  deps
+    conversion resourcebundle
+
+group: ucnv_set  # ucnv_getUnicodeSet
+    ucnv_set.o
+  deps
+    uset
+
+group: conversion
+    ustr_cnv.o
+    ucnv.o ucnv_cnv.o ucnv_bld.o ucnv_cb.o ucnv_err.o
+    ucnv_ct.o
+    ucnvmbcs.o ucnv_ext.o
+    ucnvhz.o ucnvisci.o ucnv_lmb.o ucnv2022.o
+    ucnvlat1.o ucnv_u7.o ucnv_u8.o ucnv_u16.o ucnv_u32.o
+    ucnvbocu.o ucnvscsu.o
+  deps
+    ucnv_io
+
+group: ucnv_io
+    ucnv_io.o
+  deps
+    sort stringenumeration udata
+
+group: service_registration
+    serv.o servnotf.o servlkf.o servlk.o servls.o servrbf.o servslkf.o
+    locutil.o
+  deps
+    locale_display_names resourcebundle
+    hashtable uvector
+
+group: ucat  # message-catalog-like API
+    ucat.o
+  deps
+    resourcebundle
+
+group: locale_display_names
+    locdispnames.o
+  deps
+    locresdata
+
+group: icudataver  # u_getDataVersion()
+    icudataver.o
+  deps
+    resourcebundle
+
+group: loclikely
+    loclikely.o
+  deps
+    resourcebundle
+
+group: locresdata
+    # This was intended to collect locale functions that load resource bundle data.
+    # See the resourcebundle group about what else loads data.
+    locresdata.o
+  deps
+    resourcebundle
+
+group: resbund_cnv  # paths are Unicode strings
+    resbund_cnv.o
+  deps
+    conversion resourcebundle ures_cnv
+
+group: ures_cnv  # ures_openU, path is a Unicode string
+    ures_cnv.o
+  deps
+    conversion resourcebundle
+
+group: resourcebundle
+    resbund.o uresbund.o uresdata.o
+    locavailable.o
+    # uloc_tag.c converts between old ICU/LDML/CLDR locale IDs and newer BCP 47 IDs.
+    # It uses data from resource bundles for some of the mappings.
+    # We might want to generate .c files for that data, to #include rather than load,
+    # to minimize dependencies from this code.
+    # Then we could separate this higher-level locale ID code from the resource bundle code.
+    uloc.o uloc_tag.o
+    # Even basic locid.cpp via Locale constructors and Locale::getDefault()
+    # depend on canonicalization and data loading.
+    # We can probably only disentangle basic locale ID handling from resource bundle code
+    # by hardcoding all of the locale ID data.
+    locid.o locmap.o wintz.o
+    # Do we need class LocaleBased? http://bugs.icu-project.org/trac/ticket/8608
+    locbased.o
+  deps
+    udata ucol_swp
+    sort stringenumeration uhash
+
+group: udata
+    udata.o ucmndata.o udatamem.o
+    umapfile.o
+  deps
+    uhash charstr stringpiece platform stubdata
+    file_io mmap_functions
+
+group: ucharstriebuilder
+    ucharstriebuilder.o
+  deps
+    ucharstrie stringtriebuilder sort
+    unistr_core
+
+group: ucharstrieiterator
+    ucharstrieiterator.o
+  deps
+    ucharstrie unistr_core uvector32
+
+group: ucharstrie
+    ucharstrie.o
+  deps
+    platform
+
+group: bytestriebuilder
+    bytestriebuilder.o
+  deps
+    bytestrie stringtriebuilder sort
+    charstr stringpiece
+
+group: bytestrieiterator
+    bytestrieiterator.o
+  deps
+    bytestrie charstr uvector32
+
+group: bytestrie
+    bytestrie.o
+  deps
+    platform
+
+group: stringtriebuilder
+    stringtriebuilder.o
+  deps
+    uhash
+
+group: propsvec
+    propsvec.o
+  deps
+    sort utrie2_builder
+
+group: utrie2_builder
+    utrie2_builder.o
+  deps
+    platform
+    utrie2
+    utrie  # for utrie2_fromUTrie()
+    ucol_swp  # for utrie_swap()
+
+group: utrie2
+    utrie2.o
+  deps
+    platform
+
+group: utrie  # Callers should use utrie2 instead.
+    utrie.o
+  deps
+    platform
+
+group: hashtable  # Maps UnicodeString to value.
+    uhash_us.o
+  deps
+    unistr_core
+    uhash
+
+group: uhash
+    uhash.o
+  deps
+    platform
+
+group: ustack
+    ustack.o
+  deps
+    uvector
+
+group: uvector
+    uvector.o
+  deps
+    platform
+    sort  # for UVector::sort()
+
+group: uvector32
+    uvectr32.o
+  deps
+    platform
+
+group: uvector64
+    uvectr64.o
+  deps
+    platform
+
+group: ulist
+    ulist.o
+  deps
+    platform
+
+group: sort
+    uarrsort.o
+  deps
+    platform
+
+group: ustr_wcs
+    ustr_wcs.o
+  deps
+    ustrtrns  # on platforms where wchar_t is UTF-32
+    # platform -- on other platforms
+
+group: ustrtrns
+    ustrtrns.o
+  deps
+    platform
+
+group: charstr
+    charstr.o
+  deps
+    unistr_core  # for CharString::appendInvariantChars(const UnicodeString &s, UErrorCode &errorCode)
+    platform
+
+group: stringpiece
+    stringpiece.o
+  deps
+    PIC c_strings
+
+group: bytestream
+    bytestream.o
+  deps
+    platform
+
+group: appendable
+    appendable.o
+  deps
+    platform
+
+group: icuplug
+    icuplug.o
+  deps
+    platform
+
+group: ucol_swp
+    ucol_swp.o
+  deps
+    platform
+
+group: errorcode  # ErrorCode base class
+    errorcode.o
+  deps
+    utypes
+    PIC
+
+group: utypes  # u_errorName()
+    utypes.o
+
+group: platform
+    # Files in the "platform" group.
+    cmemory.o uobject.o
+    cstring.o cwchar.o uinvchar.o
+    ustring.o  # Other platform files really just need u_strlen
+    ustrfmt.o  # uprv_itou
+    utf_impl.o
+    putil.o
+    ucln_cmn.o  # for putil.o which calls ucln_common_registerCleanup
+    udataswp.o  # for uinvchar.o; TODO: move uinvchar.o swapper functions to udataswp.o?
+    umath.o
+    mutex.o umutex.o
+    utrace.o
+  deps
+    # The "platform" group has no ICU dependencies.
+    PIC system_debug malloc_functions c_strings c_string_formatting
+    floating_point pthread system_locale
+    stdio_input readlink_function dir_io
+    errno_perror dlfcn  # Move related code into icuplug.c?
+    cplusplus
+
+# ICU i18n library ----------------------------------------------------------- #
+
+library: i18n
+  deps
+    localedata charset_detector spoof_detection
+    alphabetic_index collation formatting formattable_cnv regex regex_cnv translit
+    universal_time_scale
+    uclean_i18n
+
+group: localedata
+    ulocdata.o
+  deps
+    uniset_props resourcebundle
+    uset_props  # TODO: change to using C++ UnicodeSet, remove this dependency
+
+group: charset_detector
+    csdetect.o csmatch.o csr2022.o csrecog.o csrmbcs.o csrsbcs.o csrucode.o csrutf8.o inputext.o ucsdet.o
+  deps
+    conversion
+    uclean_i18n
+
+group: spoof_detection
+    uspoof.o uspoof_build.o uspoof_conf.o uspoof_impl.o uspoof_wsconf.o
+  deps
+    uniset_props regex unorm uscript
+
+group: alphabetic_index
+    alphaindex.o
+  deps
+    collation localedata
+    uclean_i18n
+
+group: collation
+    bocsu.o coleitr.o coll.o colldata.o sortkey.o tblcoll.o ucol.o
+    ucol_bld.o ucol_cnt.o ucol_elm.o ucol_res.o ucol_sit.o ucol_tok.o ucol_wgt.o ucoleitr.o
+    bms.o bmsearch.o search.o stsearch.o usearch.o
+  deps
+    common  # TODO: Could be narrower.
+    uclean_i18n
+
+group: formatting
+    # TODO: Try to subdivide this ball of wax.
+    # locale_display_names2
+    locdspnm.o
+    # currency
+    ucurr.o
+    # currencyformat
+    curramt.o currfmt.o currpinf.o currunit.o
+    # decimalformat
+    dcfmtsym.o decfmtst.o decimfmt.o
+    numfmt.o numsys.o unum.o winnmfmt.o
+    # rbnf
+    nfrs.o nfrule.o nfsubs.o rbnf.o
+    # measureformat
+    measfmt.o
+    # dateformat
+    astro.o buddhcal.o calendar.o cecal.o chnsecal.o coptccal.o ethpccal.o
+    gregocal.o gregoimp.o hebrwcal.o indiancal.o islamcal.o japancal.o persncal.o taiwncal.o
+    ucal.o
+    basictz.o olsontz.o rbtz.o simpletz.o timezone.o tzrule.o tztrans.o
+    vtzone.o vzone.o wintzimpl.o zonemeta.o zrule.o ztrans.o
+    tzfmt.o tzgnames.o tznames.o tznames_impl.o
+    datefmt.o dtfmtsym.o dtitvfmt.o dtitvinf.o dtptngen.o dtrule.o reldtfmt.o
+    smpdtfmt.o smpdtfst.o udateintervalformat.o udatpg.o windtfmt.o
+    udat.o
+    tmunit.o tmutamt.o tmutfmt.o
+    # messageformat
+    choicfmt.o msgfmt.o plurfmt.o selfmt.o umsg.o
+  deps
+    digitlist formattable format
+    pluralrules
+    collation  # for rbnf
+    common
+    floating_point  # sqrt() for astro.o
+    trigonometry  # for astro.o
+    stdlib_qsort  # for ucurr.o (which does not use ICU's uarrsort.o)
+    uclean_i18n
+
+group: digitlist
+    digitlst.o decContext.o decNumber.o
+  deps
+    charstr stringpiece unistr_core
+
+group: formattable
+    fmtable.o
+    measure.o
+  deps
+    unistr_core digitlist stringpiece charstr
+
+group: formattable_cnv
+    fmtable_cnv.o
+  deps
+    formattable unistr_cnv conversion
+
+group: format
+    format.o fphdlimp.o fpositer.o
+  deps
+    resourcebundle parsepos unistr_core uvector32
+
+group: pluralrules
+    plurrule.o upluralrules.o
+  deps
+    patternprops resourcebundle uvector
+    unistr_case_locale
+
+group: regex_cnv
+    uregexc.o
+  deps
+    regex unistr_cnv
+
+group: regex
+    regexcmp.o regexst.o regextxt.o rematch.o repattrn.o uregex.o
+  deps
+    uniset_closure utext uvector32 uvector64 ustack
+    breakiterator
+    unistr_core
+    uinit  # TODO: Really needed?
+    uclean_i18n
+
+group: translit
+    anytrans.o brktrans.o casetrn.o cpdtrans.o name2uni.o uni2name.o nortrans.o remtrans.o titletrn.o tolowtrn.o toupptrn.o
+    esctrn.o unesctrn.o nultrans.o
+    funcrepl.o quant.o rbt.o rbt_data.o rbt_pars.o rbt_rule.o rbt_set.o strmatch.o strrepl.o translit.o transreg.o tridpars.o utrans.o
+  deps
+    common
+    formatting  # for Transliterator::getDisplayName()
+    uclean_i18n
+
+group: universal_time_scale
+    utmscale.o
+
+group: uclean_i18n
+    ucln_in.o
+  deps
+    platform
+
+# ICU io library ------------------------------------------------------------- #
+
+library: io
+  deps
+    ustdio ustream uclean_io
+
+group: ustdio
+    locbund.o sprintf.o sscanf.o ufile.o ufmt_cmn.o uprintf.o uprntf_p.o uscanf.o uscanf_p.o ustdio.o
+  deps
+    formatting conversion translit
+    uclean_io
+    stdio_output
+
+group: ustream
+    ustream.o
+  deps
+    unistr_cnv
+    uchar  # for u_isWhitespace()
+    iostream
+
+group: uclean_io
+    ucln_io.o
+  deps
+    platform
diff --git a/icu4c/source/test/depstest/depstest.py b/icu4c/source/test/depstest/depstest.py

new file mode 100755 (executable)

index 0000000..f25c8c7
--- /dev/null
+++ b/icu4c/source/test/depstest/depstest.py
@@ -0,0 +1,167 @@
+#! /usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2011, International Business Machines
+# Corporation and others. All Rights Reserved.
+#
+# file name: depstest.py
+#
+# created on: 2011may24
+
+"""ICU dependency tester.
+
+This probably works only on Linux.
+
+The exit code is 0 if everything is fine, 1 for errors, 2 for only warnings.
+
+Sample invocation:
+  ~/svn.icu/trunk/src/source/test/depstest$ ./depstest.py ~/svn.icu/trunk/dbg
+"""
+
+__author__ = "Markus W. Scherer"
+
+import glob
+import os.path
+import subprocess
+import sys
+
+import dependencies
+
+# Names of symbols that _ReadObjFile skipped (C++ runtime/ABI symbols).
+_ignored_symbols = set()
+# Maps "library/file.o" to {"imports": set(...), "exports": set(...)}.
+_obj_files = {}
+# Maps a symbol name to the "library/file.o" that exports it, or to the name
+# of the dependencies item that lists it as a system symbol.
+_symbols_to_files = {}
+# Exit code of the script: 0 if fine, 1 for errors, 2 for only warnings.
+_return_value = 0
+
+def _ReadObjFile(root_path, library_name, obj_name):
+  """Runs nm on one .o file and records its imported and exported symbols.
+
+  Stores {"imports": set, "exports": set} in _obj_files under the key
+  "library_name/obj_name", maps each exported symbol to that key in
+  _symbols_to_files, and collects skipped C++ runtime symbols in
+  _ignored_symbols.
+  If the key was already read, prints a warning, sets the return value to 2
+  and leaves the existing entry untouched.
+
+  Args:
+    root_path: Root folder of the built ICU libraries.
+    library_name: Name of the library subfolder below root_path.
+    obj_name: File name of the .o file inside that subfolder.
+  """
+  # _return_value must be listed here: without the global declaration the
+  # assignment below would only bind a local name and the warning status
+  # would never reach the script's exit code.
+  global _ignored_symbols, _obj_files, _symbols_to_files, _return_value
+  lib_obj_name = library_name + "/" + obj_name
+  if lib_obj_name in _obj_files:
+    print "Warning: duplicate .o file " + lib_obj_name
+    _return_value = 2
+    return
+
+  path = os.path.join(root_path, library_name, obj_name)
+  nm_result = subprocess.Popen(["nm", "--demangle", "--format=sysv",
+                                "--extern-only", "--no-sort", path],
+                               stdout=subprocess.PIPE).communicate()[0]
+  obj_imports = set()
+  obj_exports = set()
+  for line in nm_result.splitlines():
+    fields = line.split("|")
+    # Lines without a "|" separator are not symbol rows; skip them.
+    if len(fields) == 1: continue
+    name = fields[0].strip()
+    # Ignore symbols like '__cxa_pure_virtual',
+    # 'vtable for __cxxabiv1::__si_class_type_info' or
+    # 'DW.ref.__gxx_personality_v0'.
+    if name.startswith("__cxa") or "__cxxabi" in name or "__gxx" in name:
+      _ignored_symbols.add(name)
+      continue
+    # Third column of the row; "U" marks an undefined (imported) symbol.
+    # Named symbol_class rather than "type" to avoid shadowing the builtin.
+    symbol_class = fields[2].strip()
+    if symbol_class == "U":
+      obj_imports.add(name)
+    else:
+      # TODO: Investigate weak symbols (V, W) with or without values.
+      obj_exports.add(name)
+      _symbols_to_files[name] = lib_obj_name
+  _obj_files[lib_obj_name] = {"imports": obj_imports, "exports": obj_exports}
+
+def _ReadLibrary(root_path, library_name):
+  """Reads every *.o file found directly in root_path/library_name."""
+  pattern = os.path.join(root_path, library_name, "*.o")
+  for obj_path in glob.glob(pattern):
+    obj_name = os.path.basename(obj_path)
+    _ReadObjFile(root_path, library_name, obj_name)
+
+def _Resolve(name, parents):
+  """Recursively computes the exports and system symbols of one item.
+
+  Looks up dependencies.items[name], resolves each of its deps first, and
+  stores the accumulated "exports" and "system_symbols" sets on the item.
+  An item that already has "exports" is returned as is.
+  Imports of the item's files that are not satisfied by the item itself,
+  its dependencies, or the ignored symbols are reported on stderr and set
+  the return value to 1.
+
+  Args:
+    name: Key of the item in dependencies.items.
+    parents: List of item names currently being resolved; name is appended
+        for the duration of the call. Used to detect circular dependencies,
+        in which case the script exits.
+
+  Returns:
+    The resolved item dictionary.
+  """
+  global _ignored_symbols, _obj_files, _symbols_to_files, _return_value
+  item = dependencies.items[name]
+  item_type = item["type"]
+  if name in parents:
+    sys.exit("Error: %s %s has a circular dependency on itself: %s" %
+             (item_type, name, parents))
+  # Check if already cached.
+  exports = item.get("exports")
+  if exports != None: return item
+  # Calculate recursively.
+  parents.append(name)
+  imports = set()
+  exports = set()
+  system_symbols = item.get("system_symbols")
+  if system_symbols == None: system_symbols = item["system_symbols"] = set()
+  # Collect what this item's own .o files import and export.
+  files = item.get("files")
+  if files:
+    for file_name in files:
+      obj_file = _obj_files[file_name]
+      imports |= obj_file["imports"]
+      exports |= obj_file["exports"]
+  # Imports satisfied inside the item, or deliberately ignored, need no dep.
+  imports -= exports | _ignored_symbols
+  deps = item.get("deps")
+  if deps:
+    for dep in deps:
+      dep_item = _Resolve(dep, parents)
+      # Detect whether this item needs to depend on dep,
+      # except when this item has no files, that is, when it is just
+      # a deliberate umbrella group or library.
+      dep_exports = dep_item["exports"]
+      dep_system_symbols = dep_item["system_symbols"]
+      if files and imports.isdisjoint(dep_exports) and imports.isdisjoint(dep_system_symbols):
+        print "Info:  %s %s  does not need to depend on  %s\n" % (item_type, name, dep)
+      # We always include the dependency's exports, even if we do not need them
+      # to satisfy local imports.
+      exports |= dep_exports
+      system_symbols |= dep_system_symbols
+  item["exports"] = exports
+  item["system_symbols"] = system_symbols
+  # Whatever is still imported now is not provided by any declared dependency.
+  imports -= exports | system_symbols
+  for symbol in imports:
+    for file_name in files:
+      if symbol in _obj_files[file_name]["imports"]:
+        sys.stderr.write("Error:  %s %s  file  %s  imports  %s  but  %s  does not depend on  %s\n" %
+                         (item_type, name, file_name, symbol, name, _symbols_to_files.get(symbol)))
+    _return_value = 1
+  # Done with this item; remove it from the chain of items being resolved.
+  del parents[-1]
+  return item
+
+def Process(root_path):
+  """Loads dependencies.txt, reads the libraries' .o files, and checks them.
+
+  Reports .o files that are built but not listed in dependencies.txt, and
+  files that are listed but not built. Only when no problem has been recorded
+  so far, resolves every library, which modifies dependencies.items:
+  each item's system_symbols and exports are built recursively.
+  """
+  global _ignored_symbols, _obj_files, _return_value
+  dependencies.Load()
+  # Register each declared system symbol under the name of its item.
+  for (item_name, item) in dependencies.items.iteritems():
+    for symbol in item.get("system_symbols") or ():
+      _symbols_to_files[symbol] = item_name
+  for library_name in dependencies.libraries:
+    _ReadLibrary(root_path, library_name)
+  # Compare the set of built .o files with the set of declared files.
+  built_files = set(_obj_files)
+  undeclared_files = built_files - dependencies.files
+  unbuilt_files = dependencies.files - built_files
+  if undeclared_files:
+    sys.stderr.write("Error: files missing from dependencies.txt:\n%s\n" %
+                     sorted(undeclared_files))
+    _return_value = 1
+  if unbuilt_files:
+    sys.stderr.write("Error: files in dependencies.txt but not built:\n%s\n" %
+                     sorted(unbuilt_files))
+    _return_value = 1
+  if _return_value:
+    return
+  for library_name in dependencies.libraries:
+    _Resolve(library_name, [])
+
+def main():
+  """Checks the ICU build tree named on the command line.
+
+  Exits with a command line error message if no argument is given.
+
+  Returns:
+    The accumulated return value, to be used as the exit code.
+  """
+  global _return_value
+  args = sys.argv[1:]
+  if not args:
+    sys.exit("Command line error: "
+             "need one argument with the root path to the built ICU libraries/*.o files.")
+  Process(args[0])
+  if _ignored_symbols:
+    print "Info: ignored symbols:\n%s" % sorted(_ignored_symbols)
+  if not _return_value:
+    print "OK: Specified and actual dependencies match."
+  return _return_value
+
+if __name__ == "__main__":
+  sys.exit(main())
diff --git a/icu4c/source/test/intltest/canittst.cpp b/icu4c/source/test/intltest/canittst.cpp

index a1b9a2ad3182e95b2f1719d0333f11673068cb70..fe85b1fac3fefca1bae7bbaabb1dedacdf91172a 100644 (file)
--- a/icu4c/source/test/intltest/canittst.cpp
+++ b/icu4c/source/test/intltest/canittst.cpp
@@ -1,6 +1,6 @@
  /********************************************************************
   * COPYRIGHT: 
- * Copyright (c) 2002-2010, International Business Machines Corporation and
+ * Copyright (c) 2002-2011, International Business Machines Corporation and
   * others. All Rights Reserved.
   ********************************************************************
   *
@@ -130,7 +130,7 @@ void CanonicalIteratorTest::TestBasic() {
      // NOTE: we use a TreeSet below to sort the output, which is not guaranteed to be sorted!
  
      Hashtable *permutations = new Hashtable(FALSE, status);
-    permutations->setValueDeleter(uhash_deleteUnicodeString);
+    permutations->setValueDeleter(uprv_deleteUObject);
      UnicodeString toPermute("ABC");
  
      CanonicalIterator::permute(toPermute, FALSE, permutations, status);
@@ -144,7 +144,7 @@ void CanonicalIteratorTest::TestBasic() {
      // try samples
      logln("testing samples");
      Hashtable *set = new Hashtable(FALSE, status);
-    set->setValueDeleter(uhash_deleteUnicodeString);
+    set->setValueDeleter(uprv_deleteUObject);
      int32_t i = 0;
      CanonicalIterator it("", status);
      if(U_SUCCESS(status)) {
diff --git a/icu4c/source/test/intltest/icusvtst.cpp b/icu4c/source/test/intltest/icusvtst.cpp

index 1a2e15abfe4e311bc21981842007cec92e4ef5c4..c45c68c9dbfc6c5bc9278552f3479e867f25d277 100644 (file)
--- a/icu4c/source/test/intltest/icusvtst.cpp
+++ b/icu4c/source/test/intltest/icusvtst.cpp
@@ -1,6 +1,6 @@
  /**
   *******************************************************************************
- * Copyright (C) 2001-2010, International Business Machines Corporation and
+ * Copyright (C) 2001-2011, International Business Machines Corporation and
   * others. All Rights Reserved.
   *******************************************************************************
   */
@@ -458,7 +458,7 @@ ICUServiceTest::testAPI_One()
      // should not be able to locate invisible services
      {
          UErrorCode status = U_ZERO_ERROR;
-        UVector ids(uhash_deleteUnicodeString, uhash_compareUnicodeString, status);
+        UVector ids(uprv_deleteUObject, uhash_compareUnicodeString, status);
          service.getVisibleIDs(ids, status);
          UnicodeString target = "en_US_BAR";
          confirmBoolean("18) find invisible", !ids.contains(&target));
@@ -551,7 +551,7 @@ class TestMultipleKeyStringFactory : public ICUServiceFactory {
      public:
      TestMultipleKeyStringFactory(const UnicodeString ids[], int32_t count, const UnicodeString& factoryID)
          : _status(U_ZERO_ERROR)
-        , _ids(uhash_deleteUnicodeString, uhash_compareUnicodeString, count, _status)
+        , _ids(uprv_deleteUObject, uhash_compareUnicodeString, count, _status)
          , _factoryID(factoryID + ": ") 
      {
          for (int i = 0; i < count; ++i) {
@@ -680,7 +680,7 @@ ICUServiceTest::testAPI_Two()
      // iterate over the visual ids returned by the multiple factory
      {
          UErrorCode status = U_ZERO_ERROR;
-        UVector ids(uhash_deleteUnicodeString, uhash_compareUnicodeString, 0, status);
+        UVector ids(uprv_deleteUObject, uhash_compareUnicodeString, 0, status);
          service.getVisibleIDs(ids, status);
          for (int i = 0; i < ids.size(); ++i) {
              const UnicodeString* id = (const UnicodeString*)ids[i];
@@ -801,7 +801,7 @@ ICUServiceTest::testAPI_Two()
  
      {
          UErrorCode status = U_ZERO_ERROR;
-        UVector ids(uhash_deleteUnicodeString, uhash_compareUnicodeString, 0, status);
+        UVector ids(uprv_deleteUObject, uhash_compareUnicodeString, 0, status);
          service.getVisibleIDs(ids, status);
          for (int i = 0; i < ids.size(); ++i) {
              const UnicodeString* id = (const UnicodeString*)ids[i];
@@ -897,7 +897,7 @@ ICUServiceTest::testRBF()
      // list all of the resources 
      {
          UErrorCode status = U_ZERO_ERROR;
-        UVector ids(uhash_deleteUnicodeString, uhash_compareUnicodeString, 0, status);
+        UVector ids(uprv_deleteUObject, uhash_compareUnicodeString, 0, status);
          service.getVisibleIDs(ids, status);
          logln("all visible ids:");
          for (int i = 0; i < ids.size(); ++i) {
@@ -1164,7 +1164,7 @@ void ICUServiceTest::testLocale() {
  
      {
          UErrorCode status = U_ZERO_ERROR;
-        UVector ids(uhash_deleteUnicodeString, uhash_compareUnicodeString, 0, status);
+        UVector ids(uprv_deleteUObject, uhash_compareUnicodeString, 0, status);
          service.getVisibleIDs(ids, status);
          logln("all visible ids:");
          for (int i = 0; i < ids.size(); ++i) {
@@ -1176,7 +1176,7 @@ void ICUServiceTest::testLocale() {
      Locale::setDefault(loc, status);
      {
          UErrorCode status = U_ZERO_ERROR;
-        UVector ids(uhash_deleteUnicodeString, uhash_compareUnicodeString, 0, status);
+        UVector ids(uprv_deleteUObject, uhash_compareUnicodeString, 0, status);
          service.getVisibleIDs(ids, status);
          logln("all visible ids:");
          for (int i = 0; i < ids.size(); ++i) {
@@ -1368,7 +1368,7 @@ void ICUServiceTest::testCoverage()
                    }
            }
  
-      UVector ids(uhash_deleteUnicodeString, uhash_compareUnicodeString, status);
+      UVector ids(uprv_deleteUObject, uhash_compareUnicodeString, status);
            // yuck, this is awkward to use.  All because we pass null in an overload.
            // TODO: change this.
            UnicodeString str("Greet");
diff --git a/icu4c/source/test/intltest/svccoll.cpp b/icu4c/source/test/intltest/svccoll.cpp

index 3f0d6cc8a520c8c7b3ea58bf85efe1ffbacf62ac..a781d4a813927aed7f2e5a141604151d43d12c06 100644 (file)
--- a/icu4c/source/test/intltest/svccoll.cpp
+++ b/icu4c/source/test/intltest/svccoll.cpp
@@ -1,6 +1,6 @@
  /*
   *******************************************************************************
- * Copyright (C) 2003-2010, International Business Machines Corporation and    *
+ * Copyright (C) 2003-2011, International Business Machines Corporation and    *
   * others. All Rights Reserved.                                                *
   *******************************************************************************
   */
@@ -353,7 +353,7 @@ void CollationServiceTest::TestRegisterFactory(void)
          errln("memory allocation error");
          return;
      }
-    fuFUNames->setValueDeleter(uhash_deleteUnicodeString);
+    fuFUNames->setValueDeleter(uprv_deleteUObject);
  
      fuFUNames->put(fu_FU.getName(), new UnicodeString("ze leetle bunny Fu-Fu"), status);
      fuFUNames->put(fu_FU_FOO.getName(), new UnicodeString("zee leetel bunny Foo-Foo"), status);
diff --git a/icu4c/source/test/intltest/usettest.cpp b/icu4c/source/test/intltest/usettest.cpp

index c3d98a3c0674e766812200fc30fbc87c60103a85..6c40121d93071b18b7b9c707b2de1a1e6998f89f 100644 (file)
--- a/icu4c/source/test/intltest/usettest.cpp
+++ b/icu4c/source/test/intltest/usettest.cpp
@@ -1573,7 +1573,7 @@ public:
      Hashtable contents;
  
      TokenSymbolTable(UErrorCode& ec) : contents(FALSE, ec) {
-        contents.setValueDeleter(uhash_deleteUnicodeString);
+        contents.setValueDeleter(uprv_deleteUObject);
      }
  
      ~TokenSymbolTable() {}
diff --git a/icu4c/source/tools/genrb/reslist.c b/icu4c/source/tools/genrb/reslist.c

index 66d370405d190a981f5569d4cffda7c521d57cd2..5b1a433be77e0a0a5c45d30a04a5aa9c9ed6dcf5 100644 (file)
--- a/icu4c/source/tools/genrb/reslist.c
+++ b/icu4c/source/tools/genrb/reslist.c
@@ -886,7 +886,7 @@ struct SResource* array_open(struct SRBRoot *bundle, const char *tag, const stru
  static int32_t U_CALLCONV
  string_hash(const UHashTok key) {
      const struct SResource *res = (struct SResource *)key.pointer;
-    return uhash_hashUCharsN(res->u.fString.fChars, res->u.fString.fLength);
+    return ustr_hashUCharsN(res->u.fString.fChars, res->u.fString.fLength);
  }
  
  static UBool U_CALLCONV
author	Markus Scherer <markus.icu@gmail.com>
	Fri, 3 Jun 2011 05:23:57 +0000 (05:23 +0000)
committer	Markus Scherer <markus.icu@gmail.com>
	Fri, 3 Jun 2011 05:23:57 +0000 (05:23 +0000)
icu4c/source/common/Makefile.in		patch \| blob \| history
icu4c/source/common/bytestriebuilder.cpp		patch \| blob \| history
icu4c/source/common/caniter.cpp		patch \| blob \| history
icu4c/source/common/cmemory.h		patch \| blob \| history
icu4c/source/common/common.vcxproj		patch \| blob \| history
icu4c/source/common/common.vcxproj.filters		patch \| blob \| history
icu4c/source/common/hash.h		patch \| blob \| history
icu4c/source/common/normalizer2.cpp		patch \| blob \| history
icu4c/source/common/normalizer2impl.cpp		patch \| blob \| history
icu4c/source/common/normalizer2impl.h		patch \| blob \| history
icu4c/source/common/propname.cpp		patch \| blob \| history
icu4c/source/common/rbbiscan.cpp		patch \| blob \| history
icu4c/source/common/serv.cpp		patch \| blob \| history
icu4c/source/common/servls.cpp		patch \| blob \| history
icu4c/source/common/stringtriebuilder.cpp		patch \| blob \| history
icu4c/source/common/ucasemap.cpp		patch \| blob \| history
icu4c/source/common/ucasemap_titlecase_brkiter.cpp	[new file with mode: 0644]	patch \| blob
icu4c/source/common/ucharstriebuilder.cpp		patch \| blob \| history
icu4c/source/common/uchriter.cpp		patch \| blob \| history
icu4c/source/common/ucln.h		patch \| blob \| history
icu4c/source/common/ucln_cmn.c		patch \| blob \| history
icu4c/source/common/ucln_imp.h		patch \| blob \| history
icu4c/source/common/ucnvisci.c		patch \| blob \| history
icu4c/source/common/ucnvsel.cpp		patch \| blob \| history
icu4c/source/common/uhash.c		patch \| blob \| history
icu4c/source/common/uhash.h		patch \| blob \| history
icu4c/source/common/uhash_us.cpp		patch \| blob \| history
icu4c/source/common/uinit.c		patch \| blob \| history
icu4c/source/common/unicode/ucnvsel.h		patch \| blob \| history
icu4c/source/common/unicode/uniset.h		patch \| blob \| history
icu4c/source/common/unicode/unistr.h		patch \| blob \| history
icu4c/source/common/uniset.cpp		patch \| blob \| history
icu4c/source/common/uniset_closure.cpp	[new file with mode: 0644]	patch \| blob
icu4c/source/common/uniset_props.cpp		patch \| blob \| history
icu4c/source/common/unistr.cpp		patch \| blob \| history
icu4c/source/common/unistr_case.cpp		patch \| blob \| history
icu4c/source/common/unistr_case_locale.cpp	[new file with mode: 0644]	patch \| blob
icu4c/source/common/unistr_titlecase_brkiter.cpp	[new file with mode: 0644]	patch \| blob
icu4c/source/common/uobject.cpp		patch \| blob \| history
icu4c/source/common/uprops.cpp		patch \| blob \| history
icu4c/source/common/uscript.c		patch \| blob \| history
icu4c/source/common/uset.cpp		patch \| blob \| history
icu4c/source/common/uset_props.cpp		patch \| blob \| history
icu4c/source/common/ustr_imp.h		patch \| blob \| history
icu4c/source/common/ustr_titlecase_brkiter.cpp	[new file with mode: 0644]	patch \| blob
icu4c/source/common/ustrcase.cpp		patch \| blob \| history
icu4c/source/common/ustrcase_locale.cpp	[new file with mode: 0644]	patch \| blob
icu4c/source/common/ustring.cpp		patch \| blob \| history
icu4c/source/common/util.cpp		patch \| blob \| history
icu4c/source/common/util.h		patch \| blob \| history
icu4c/source/common/uts46.cpp		patch \| blob \| history
icu4c/source/i18n/alphaindex.cpp		patch \| blob \| history
icu4c/source/i18n/calendar.cpp		patch \| blob \| history
icu4c/source/i18n/currpinf.cpp		patch \| blob \| history
icu4c/source/i18n/decContext.c		patch \| blob \| history
icu4c/source/i18n/decimfmt.cpp		patch \| blob \| history
icu4c/source/i18n/dtitvfmt.cpp		patch \| blob \| history
icu4c/source/i18n/dtitvinf.cpp		patch \| blob \| history
icu4c/source/i18n/locdspnm.cpp		patch \| blob \| history
icu4c/source/i18n/msgfmt.cpp		patch \| blob \| history
icu4c/source/i18n/plurrule.cpp		patch \| blob \| history
icu4c/source/i18n/rbt_data.cpp		patch \| blob \| history
icu4c/source/i18n/rbt_pars.cpp		patch \| blob \| history
icu4c/source/i18n/smpdtfmt.cpp		patch \| blob \| history
icu4c/source/i18n/smpdtfst.cpp		patch \| blob \| history
icu4c/source/i18n/tblcoll.cpp		patch \| blob \| history
icu4c/source/i18n/tmutfmt.cpp		patch \| blob \| history
icu4c/source/i18n/transreg.cpp		patch \| blob \| history
icu4c/source/i18n/tridpars.cpp		patch \| blob \| history
icu4c/source/i18n/tzfmt.cpp		patch \| blob \| history
icu4c/source/i18n/tzgnames.cpp		patch \| blob \| history
icu4c/source/i18n/tznames.cpp		patch \| blob \| history
icu4c/source/i18n/tznames_impl.cpp		patch \| blob \| history
icu4c/source/i18n/ucol_bld.cpp		patch \| blob \| history
icu4c/source/i18n/ucol_elm.cpp		patch \| blob \| history
icu4c/source/i18n/ucol_tok.cpp		patch \| blob \| history
icu4c/source/i18n/unicode/tmutfmt.h		patch \| blob \| history
icu4c/source/i18n/uspoof_conf.cpp		patch \| blob \| history
icu4c/source/i18n/uspoof_wsconf.cpp		patch \| blob \| history
icu4c/source/i18n/vtzone.cpp		patch \| blob \| history
icu4c/source/i18n/zonemeta.cpp		patch \| blob \| history
icu4c/source/test/depstest/dependencies.py	[new file with mode: 0755]	patch \| blob
icu4c/source/test/depstest/dependencies.txt	[new file with mode: 0644]	patch \| blob
icu4c/source/test/depstest/depstest.py	[new file with mode: 0755]	patch \| blob
icu4c/source/test/intltest/canittst.cpp		patch \| blob \| history
icu4c/source/test/intltest/icusvtst.cpp		patch \| blob \| history
icu4c/source/test/intltest/svccoll.cpp		patch \| blob \| history
icu4c/source/test/intltest/usettest.cpp		patch \| blob \| history
icu4c/source/tools/genrb/reslist.c		patch \| blob \| history