From: Markus Scherer Date: Fri, 3 Jun 2011 05:23:57 +0000 (+0000) Subject: ICU-8605 document & test ICU4C dependencies, remove cycles, reduce some deps; merged... X-Git-Tag: milestone-59-0-1~4748 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=9f7d74001cf4068718bc87b952f90693559936fa;p=icu ICU-8605 document & test ICU4C dependencies, remove cycles, reduce some deps; merged from branches/markus/depstest -r 30155:30193 X-SVN-Rev: 30194 --- diff --git a/icu4c/source/common/Makefile.in b/icu4c/source/common/Makefile.in index 2fb2365383b..f99a5db1d85 100644 --- a/icu4c/source/common/Makefile.in +++ b/icu4c/source/common/Makefile.in @@ -90,12 +90,13 @@ stringtriebuilder.o bytestriebuilder.o \ bytestrie.o bytestrieiterator.o \ ucharstrie.o ucharstriebuilder.o ucharstrieiterator.o \ appendable.o ustr_cnv.o unistr_cnv.o unistr.o unistr_case.o unistr_props.o \ -utf_impl.o ustring.o ustrcase.o ucasemap.o cstring.o ustrfmt.o ustrtrns.o ustr_wcs.o utext.o \ +utf_impl.o ustring.o ustrcase.o ucasemap.o ucasemap_titlecase_brkiter.o cstring.o ustrfmt.o ustrtrns.o ustr_wcs.o utext.o \ +unistr_case_locale.o ustrcase_locale.o unistr_titlecase_brkiter.o ustr_titlecase_brkiter.o \ normalizer2impl.o normalizer2.o filterednormalizer2.o normlzr.o unorm.o unormcmp.o unorm_it.o \ chariter.o schriter.o uchriter.o uiter.o \ patternprops.o uchar.o uprops.o ucase.o propname.o ubidi_props.o ubidi.o ubidiwrt.o ubidiln.o ushape.o \ uscript.o usc_impl.o unames.o \ -utrie.o utrie2.o utrie2_builder.o bmpset.o unisetspan.o uset_props.o uniset_props.o uset.o uniset.o usetiter.o ruleiter.o caniter.o unifilt.o unifunct.o \ +utrie.o utrie2.o utrie2_builder.o bmpset.o unisetspan.o uset_props.o uniset_props.o uniset_closure.o uset.o uniset.o usetiter.o ruleiter.o caniter.o unifilt.o unifunct.o \ uarrsort.o brkiter.o ubrk.o brkeng.o dictbe.o triedict.o \ rbbi.o rbbidata.o rbbinode.o rbbirb.o rbbiscan.o rbbisetb.o rbbistbl.o rbbitblb.o \ serv.o servnotf.o servls.o servlk.o servlkf.o 
servrbf.o servslkf.o \ diff --git a/icu4c/source/common/bytestriebuilder.cpp b/icu4c/source/common/bytestriebuilder.cpp index c643583b0ba..4f4adfb2313 100644 --- a/icu4c/source/common/bytestriebuilder.cpp +++ b/icu4c/source/common/bytestriebuilder.cpp @@ -21,6 +21,7 @@ #include "uhash.h" #include "uarrsort.h" #include "uassert.h" +#include "ustr_imp.h" U_NAMESPACE_BEGIN @@ -335,7 +336,7 @@ BytesTrieBuilder::indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, UChar BytesTrieBuilder::BTLinearMatchNode::BTLinearMatchNode(const char *bytes, int32_t len, Node *nextNode) : LinearMatchNode(len, nextNode), s(bytes) { - hash=hash*37+uhash_hashCharsN(bytes, len); + hash=hash*37+ustr_hashCharsN(bytes, len); } UBool diff --git a/icu4c/source/common/caniter.cpp b/icu4c/source/common/caniter.cpp index e6b0e83778e..1eaf6d20f21 100644 --- a/icu4c/source/common/caniter.cpp +++ b/icu4c/source/common/caniter.cpp @@ -1,6 +1,6 @@ /* ***************************************************************************** - * Copyright (C) 1996-2010, International Business Machines Corporation and * + * Copyright (C) 1996-2011, International Business Machines Corporation and * * others. All Rights Reserved. 
* ***************************************************************************** */ @@ -288,7 +288,7 @@ void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros if(U_FAILURE(status)) { return; } - subpermute.setValueDeleter(uhash_deleteUnicodeString); + subpermute.setValueDeleter(uprv_deleteUObject); for (i = 0; i < source.length(); i += UTF16_CHAR_LENGTH(cp)) { cp = source.char32At(i); @@ -345,9 +345,9 @@ UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, i if (U_FAILURE(status)) { return 0; } - result.setValueDeleter(uhash_deleteUnicodeString); - permutations.setValueDeleter(uhash_deleteUnicodeString); - basic.setValueDeleter(uhash_deleteUnicodeString); + result.setValueDeleter(uprv_deleteUObject); + permutations.setValueDeleter(uprv_deleteUObject); + basic.setValueDeleter(uprv_deleteUObject); UChar USeg[256]; int32_t segLen = segment.extract(USeg, 256, status); @@ -453,7 +453,7 @@ Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const UCh while (iter.next()) { UChar32 cp2 = iter.getCodepoint(); Hashtable remainder(status); - remainder.setValueDeleter(uhash_deleteUnicodeString); + remainder.setValueDeleter(uprv_deleteUObject); if (extract(&remainder, cp2, segment, segLen, i, status) == NULL) { continue; } diff --git a/icu4c/source/common/cmemory.h b/icu4c/source/common/cmemory.h index 64e701aca54..d5e08a5338c 100644 --- a/icu4c/source/common/cmemory.h +++ b/icu4c/source/common/cmemory.h @@ -91,6 +91,22 @@ cmemory_inUse(void); U_CFUNC UBool cmemory_cleanup(void); +/** + * A function called by uhash_remove, + * uhash_close, or uhash_put to delete + * an existing key or value. + * @param obj A key or value stored in a hashtable + * @see uprv_deleteUObject + */ +typedef void U_CALLCONV UObjectDeleter(void* obj); + +/** + * Deleter for UObject instances. + * Works for all subclasses of UObject because it has a virtual destructor. 
+ */ +U_CAPI void U_EXPORT2 +uprv_deleteUObject(void *obj); + #ifdef XP_CPLUSPLUS U_NAMESPACE_BEGIN diff --git a/icu4c/source/common/common.vcxproj b/icu4c/source/common/common.vcxproj index 8afd9ec3531..cc2c3c23eaa 100644 --- a/icu4c/source/common/common.vcxproj +++ b/icu4c/source/common/common.vcxproj @@ -383,6 +383,7 @@ + @@ -414,6 +415,7 @@ + @@ -422,11 +424,15 @@ + + + + diff --git a/icu4c/source/common/common.vcxproj.filters b/icu4c/source/common/common.vcxproj.filters index 562c4206331..292b171183c 100644 --- a/icu4c/source/common/common.vcxproj.filters +++ b/icu4c/source/common/common.vcxproj.filters @@ -391,6 +391,9 @@ properties & sets + + properties & sets + properties & sets @@ -466,6 +469,9 @@ strings + + strings + strings @@ -481,21 +487,33 @@ strings + + strings + strings strings + + strings + strings + + strings + strings strings + + strings + strings diff --git a/icu4c/source/common/hash.h b/icu4c/source/common/hash.h index 9fedd0e521f..57467daf218 100644 --- a/icu4c/source/common/hash.h +++ b/icu4c/source/common/hash.h @@ -1,6 +1,6 @@ /* ****************************************************************************** -* Copyright (C) 1997-2010, International Business Machines +* Copyright (C) 1997-2011, International Business Machines * Corporation and others. All Rights Reserved. 
****************************************************************************** * Date Name Description @@ -13,6 +13,7 @@ #include "unicode/unistr.h" #include "unicode/uobject.h" +#include "cmemory.h" #include "uhash.h" U_NAMESPACE_BEGIN @@ -108,7 +109,7 @@ inline void Hashtable::init(UHashFunction *keyHash, UKeyComparator *keyComp, uhash_init(&hashObj, keyHash, keyComp, valueComp, &status); if (U_SUCCESS(status)) { hash = &hashObj; - uhash_setKeyDeleter(hash, uhash_deleteUnicodeString); + uhash_setKeyDeleter(hash, uprv_deleteUObject); } } diff --git a/icu4c/source/common/normalizer2.cpp b/icu4c/source/common/normalizer2.cpp index 8c5fdb61d9c..93f074f4972 100644 --- a/icu4c/source/common/normalizer2.cpp +++ b/icu4c/source/common/normalizer2.cpp @@ -844,7 +844,18 @@ unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) { // Some properties APIs ---------------------------------------------------- *** -U_CFUNC UNormalizationCheckResult U_EXPORT2 +U_CAPI uint8_t U_EXPORT2 +u_getCombiningClass(UChar32 c) { + UErrorCode errorCode=U_ZERO_ERROR; + const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); + if(U_SUCCESS(errorCode)) { + return impl->getCC(impl->getNorm16(c)); + } else { + return 0; + } +} + +U_CFUNC UNormalizationCheckResult unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) { if(mode<=UNORM_NONE || UNORM_FCD<=mode) { return UNORM_YES; @@ -858,6 +869,17 @@ unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) { } } +U_CFUNC uint16_t +unorm_getFCD16Simple(UChar32 c) { + UErrorCode errorCode=U_ZERO_ERROR; + const UTrie2 *trie=Normalizer2Factory::getFCDTrie(errorCode); + if(U_SUCCESS(errorCode)) { + return UTRIE2_GET16(trie, c); + } else { + return 0; + } +} + U_CAPI const uint16_t * U_EXPORT2 unorm_getFCDTrieIndex(UChar32 &fcdHighStart, UErrorCode *pErrorCode) { const UTrie2 *trie=Normalizer2Factory::getFCDTrie(*pErrorCode); diff --git a/icu4c/source/common/normalizer2impl.cpp b/icu4c/source/common/normalizer2impl.cpp index 
11d0581072d..87b34408e66 100644 --- a/icu4c/source/common/normalizer2impl.cpp +++ b/icu4c/source/common/normalizer2impl.cpp @@ -25,7 +25,6 @@ #include "mutex.h" #include "normalizer2impl.h" #include "uassert.h" -#include "uhash.h" #include "uset_imp.h" #include "utrie2.h" #include "uvector.h" @@ -1713,7 +1712,7 @@ const UChar *Normalizer2Impl::findNextFCDBoundary(const UChar *p, const UChar *l CanonIterData::CanonIterData(UErrorCode &errorCode) : trie(utrie2_open(0, 0, &errorCode)), - canonStartSets(uhash_deleteUObject, NULL, errorCode) {} + canonStartSets(uprv_deleteUObject, NULL, errorCode) {} CanonIterData::~CanonIterData() { utrie2_close(trie); diff --git a/icu4c/source/common/normalizer2impl.h b/icu4c/source/common/normalizer2impl.h index 15b6d8c8b38..4ff2386ee83 100644 --- a/icu4c/source/common/normalizer2impl.h +++ b/icu4c/source/common/normalizer2impl.h @@ -547,9 +547,16 @@ unorm2_swap(const UDataSwapper *ds, * Get the NF*_QC property for a code point, for u_getIntPropertyValue(). * @internal */ -U_CFUNC UNormalizationCheckResult U_EXPORT2 +U_CFUNC UNormalizationCheckResult unorm_getQuickCheck(UChar32 c, UNormalizationMode mode); +/** + * Get the 16-bit FCD value (lead & trail CCs) for a code point, for u_getIntPropertyValue(). + * @internal + */ +U_CFUNC uint16_t +unorm_getFCD16Simple(UChar32 c); + /** * Internal API, used by collation code. 
* Get access to the internal FCD trie table to be able to perform diff --git a/icu4c/source/common/propname.cpp b/icu4c/source/common/propname.cpp index 1aafdaf4181..6d5d9357e0f 100644 --- a/icu4c/source/common/propname.cpp +++ b/icu4c/source/common/propname.cpp @@ -12,6 +12,7 @@ #include "propname.h" #include "unicode/uchar.h" #include "unicode/udata.h" +#include "unicode/uscript.h" #include "umutex.h" #include "cmemory.h" #include "cstring.h" @@ -312,3 +313,15 @@ u_getPropertyValueEnum(UProperty property, U_NAMESPACE_USE return PropNameData::getPropertyValueEnum(property, alias); } + +U_CAPI const char* U_EXPORT2 +uscript_getName(UScriptCode scriptCode){ + return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode, + U_LONG_PROPERTY_NAME); +} + +U_CAPI const char* U_EXPORT2 +uscript_getShortName(UScriptCode scriptCode){ + return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode, + U_SHORT_PROPERTY_NAME); +} diff --git a/icu4c/source/common/rbbiscan.cpp b/icu4c/source/common/rbbiscan.cpp index bba16e9ef24..ddec6873c52 100644 --- a/icu4c/source/common/rbbiscan.cpp +++ b/icu4c/source/common/rbbiscan.cpp @@ -23,7 +23,6 @@ #include "unicode/uchriter.h" #include "unicode/parsepos.h" #include "unicode/parseerr.h" -#include "util.h" #include "cmemory.h" #include "cstring.h" @@ -122,18 +121,14 @@ RBBIRuleScanner::RBBIRuleScanner(RBBIRuleBuilder *rb) // and the time to build these few sets should be small compared to a // full break iterator build. 
fRuleSets[kRuleSet_rule_char-128] = UnicodeSet(gRuleSet_rule_char_pattern, *rb->fStatus); - UnicodeSet *whitespaceSet = uprv_openPatternWhiteSpaceSet(rb->fStatus); - if (U_FAILURE(*rb->fStatus)) { - return; - } - fRuleSets[kRuleSet_white_space-128] = *whitespaceSet; - delete whitespaceSet; + // fRuleSets[kRuleSet_white_space-128] = [:Pattern_White_Space:] + fRuleSets[kRuleSet_white_space-128].add(9, 0xd).add(0x20).add(0x85).add(0x200e, 0x200f).add(0x2028, 0x2029); fRuleSets[kRuleSet_name_char-128] = UnicodeSet(gRuleSet_name_char_pattern, *rb->fStatus); fRuleSets[kRuleSet_name_start_char-128] = UnicodeSet(gRuleSet_name_start_char_pattern, *rb->fStatus); fRuleSets[kRuleSet_digit_char-128] = UnicodeSet(gRuleSet_digit_char_pattern, *rb->fStatus); if (*rb->fStatus == U_ILLEGAL_ARGUMENT_ERROR) { // This case happens if ICU's data is missing. UnicodeSet tries to look up property - // names from the init string, can't find them, and claims an illegal arguement. + // names from the init string, can't find them, and claims an illegal argument. // Change the error so that the actual problem will be clearer to users. *rb->fStatus = U_BRK_INIT_ERROR; } @@ -1146,12 +1141,11 @@ void RBBIRuleScanner::scanSet() { pos.setIndex(fScanIndex); startPos = fScanIndex; UErrorCode localStatus = U_ZERO_ERROR; - uset = new UnicodeSet(fRB->fRules, pos, USET_IGNORE_SPACE, - fSymbolTable, - localStatus); + uset = new UnicodeSet(); if (uset == NULL) { localStatus = U_MEMORY_ALLOCATION_ERROR; } + uset->applyPatternIgnoreSpace(fRB->fRules, pos, fSymbolTable, localStatus); if (U_FAILURE(localStatus)) { // TODO: Get more accurate position of the error from UnicodeSet's return info. // UnicodeSet appears to not be reporting correctly at this time. 
diff --git a/icu4c/source/common/serv.cpp b/icu4c/source/common/serv.cpp index 8ce591804d8..7e5ca53107d 100644 --- a/icu4c/source/common/serv.cpp +++ b/icu4c/source/common/serv.cpp @@ -278,7 +278,7 @@ public: DNCache(const Locale& _locale) : cache(), locale(_locale) { - // cache.setKeyDeleter(uhash_deleteUnicodeString); + // cache.setKeyDeleter(uprv_deleteUObject); } }; @@ -519,7 +519,7 @@ ICUService::getKey(ICUServiceKey& key, UnicodeString* actualReturn, const ICUSer // fallback to the one that succeeded, we want to hit the // cache the first time next goaround. if (cacheDescriptorList._obj == NULL) { - cacheDescriptorList._obj = new UVector(uhash_deleteUnicodeString, NULL, 5, status); + cacheDescriptorList._obj = new UVector(uprv_deleteUObject, NULL, 5, status); if (U_FAILURE(status)) { return NULL; } diff --git a/icu4c/source/common/servls.cpp b/icu4c/source/common/servls.cpp index b3c78c78e3f..570c10a2628 100644 --- a/icu4c/source/common/servls.cpp +++ b/icu4c/source/common/servls.cpp @@ -15,7 +15,6 @@ #include "cmemory.h" #include "servloc.h" #include "ustrfmt.h" -#include "uhash.h" #include "charstr.h" #include "ucln_cmn.h" #include "uassert.h" @@ -163,7 +162,7 @@ private: ServiceEnumeration(const ICULocaleService* service, UErrorCode &status) : _service(service) , _timestamp(service->getTimestamp()) - , _ids(uhash_deleteUnicodeString, NULL, status) + , _ids(uprv_deleteUObject, NULL, status) , _pos(0) { _service->getVisibleIDs(_ids, status); @@ -172,7 +171,7 @@ private: ServiceEnumeration(const ServiceEnumeration &other, UErrorCode &status) : _service(other._service) , _timestamp(other._timestamp) - , _ids(uhash_deleteUnicodeString, NULL, status) + , _ids(uprv_deleteUObject, NULL, status) , _pos(0) { if(U_SUCCESS(status)) { diff --git a/icu4c/source/common/stringtriebuilder.cpp b/icu4c/source/common/stringtriebuilder.cpp index f016cb535ae..f30b13862b0 100644 --- a/icu4c/source/common/stringtriebuilder.cpp +++ b/icu4c/source/common/stringtriebuilder.cpp @@ 
-51,7 +51,7 @@ StringTrieBuilder::createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode errorCode=U_MEMORY_ALLOCATION_ERROR; } if(U_SUCCESS(errorCode)) { - uhash_setKeyDeleter(nodes, uhash_deleteUObject); + uhash_setKeyDeleter(nodes, uprv_deleteUObject); } } diff --git a/icu4c/source/common/ucasemap.cpp b/icu4c/source/common/ucasemap.cpp index d7543df87dd..0b52975d072 100644 --- a/icu4c/source/common/ucasemap.cpp +++ b/icu4c/source/common/ucasemap.cpp @@ -17,11 +17,12 @@ */ #include "unicode/utypes.h" +#include "unicode/brkiter.h" +#include "unicode/ubrk.h" #include "unicode/uloc.h" #include "unicode/ustring.h" #include "unicode/ucasemap.h" #if !UCONFIG_NO_BREAK_ITERATION -#include "unicode/ubrk.h" #include "unicode/utext.h" #endif #include "cmemory.h" @@ -29,6 +30,8 @@ #include "ucase.h" #include "ustr_imp.h" +U_NAMESPACE_USE + /* UCaseMap service object -------------------------------------------------- */ U_CAPI UCaseMap * U_EXPORT2 @@ -60,7 +63,8 @@ U_CAPI void U_EXPORT2 ucasemap_close(UCaseMap *csm) { if(csm!=NULL) { #if !UCONFIG_NO_BREAK_ITERATION - ubrk_close(csm->iter); + // Do not call ubrk_close() so that we do not depend on all of the BreakIterator code. + delete reinterpret_cast(csm->iter); #endif uprv_free(csm); } @@ -106,21 +110,6 @@ ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode * /*pErrorCode*/ csm->options=options; } -#if !UCONFIG_NO_BREAK_ITERATION - -U_CAPI const UBreakIterator * U_EXPORT2 -ucasemap_getBreakIterator(const UCaseMap *csm) { - return csm->iter; -} - -U_CAPI void U_EXPORT2 -ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode * /*pErrorCode*/) { - ubrk_close(csm->iter); - csm->iter=iterToAdopt; -} - -#endif - /* UTF-8 string case mappings ----------------------------------------------- */ /* TODO(markus): Move to a new, separate utf8case.c file. 
*/ @@ -262,37 +251,29 @@ _caseMap(const UCaseMap *csm, UCaseMapFull *map, #if !UCONFIG_NO_BREAK_ITERATION -/* - * Internal titlecasing function. - */ -static int32_t -_toTitle(UCaseMap *csm, +U_CFUNC int32_t U_CALLCONV +ucasemap_internalUTF8ToTitle(const UCaseMap *csm, uint8_t *dest, int32_t destCapacity, - const uint8_t *src, UCaseContext *csc, - int32_t srcLength, + const uint8_t *src, int32_t srcLength, UErrorCode *pErrorCode) { - UText utext=UTEXT_INITIALIZER; const UChar *s; UChar32 c; int32_t prev, titleStart, titleLimit, idx, destIndex, length; UBool isFirstIndex; - utext_openUTF8(&utext, (const char *)src, srcLength, pErrorCode); if(U_FAILURE(*pErrorCode)) { return 0; } - if(csm->iter==NULL) { - csm->iter=ubrk_open(UBRK_WORD, csm->locale, - NULL, 0, - pErrorCode); - } - ubrk_setUText(csm->iter, &utext, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - utext_close(&utext); - return 0; - } + + // Use the C++ abstract base class to minimize dependencies. + // TODO: Change UCaseMap.iter to store a BreakIterator directly. 
+ BreakIterator *bi=reinterpret_cast(csm->iter); /* set up local variables */ + int32_t locCache=csm->locCache; + UCaseContext csc=UCASECONTEXT_INITIALIZER; + csc.p=(void *)src; + csc.limit=srcLength; destIndex=0; prev=0; isFirstIndex=TRUE; @@ -302,9 +283,9 @@ _toTitle(UCaseMap *csm, /* find next index where to titlecase */ if(isFirstIndex) { isFirstIndex=FALSE; - idx=ubrk_first(csm->iter); + idx=bi->first(); } else { - idx=ubrk_next(csm->iter); + idx=bi->next(); } if(idx==UBRK_DONE || idx>srcLength) { idx=srcLength; @@ -354,15 +335,14 @@ _toTitle(UCaseMap *csm, if(titleStartcpStart=titleStart; - csc->cpLimit=titleLimit; - c=ucase_toFullTitle(csm->csp, c, utf8_caseContextIterator, csc, &s, csm->locale, &csm->locCache); + csc.cpStart=titleStart; + csc.cpLimit=titleLimit; + c=ucase_toFullTitle(csm->csp, c, utf8_caseContextIterator, &csc, &s, csm->locale, &locCache); destIndex=appendResult(dest, destIndex, destCapacity, c, s); - /* Special case Dutch IJ titlecasing */ if ( titleStart+1 < idx && - ucase_getCaseLocale(csm->locale,&csm->locCache) == UCASE_LOC_DUTCH && + ucase_getCaseLocale(csm->locale, &locCache) == UCASE_LOC_DUTCH && ( src[titleStart] == 0x0049 || src[titleStart] == 0x0069 ) && ( src[titleStart+1] == 0x004A || src[titleStart+1] == 0x006A )) { c=0x004A; @@ -377,7 +357,7 @@ _toTitle(UCaseMap *csm, _caseMap( csm, ucase_toFullLower, dest+destIndex, destCapacity-destIndex, - src, csc, + src, &csc, titleLimit, idx, pErrorCode); } else { @@ -398,12 +378,41 @@ _toTitle(UCaseMap *csm, if(destIndex>destCapacity) { *pErrorCode=U_BUFFER_OVERFLOW_ERROR; } - utext_close(&utext); return destIndex; } #endif +static int32_t U_CALLCONV +ucasemap_internalUTF8ToLower(const UCaseMap *csm, + uint8_t *dest, int32_t destCapacity, + const uint8_t *src, int32_t srcLength, + UErrorCode *pErrorCode) { + UCaseContext csc=UCASECONTEXT_INITIALIZER; + csc.p=(void *)src; + csc.limit=srcLength; + return _caseMap( + csm, ucase_toFullLower, + dest, destCapacity, + src, &csc, 0, srcLength, 
+ pErrorCode); +} + +static int32_t U_CALLCONV +ucasemap_internalUTF8ToUpper(const UCaseMap *csm, + uint8_t *dest, int32_t destCapacity, + const uint8_t *src, int32_t srcLength, + UErrorCode *pErrorCode) { + UCaseContext csc=UCASECONTEXT_INITIALIZER; + csc.p=(void *)src; + csc.limit=srcLength; + return _caseMap( + csm, ucase_toFullUpper, + dest, destCapacity, + src, &csc, 0, srcLength, + pErrorCode); +} + static int32_t utf8_foldCase(const UCaseProps *csp, uint8_t *dest, int32_t destCapacity, @@ -442,19 +451,20 @@ utf8_foldCase(const UCaseProps *csp, return destIndex; } -/* - * Implement argument checking and buffer handling - * for string case mapping as a common function. - */ - -/* common internal function for public API functions */ +static int32_t U_CALLCONV +ucasemap_internalUTF8Fold(const UCaseMap *csm, + uint8_t *dest, int32_t destCapacity, + const uint8_t *src, int32_t srcLength, + UErrorCode *pErrorCode) { + return utf8_foldCase(csm->csp, dest, destCapacity, src, srcLength, csm->options, pErrorCode); +} -static int32_t -caseMap(const UCaseMap *csm, - uint8_t *dest, int32_t destCapacity, - const uint8_t *src, int32_t srcLength, - int32_t toWhichCase, - UErrorCode *pErrorCode) { +U_CFUNC int32_t +ucasemap_mapUTF8(const UCaseMap *csm, + uint8_t *dest, int32_t destCapacity, + const uint8_t *src, int32_t srcLength, + UTF8CaseMapper *stringCaseMapper, + UErrorCode *pErrorCode) { int32_t destLength; /* check argument values */ @@ -484,42 +494,7 @@ caseMap(const UCaseMap *csm, return 0; } - destLength=0; - - if(toWhichCase==FOLD_CASE) { - destLength=utf8_foldCase(csm->csp, dest, destCapacity, src, srcLength, - csm->options, pErrorCode); - } else { - UCaseContext csc=UCASECONTEXT_INITIALIZER; - - csc.p=(void *)src; - csc.limit=srcLength; - - if(toWhichCase==TO_LOWER) { - destLength=_caseMap(csm, ucase_toFullLower, - dest, destCapacity, - src, &csc, - 0, srcLength, - pErrorCode); - } else if(toWhichCase==TO_UPPER) { - destLength=_caseMap(csm, ucase_toFullUpper, - 
dest, destCapacity, - src, &csc, - 0, srcLength, - pErrorCode); - } else /* if(toWhichCase==TO_TITLE) */ { -#if UCONFIG_NO_BREAK_ITERATION - *pErrorCode=U_UNSUPPORTED_ERROR; -#else - /* UCaseMap is actually non-const in toTitle() APIs. */ - UCaseMap *tmp = (UCaseMap *)csm; - destLength=_toTitle(tmp, dest, destCapacity, - src, &csc, srcLength, - pErrorCode); -#endif - } - } - + destLength=stringCaseMapper(csm, dest, destCapacity, src, srcLength, pErrorCode); return u_terminateChars((char *)dest, destCapacity, destLength, pErrorCode); } @@ -530,10 +505,10 @@ ucasemap_utf8ToLower(const UCaseMap *csm, char *dest, int32_t destCapacity, const char *src, int32_t srcLength, UErrorCode *pErrorCode) { - return caseMap(csm, + return ucasemap_mapUTF8(csm, (uint8_t *)dest, destCapacity, (const uint8_t *)src, srcLength, - TO_LOWER, pErrorCode); + ucasemap_internalUTF8ToLower, pErrorCode); } U_CAPI int32_t U_EXPORT2 @@ -541,34 +516,19 @@ ucasemap_utf8ToUpper(const UCaseMap *csm, char *dest, int32_t destCapacity, const char *src, int32_t srcLength, UErrorCode *pErrorCode) { - return caseMap(csm, - (uint8_t *)dest, destCapacity, - (const uint8_t *)src, srcLength, - TO_UPPER, pErrorCode); -} - -#if !UCONFIG_NO_BREAK_ITERATION - -U_CAPI int32_t U_EXPORT2 -ucasemap_utf8ToTitle(UCaseMap *csm, - char *dest, int32_t destCapacity, - const char *src, int32_t srcLength, - UErrorCode *pErrorCode) { - return caseMap(csm, + return ucasemap_mapUTF8(csm, (uint8_t *)dest, destCapacity, (const uint8_t *)src, srcLength, - TO_TITLE, pErrorCode); + ucasemap_internalUTF8ToUpper, pErrorCode); } -#endif - U_CAPI int32_t U_EXPORT2 ucasemap_utf8FoldCase(const UCaseMap *csm, char *dest, int32_t destCapacity, const char *src, int32_t srcLength, UErrorCode *pErrorCode) { - return caseMap(csm, + return ucasemap_mapUTF8(csm, (uint8_t *)dest, destCapacity, (const uint8_t *)src, srcLength, - FOLD_CASE, pErrorCode); + ucasemap_internalUTF8Fold, pErrorCode); } diff --git 
a/icu4c/source/common/ucasemap_titlecase_brkiter.cpp b/icu4c/source/common/ucasemap_titlecase_brkiter.cpp new file mode 100644 index 00000000000..1698c8eb752 --- /dev/null +++ b/icu4c/source/common/ucasemap_titlecase_brkiter.cpp @@ -0,0 +1,67 @@ +/* +******************************************************************************* +* Copyright (C) 2011, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* file name: ucasemap_titlecase_brkiter.cpp +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2011jun02 +* created by: Markus W. Scherer +* +* Titlecasing functions that are based on BreakIterator +* were moved here to break dependency cycles among parts of the common library. +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_BREAK_ITERATION + +#include "unicode/brkiter.h" +#include "unicode/ubrk.h" +#include "unicode/ucasemap.h" +#include "cmemory.h" +#include "ucase.h" +#include "ustr_imp.h" + +U_NAMESPACE_USE + +U_CAPI const UBreakIterator * U_EXPORT2 +ucasemap_getBreakIterator(const UCaseMap *csm) { + return csm->iter; +} + +U_CAPI void U_EXPORT2 +ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode * /*pErrorCode*/) { + // Do not call ubrk_close() so that we do not depend on all of the BreakIterator code. 
+ delete reinterpret_cast(csm->iter); + csm->iter=iterToAdopt; +} + +U_CAPI int32_t U_EXPORT2 +ucasemap_utf8ToTitle(UCaseMap *csm, + char *dest, int32_t destCapacity, + const char *src, int32_t srcLength, + UErrorCode *pErrorCode) { + UText utext=UTEXT_INITIALIZER; + utext_openUTF8(&utext, (const char *)src, srcLength, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + return 0; + } + if(csm->iter==NULL) { + csm->iter=ubrk_open(UBRK_WORD, csm->locale, + NULL, 0, + pErrorCode); + } + ubrk_setUText(csm->iter, &utext, pErrorCode); + int32_t length=ucasemap_mapUTF8(csm, + (uint8_t *)dest, destCapacity, + (const uint8_t *)src, srcLength, + ucasemap_internalUTF8ToTitle, pErrorCode); + utext_close(&utext); + return length; +} + +#endif // !UCONFIG_NO_BREAK_ITERATION diff --git a/icu4c/source/common/ucharstriebuilder.cpp b/icu4c/source/common/ucharstriebuilder.cpp index b1aa0d1b8ea..24b46f50837 100644 --- a/icu4c/source/common/ucharstriebuilder.cpp +++ b/icu4c/source/common/ucharstriebuilder.cpp @@ -21,6 +21,7 @@ #include "uarrsort.h" #include "uassert.h" #include "uhash.h" +#include "ustr_imp.h" U_NAMESPACE_BEGIN @@ -283,7 +284,7 @@ UCharsTrieBuilder::indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, UCha UCharsTrieBuilder::UCTLinearMatchNode::UCTLinearMatchNode(const UChar *units, int32_t len, Node *nextNode) : LinearMatchNode(len, nextNode), s(units) { - hash=hash*37+uhash_hashUCharsN(units, len); + hash=hash*37+ustr_hashUCharsN(units, len); } UBool diff --git a/icu4c/source/common/uchriter.cpp b/icu4c/source/common/uchriter.cpp index f2878815e4f..6de1e8b8f57 100644 --- a/icu4c/source/common/uchriter.cpp +++ b/icu4c/source/common/uchriter.cpp @@ -1,6 +1,6 @@ /* ****************************************************************************** -* Copyright (C) 1998-2010, International Business Machines Corporation and +* Copyright (C) 1998-2011, International Business Machines Corporation and * others. All Rights Reserved. 
****************************************************************************** */ @@ -9,7 +9,7 @@ #include "unicode/uchriter.h" #include "unicode/ustring.h" -#include "uhash.h" +#include "ustr_imp.h" U_NAMESPACE_BEGIN @@ -83,7 +83,7 @@ UCharCharacterIterator::operator==(const ForwardCharacterIterator& that) const { int32_t UCharCharacterIterator::hashCode() const { - return uhash_hashUCharsN(text, textLength) ^ pos ^ begin ^ end; + return ustr_hashUCharsN(text, textLength) ^ pos ^ begin ^ end; } CharacterIterator* diff --git a/icu4c/source/common/ucln.h b/icu4c/source/common/ucln.h index 3f9847f4448..951ad192d91 100644 --- a/icu4c/source/common/ucln.h +++ b/icu4c/source/common/ucln.h @@ -1,11 +1,11 @@ /* ****************************************************************************** -* * -* Copyright (C) 2001-2010, International Business Machines * -* Corporation and others. All Rights Reserved. * -* * +* +* Copyright (C) 2001-2011, International Business Machines +* Corporation and others. All Rights Reserved. +* ****************************************************************************** -* file name: ucln_cmn.h +* file name: ucln.h * encoding: US-ASCII * tab size: 8 (not used) * indentation:4 @@ -18,6 +18,7 @@ #define __UCLN_H__ #include "unicode/utypes.h" +#include "umutex.h" /** These are the functions used to register a library's memory cleanup * functions. Each library should define a single library register function @@ -81,9 +82,16 @@ U_CAPI void U_EXPORT2 ucln_registerCleanup(ECleanupLibraryType type, /** * Request cleanup for one specific library. * Not thread safe. 
- * Calling this with UCLN_COMMON just calls u_cleanup(); * @param type which library to cleanup */ U_CAPI void U_EXPORT2 ucln_cleanupOne(ECleanupLibraryType type); +/* ucln_cmn.c variables shared with uinit.c */ +U_CDECL_BEGIN + +extern UBool gICUInitialized; +extern UMTX gICUInitMutex; + +U_CDECL_END + #endif diff --git a/icu4c/source/common/ucln_cmn.c b/icu4c/source/common/ucln_cmn.c index 498c15e29f8..45358736bd2 100644 --- a/icu4c/source/common/ucln_cmn.c +++ b/icu4c/source/common/ucln_cmn.c @@ -1,6 +1,6 @@ /* ****************************************************************************** -* Copyright (C) 2001-2010, International Business Machines +* Copyright (C) 2001-2011, International Business Machines * Corporation and others. All Rights Reserved. ****************************************************************************** * file name: ucln_cmn.c @@ -15,7 +15,6 @@ #include "unicode/utypes.h" #include "unicode/uclean.h" #include "utracimp.h" -#include "ustr_imp.h" #include "ucln_cmn.h" #include "umutex.h" #include "ucln.h" @@ -23,24 +22,43 @@ #include "uassert.h" /** Auto-client for UCLN_COMMON **/ -#define UCLN_TYPE UCLN_COMMON +#define UCLN_TYPE_IS_COMMON #include "ucln_imp.h" +U_CDECL_BEGIN + +UBool gICUInitialized = FALSE; +UMTX gICUInitMutex = NULL; + +U_CDECL_END + static cleanupFunc *gCommonCleanupFunctions[UCLN_COMMON_COUNT]; static cleanupFunc *gLibCleanupFunctions[UCLN_COMMON]; +/************************************************ + The cleanup order is important in this function. + Please be sure that you have read ucln.h + ************************************************/ +U_CAPI void U_EXPORT2 +u_cleanup(void) +{ + UTRACE_ENTRY_OC(UTRACE_U_CLEANUP); + umtx_lock(NULL); /* Force a memory barrier, so that we are sure to see */ + umtx_unlock(NULL); /* all state left around by any other threads. */ -/* Enables debugging information about when a library is cleaned up. 
*/ -#ifndef UCLN_DEBUG_CLEANUP -#define UCLN_DEBUG_CLEANUP 0 -#endif - + ucln_lib_cleanup(); -#if defined(UCLN_DEBUG_CLEANUP) -#include -#endif + umtx_destroy(&gICUInitMutex); + umtx_cleanup(); + cmemory_cleanup(); /* undo any heap functions set by u_setMemoryFunctions(). */ + gICUInitialized = FALSE; + UTRACE_EXIT(); /* Must be before utrace_cleanup(), which turns off tracing. */ +/*#if U_ENABLE_TRACING*/ + utrace_cleanup(); +/*#endif*/ +} -static void ucln_cleanup_internal(ECleanupLibraryType libType) +U_CAPI void U_EXPORT2 ucln_cleanupOne(ECleanupLibraryType libType) { if (gLibCleanupFunctions[libType]) { @@ -49,22 +67,6 @@ static void ucln_cleanup_internal(ECleanupLibraryType libType) } } -U_CAPI void U_EXPORT2 ucln_cleanupOne(ECleanupLibraryType libType) -{ - if(libType==UCLN_COMMON) { -#if UCLN_DEBUG_CLEANUP - fprintf(stderr, "Cleaning up: UCLN_COMMON with u_cleanup, type %d\n", (int)libType); -#endif - u_cleanup(); - } else { -#if UCLN_DEBUG_CLEANUP - fprintf(stderr, "Cleaning up: using ucln_cleanup_internal, type %d\n", (int)libType); -#endif - ucln_cleanup_internal(libType); - } -} - - U_CFUNC void ucln_common_registerCleanup(ECleanupCommonType type, cleanupFunc *func) @@ -95,7 +97,7 @@ U_CFUNC UBool ucln_lib_cleanup(void) { ECleanupCommonType commonFunc = UCLN_COMMON_START; for (libType++; libType> 1); + } } static void _ISCIIOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode) { @@ -176,9 +188,6 @@ static void _ISCIIOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *e return; } - /* Ensure that the sets used in special handling of certain Gurmukhi characters are initialized. 
*/ - initializeSets(); - cnv->extraInfo = uprv_malloc(sizeof(UConverterDataISCII)); if (cnv->extraInfo != NULL) { @@ -225,14 +234,6 @@ static void _ISCIIClose(UConverter *cnv) { } cnv->extraInfo=NULL; } - if (PNJ_CONSONANT_SET != NULL) { - uset_close(PNJ_CONSONANT_SET); - PNJ_CONSONANT_SET = NULL; - } - if (PNJ_BINDI_TIPPI_SET != NULL) { - uset_close(PNJ_BINDI_TIPPI_SET); - PNJ_BINDI_TIPPI_SET = NULL; - } } static const char* _ISCIIgetName(const UConverter* cnv) { @@ -1031,7 +1032,7 @@ static void UConverter_fromUnicode_ISCII_OFFSETS_LOGIC( converterData->contextCharFromUnicode = 0x00; break; } - if (converterData->currentDeltaFromUnicode == PNJ_DELTA && tempContextFromUnicode == PNJ_ADHAK && uset_contains(PNJ_CONSONANT_SET, (sourceChar + PNJ_DELTA))) { + if (converterData->currentDeltaFromUnicode == PNJ_DELTA && tempContextFromUnicode == PNJ_ADHAK && isPNJConsonant((sourceChar + PNJ_DELTA))) { /* If the previous codepoint is Adhak and the current codepoint is a consonant, the targetByteUnit should be C + Halant + C. */ /* reset context char */ converterData->contextCharFromUnicode = 0x0000; @@ -1425,7 +1426,7 @@ static void UConverter_toUnicode_ISCII_OFFSETS_LOGIC(UConverterToUnicodeArgs *ar if (*toUnicodeStatus != missingCharMarker) { /* Check to make sure that consonant clusters are handled correct for Gurmukhi script. 
*/ - if (data->currentDeltaToUnicode == PNJ_DELTA && data->prevToUnicodeStatus != 0 && uset_contains(PNJ_CONSONANT_SET, data->prevToUnicodeStatus) && + if (data->currentDeltaToUnicode == PNJ_DELTA && data->prevToUnicodeStatus != 0 && isPNJConsonant(data->prevToUnicodeStatus) && (*toUnicodeStatus + PNJ_DELTA) == PNJ_SIGN_VIRAMA && (targetUniChar + PNJ_DELTA) == data->prevToUnicodeStatus) { /* Consonant clusters C + HALANT + C should be encoded as ADHAK + C */ offset = (int)(source-args->source - 3); @@ -1444,10 +1445,10 @@ static void UConverter_toUnicode_ISCII_OFFSETS_LOGIC(UConverterToUnicodeArgs *ar /* Check to make sure that Bindi and Tippi are handled correctly for Gurmukhi script. * If 0xA2 is preceded by a codepoint in the PNJ_BINDI_TIPPI_SET then the target codepoint should be Tippi instead of Bindi. */ - if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_BINDI && uset_contains(PNJ_BINDI_TIPPI_SET, (*toUnicodeStatus + PNJ_DELTA))) { + if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_BINDI && isPNJBindiTippi((*toUnicodeStatus + PNJ_DELTA))) { targetUniChar = PNJ_TIPPI - PNJ_DELTA; WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,PNJ_DELTA,err); - } else if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_SIGN_VIRAMA && uset_contains(PNJ_CONSONANT_SET, (*toUnicodeStatus + PNJ_DELTA))) { + } else if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_SIGN_VIRAMA && isPNJConsonant((*toUnicodeStatus + PNJ_DELTA))) { /* Store the current toUnicodeStatus code point for later handling of consonant cluster in Gurmukhi. 
*/ data->prevToUnicodeStatus = *toUnicodeStatus + PNJ_DELTA; } else { diff --git a/icu4c/source/common/ucnvsel.cpp b/icu4c/source/common/ucnvsel.cpp index cc626eecd2e..03744df94d4 100644 --- a/icu4c/source/common/ucnvsel.cpp +++ b/icu4c/source/common/ucnvsel.cpp @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 2008-2009, International Business Machines +* Copyright (C) 2008-2011, International Business Machines * Corporation, Google and others. All Rights Reserved. * ******************************************************************************* @@ -26,6 +26,8 @@ #include "unicode/ucnvsel.h" +#if !UCONFIG_NO_CONVERSION + #include #include "unicode/uchar.h" @@ -809,3 +811,5 @@ ucnvsel_selectForUTF8(const UConverterSelector* sel, } return selectForMask(sel, mask, status); } + +#endif // !UCONFIG_NO_CONVERSION diff --git a/icu4c/source/common/uhash.c b/icu4c/source/common/uhash.c index 41c47162d93..5dd6ba3441c 100644 --- a/icu4c/source/common/uhash.c +++ b/icu4c/source/common/uhash.c @@ -1,6 +1,6 @@ /* ****************************************************************************** -* Copyright (C) 1997-2010, International Business Machines +* Copyright (C) 1997-2011, International Business Machines * Corporation and others. All Rights Reserved. ****************************************************************************** * Date Name Description @@ -15,6 +15,7 @@ #include "cstring.h" #include "cmemory.h" #include "uassert.h" +#include "ustr_imp.h" /* This hashtable is implemented as a double hash. All elements are * stored in a single array with no secondary storage for collision @@ -832,58 +833,26 @@ uhash_tokp(void* p) { * PUBLIC Key Hash Functions ********************************************************************/ -/* - Compute the hash by iterating sparsely over about 32 (up to 63) - characters spaced evenly through the string. 
For each character, - multiply the previous hash value by a prime number and add the new - character in, like a linear congruential random number generator, - producing a pseudorandom deterministic value well distributed over - the output range. [LIU] -*/ - -#define STRING_HASH(TYPE, STR, STRLEN, DEREF) \ - int32_t hash = 0; \ - const TYPE *p = (const TYPE*) STR; \ - if (p != NULL) { \ - int32_t len = (int32_t)(STRLEN); \ - int32_t inc = ((len - 32) / 32) + 1; \ - const TYPE *limit = p + len; \ - while (puhash_remove, - * uhash_close, or uhash_put to delete - * an existing key or value. - * @param obj A key or value stored in a hashtable - * @see uhash_deleteUObject - */ -typedef void U_CALLCONV UObjectDeleter(void* obj); + +/* see cmemory.h for UObjectDeleter and uprv_deleteUObject() */ /** * This specifies whether or not, and how, the hastable resizes itself. @@ -579,13 +574,6 @@ uhash_hashUChars(const UHashTok key); U_CAPI int32_t U_EXPORT2 uhash_hashChars(const UHashTok key); -/* Used by UnicodeString to compute its hashcode - Not public API. */ -U_CAPI int32_t U_EXPORT2 -uhash_hashUCharsN(const UChar *key, int32_t length); - -U_CAPI int32_t U_EXPORT2 -uhash_hashCharsN(const char *key, int32_t length); - /** * Generate a case-insensitive hash code for a null-terminated char* * string. If the string is not null-terminated do not use this @@ -666,13 +654,6 @@ uhash_compareUnicodeString(const UHashTok key1, const UHashTok key2); U_CAPI UBool U_EXPORT2 uhash_compareCaselessUnicodeString(const UHashTok key1, const UHashTok key2); -/** - * Deleter function for UnicodeString* keys or values. 
- * @param obj The object to be deleted - */ -U_CAPI void U_EXPORT2 -uhash_deleteUnicodeString(void *obj); - /******************************************************************** * int32_t Support Functions ********************************************************************/ @@ -705,20 +686,7 @@ uhash_compareLong(const UHashTok key1, const UHashTok key2); U_CAPI void U_EXPORT2 uhash_deleteHashtable(void *obj); -/** - * Deleter for UObject instances. - * @param obj The object to be deleted - */ -U_CAPI void U_EXPORT2 -uhash_deleteUObject(void *obj); - -/** - * Deleter for any key or value allocated using uprv_malloc. Calls - * uprv_free. - * @param obj The object to be deleted - */ -U_CAPI void U_EXPORT2 -uhash_freeBlock(void *obj); +/* Use uprv_free() itself as a deleter for any key or value allocated using uprv_malloc. */ /** * Checks if the given hash tables are equal or not. diff --git a/icu4c/source/common/uhash_us.cpp b/icu4c/source/common/uhash_us.cpp index c4ca3ca76e5..71a41cbe29a 100644 --- a/icu4c/source/common/uhash_us.cpp +++ b/icu4c/source/common/uhash_us.cpp @@ -1,6 +1,6 @@ /* ****************************************************************************** -* Copyright (C) 1997-2010, International Business Machines +* Copyright (C) 1997-2011, International Business Machines * Corporation and others. All Rights Reserved. 
****************************************************************************** * Date Name Description @@ -10,42 +10,7 @@ ****************************************************************************** */ -#include "uhash.h" #include "hash.h" -#include "uvector.h" -#include "unicode/unistr.h" -#include "unicode/uchar.h" - -/******************************************************************** - * PUBLIC UnicodeString support functions for UHashtable - ********************************************************************/ - -U_CAPI int32_t U_EXPORT2 -uhash_hashUnicodeString(const UHashTok key) { - U_NAMESPACE_USE - const UnicodeString *str = (const UnicodeString*) key.pointer; - return (str == NULL) ? 0 : str->hashCode(); -} - -U_CAPI void U_EXPORT2 -uhash_deleteUnicodeString(void *obj) { - U_NAMESPACE_USE - delete (UnicodeString*) obj; -} - -U_CAPI UBool U_EXPORT2 -uhash_compareUnicodeString(const UHashTok key1, const UHashTok key2) { - U_NAMESPACE_USE - const UnicodeString *str1 = (const UnicodeString*) key1.pointer; - const UnicodeString *str2 = (const UnicodeString*) key2.pointer; - if (str1 == str2) { - return TRUE; - } - if (str1 == NULL || str2 == NULL) { - return FALSE; - } - return *str1 == *str2; -} /** * Deleter for Hashtable objects. @@ -56,13 +21,4 @@ uhash_deleteHashtable(void *obj) { delete (Hashtable*) obj; } -/** - * Deleter for UObject instances. - */ -U_CAPI void U_EXPORT2 -uhash_deleteUObject(void *obj) { - U_NAMESPACE_USE - delete (UObject*) obj; -} - //eof diff --git a/icu4c/source/common/uinit.c b/icu4c/source/common/uinit.c index 530d21307ec..bdbf9102e3b 100644 --- a/icu4c/source/common/uinit.c +++ b/icu4c/source/common/uinit.c @@ -1,7 +1,7 @@ /* ****************************************************************************** * * -* Copyright (C) 2001-2010, International Business Machines * +* Copyright (C) 2001-2011, International Business Machines * * Corporation and others. All Rights Reserved. 
* * * ****************************************************************************** @@ -19,40 +19,11 @@ #include "unicode/uclean.h" #include "cmemory.h" #include "icuplugimp.h" -#include "uassert.h" #include "ucln.h" -#include "ucln_cmn.h" #include "ucnv_io.h" #include "umutex.h" #include "utracimp.h" -static UBool gICUInitialized = FALSE; -static UMTX gICUInitMutex = NULL; - - -/************************************************ - The cleanup order is important in this function. - Please be sure that you have read ucln.h - ************************************************/ -U_CAPI void U_EXPORT2 -u_cleanup(void) -{ - UTRACE_ENTRY_OC(UTRACE_U_CLEANUP); - umtx_lock(NULL); /* Force a memory barrier, so that we are sure to see */ - umtx_unlock(NULL); /* all state left around by any other threads. */ - - ucln_lib_cleanup(); - - umtx_destroy(&gICUInitMutex); - umtx_cleanup(); - cmemory_cleanup(); /* undo any heap functions set by u_setMemoryFunctions(). */ - gICUInitialized = FALSE; - UTRACE_EXIT(); /* Must be before utrace_cleanup(), which turns off tracing. */ -/*#if U_ENABLE_TRACING*/ - utrace_cleanup(); -/*#endif*/ -} - /* * ICU Initialization Function. Need not be called. */ diff --git a/icu4c/source/common/unicode/ucnvsel.h b/icu4c/source/common/unicode/ucnvsel.h index 0830003f558..eb9588eb2db 100644 --- a/icu4c/source/common/unicode/ucnvsel.h +++ b/icu4c/source/common/unicode/ucnvsel.h @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 2008-2010, International Business Machines +* Copyright (C) 2008-2011, International Business Machines * Corporation, Google and others. All Rights Reserved. 
* ******************************************************************************* @@ -19,8 +19,11 @@ #ifndef __ICU_UCNV_SEL_H__ #define __ICU_UCNV_SEL_H__ -#include "unicode/uset.h" #include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION + +#include "unicode/uset.h" #include "unicode/utf16.h" #include "unicode/uenum.h" #include "unicode/ucnv.h" @@ -179,4 +182,6 @@ U_STABLE UEnumeration * U_EXPORT2 ucnvsel_selectForUTF8(const UConverterSelector* sel, const char *s, int32_t length, UErrorCode *status); +#endif /* !UCONFIG_NO_CONVERSION */ + #endif /* __ICU_UCNV_SEL_H__ */ diff --git a/icu4c/source/common/unicode/uniset.h b/icu4c/source/common/unicode/uniset.h index ea8d1d901e0..79c5d261514 100644 --- a/icu4c/source/common/unicode/uniset.h +++ b/icu4c/source/common/unicode/uniset.h @@ -24,6 +24,7 @@ U_NAMESPACE_BEGIN class BMPSet; class ParsePosition; +class RBBIRuleScanner; class SymbolTable; class UnicodeSetStringSpan; class UVector; @@ -1467,6 +1468,7 @@ private: virtual UBool matchesIndexValue(uint8_t v) const; private: + friend class RBBIRuleScanner; //---------------------------------------------------------------- // Implementation: Clone as thawed (see ICU4J Freezable) @@ -1478,10 +1480,16 @@ private: // Implementation: Pattern parsing //---------------------------------------------------------------- + void applyPatternIgnoreSpace(const UnicodeString& pattern, + ParsePosition& pos, + const SymbolTable* symbols, + UErrorCode& status); + void applyPattern(RuleCharacterIterator& chars, const SymbolTable* symbols, UnicodeString& rebuiltPat, uint32_t options, + UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute), UErrorCode& ec); //---------------------------------------------------------------- diff --git a/icu4c/source/common/unicode/unistr.h b/icu4c/source/common/unicode/unistr.h index 4507829b848..cf8b736ed85 100644 --- a/icu4c/source/common/unicode/unistr.h +++ b/icu4c/source/common/unicode/unistr.h @@ -31,6 +31,7 @@ #include "unicode/std_string.h" 
#include "unicode/stringpiece.h" #include "unicode/bytestream.h" +#include "unicode/ucasemap.h" struct UConverter; // unicode/ucnv.h class StringThreadTest; @@ -53,6 +54,21 @@ U_STABLE int32_t U_EXPORT2 u_strlen(const UChar *s); #endif +#ifndef U_STRING_CASE_MAPPER_DEFINED +#define U_STRING_CASE_MAPPER_DEFINED + +/** + * Internal string case mapping function type. + * @internal + */ +typedef int32_t U_CALLCONV +UStringCaseMapper(const UCaseMap *csm, + UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + UErrorCode *pErrorCode); + +#endif + U_NAMESPACE_BEGIN class BreakIterator; // unicode/brkiter.h @@ -3355,12 +3371,13 @@ private: int32_t **pBufferToDelete = 0, UBool forceClone = FALSE); - // common function for case mappings + /** + * Common function for UnicodeString case mappings. + * The stringCaseMapper has the same type UStringCaseMapper + * as in ustr_imp.h for ustrcase_map(). + */ UnicodeString & - caseMap(BreakIterator *titleIter, - const char *locale, - uint32_t options, - int32_t toWhichCase); + caseMap(const UCaseMap *csm, UStringCaseMapper *stringCaseMapper); // ref counting void addRef(void); diff --git a/icu4c/source/common/uniset.cpp b/icu4c/source/common/uniset.cpp index ed50e1758c2..9ff9ae58f2a 100644 --- a/icu4c/source/common/uniset.cpp +++ b/icu4c/source/common/uniset.cpp @@ -1559,7 +1559,7 @@ UBool UnicodeSet::allocateStrings(UErrorCode &status) { if (U_FAILURE(status)) { return FALSE; } - strings = new UVector(uhash_deleteUnicodeString, + strings = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, 1, status); if (strings == NULL) { // Check for memory allocation error. 
status = U_MEMORY_ALLOCATION_ERROR; diff --git a/icu4c/source/common/uniset_closure.cpp b/icu4c/source/common/uniset_closure.cpp new file mode 100644 index 00000000000..14197932b95 --- /dev/null +++ b/icu4c/source/common/uniset_closure.cpp @@ -0,0 +1,280 @@ +/* +******************************************************************************* +* +* Copyright (C) 2011, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: uniset_closure.cpp +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2011may30 +* created by: Markus W. Scherer +* +* UnicodeSet::closeOver() and related methods moved here from uniset_props.cpp +* to simplify dependencies. +* In particular, this depends on the BreakIterator, but the BreakIterator +* code also builds UnicodeSets from patterns and needs uniset_props. +*/ + +#include "unicode/brkiter.h" +#include "unicode/locid.h" +#include "unicode/parsepos.h" +#include "unicode/uniset.h" +#include "cmemory.h" +#include "ruleiter.h" +#include "ucase.h" +#include "util.h" +#include "uvector.h" + +// initial storage. Must be >= 0 +// *** same as in uniset.cpp ! 
*** +#define START_EXTRA 16 + +U_NAMESPACE_BEGIN + +// TODO memory debugging provided inside uniset.cpp +// could be made available here but probably obsolete with use of modern +// memory leak checker tools +#define _dbgct(me) + +//---------------------------------------------------------------- +// Constructors &c +//---------------------------------------------------------------- + +UnicodeSet::UnicodeSet(const UnicodeString& pattern, + uint32_t options, + const SymbolTable* symbols, + UErrorCode& status) : + len(0), capacity(START_EXTRA), list(0), bmpSet(0), buffer(0), + bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL), + fFlags(0) +{ + if(U_SUCCESS(status)){ + list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity); + /* test for NULL */ + if(list == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + }else{ + allocateStrings(status); + applyPattern(pattern, options, symbols, status); + } + } + _dbgct(this); +} + +UnicodeSet::UnicodeSet(const UnicodeString& pattern, ParsePosition& pos, + uint32_t options, + const SymbolTable* symbols, + UErrorCode& status) : + len(0), capacity(START_EXTRA), list(0), bmpSet(0), buffer(0), + bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL), + fFlags(0) +{ + if(U_SUCCESS(status)){ + list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity); + /* test for NULL */ + if(list == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + }else{ + allocateStrings(status); + applyPattern(pattern, pos, options, symbols, status); + } + } + _dbgct(this); +} + +//---------------------------------------------------------------- +// Public API +//---------------------------------------------------------------- + +UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern, + uint32_t options, + const SymbolTable* symbols, + UErrorCode& status) { + ParsePosition pos(0); + applyPattern(pattern, pos, options, symbols, status); + if (U_FAILURE(status)) return *this; + + int32_t i = pos.getIndex(); + + if 
(options & USET_IGNORE_SPACE) { + // Skip over trailing whitespace + ICU_Utility::skipWhitespace(pattern, i, TRUE); + } + + if (i != pattern.length()) { + status = U_ILLEGAL_ARGUMENT_ERROR; + } + return *this; +} + +UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern, + ParsePosition& pos, + uint32_t options, + const SymbolTable* symbols, + UErrorCode& status) { + if (U_FAILURE(status)) { + return *this; + } + if (isFrozen()) { + status = U_NO_WRITE_PERMISSION; + return *this; + } + // Need to build the pattern in a temporary string because + // _applyPattern calls add() etc., which set pat to empty. + UnicodeString rebuiltPat; + RuleCharacterIterator chars(pattern, symbols, pos); + applyPattern(chars, symbols, rebuiltPat, options, &UnicodeSet::closeOver, status); + if (U_FAILURE(status)) return *this; + if (chars.inVariable()) { + // syntaxError(chars, "Extra chars in variable value"); + status = U_MALFORMED_SET; + return *this; + } + setPattern(rebuiltPat); + return *this; +} + +// USetAdder implementation +// Does not use uset.h to reduce code dependencies +static void U_CALLCONV +_set_add(USet *set, UChar32 c) { + ((UnicodeSet *)set)->add(c); +} + +static void U_CALLCONV +_set_addRange(USet *set, UChar32 start, UChar32 end) { + ((UnicodeSet *)set)->add(start, end); +} + +static void U_CALLCONV +_set_addString(USet *set, const UChar *str, int32_t length) { + ((UnicodeSet *)set)->add(UnicodeString((UBool)(length<0), str, length)); +} + +//---------------------------------------------------------------- +// Case folding API +//---------------------------------------------------------------- + +// add the result of a full case mapping to the set +// use str as a temporary string to avoid constructing one +static inline void +addCaseMapping(UnicodeSet &set, int32_t result, const UChar *full, UnicodeString &str) { + if(result >= 0) { + if(result > UCASE_MAX_STRING_LENGTH) { + // add a single-code point case mapping + set.add(result); + } else { + // add 
a string case mapping from full with length result + str.setTo((UBool)FALSE, full, result); + set.add(str); + } + } + // result < 0: the code point mapped to itself, no need to add it + // see ucase.h +} + +UnicodeSet& UnicodeSet::closeOver(int32_t attribute) { + if (isFrozen() || isBogus()) { + return *this; + } + if (attribute & (USET_CASE_INSENSITIVE | USET_ADD_CASE_MAPPINGS)) { + const UCaseProps *csp = ucase_getSingleton(); + { + UnicodeSet foldSet(*this); + UnicodeString str; + USetAdder sa = { + foldSet.toUSet(), + _set_add, + _set_addRange, + _set_addString, + NULL, // don't need remove() + NULL // don't need removeRange() + }; + + // start with input set to guarantee inclusion + // USET_CASE: remove strings because the strings will actually be reduced (folded); + // therefore, start with no strings and add only those needed + if (attribute & USET_CASE_INSENSITIVE) { + foldSet.strings->removeAllElements(); + } + + int32_t n = getRangeCount(); + UChar32 result; + const UChar *full; + int32_t locCache = 0; + + for (int32_t i=0; isize() > 0) { + if (attribute & USET_CASE_INSENSITIVE) { + for (int32_t j=0; jsize(); ++j) { + str = *(const UnicodeString *) strings->elementAt(j); + str.foldCase(); + if(!ucase_addStringCaseClosure(csp, str.getBuffer(), str.length(), &sa)) { + foldSet.add(str); // does not map to code points: add the folded string itself + } + } + } else { + Locale root(""); +#if !UCONFIG_NO_BREAK_ITERATION + UErrorCode status = U_ZERO_ERROR; + BreakIterator *bi = BreakIterator::createWordInstance(root, status); + if (U_SUCCESS(status)) { +#endif + const UnicodeString *pStr; + + for (int32_t j=0; jsize(); ++j) { + pStr = (const UnicodeString *) strings->elementAt(j); + (str = *pStr).toLower(root); + foldSet.add(str); +#if !UCONFIG_NO_BREAK_ITERATION + (str = *pStr).toTitle(bi, root); + foldSet.add(str); +#endif + (str = *pStr).toUpper(root); + foldSet.add(str); + (str = *pStr).foldCase(); + foldSet.add(str); + } +#if !UCONFIG_NO_BREAK_ITERATION + } 
+ delete bi; +#endif + } + } + *this = foldSet; + } + } + return *this; +} + +U_NAMESPACE_END diff --git a/icu4c/source/common/uniset_props.cpp b/icu4c/source/common/uniset_props.cpp index 95ed2640aa4..c4e46657f78 100644 --- a/icu4c/source/common/uniset_props.cpp +++ b/icu4c/source/common/uniset_props.cpp @@ -331,65 +331,15 @@ UnicodeSet::UnicodeSet(const UnicodeString& pattern, len(0), capacity(START_EXTRA), list(0), bmpSet(0), buffer(0), bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL), fFlags(0) -{ - if(U_SUCCESS(status)){ - list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity); - /* test for NULL */ - if(list == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - }else{ - allocateStrings(status); - applyPattern(pattern, USET_IGNORE_SPACE, NULL, status); - } - } - _dbgct(this); -} - -/** - * Constructs a set from the given pattern, optionally ignoring - * white space. See the class description for the syntax of the - * pattern language. - * @param pattern a string specifying what characters are in the set - * @param options bitmask for options to apply to the pattern. - * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE. 
- */ -UnicodeSet::UnicodeSet(const UnicodeString& pattern, - uint32_t options, - const SymbolTable* symbols, - UErrorCode& status) : - len(0), capacity(START_EXTRA), list(0), bmpSet(0), buffer(0), - bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL), - fFlags(0) -{ - if(U_SUCCESS(status)){ - list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity); - /* test for NULL */ - if(list == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - }else{ - allocateStrings(status); - applyPattern(pattern, options, symbols, status); - } - } - _dbgct(this); -} - -UnicodeSet::UnicodeSet(const UnicodeString& pattern, ParsePosition& pos, - uint32_t options, - const SymbolTable* symbols, - UErrorCode& status) : - len(0), capacity(START_EXTRA), list(0), bmpSet(0), buffer(0), - bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL), - fFlags(0) { if(U_SUCCESS(status)){ list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity); /* test for NULL */ if(list == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; + status = U_MEMORY_ALLOCATION_ERROR; }else{ allocateStrings(status); - applyPattern(pattern, pos, options, symbols, status); + applyPattern(pattern, status); } } _dbgct(this); @@ -401,64 +351,46 @@ UnicodeSet::UnicodeSet(const UnicodeString& pattern, ParsePosition& pos, UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern, UErrorCode& status) { - return applyPattern(pattern, USET_IGNORE_SPACE, NULL, status); -} - - -/** - * Modifies this set to represent the set specified by the given - * pattern, optionally ignoring white space. See the class - * description for the syntax of the pattern language. - * @param pattern a string specifying what characters are in the set - * @param options bitmask for options to apply to the pattern. - * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE. 
- */ -UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern, - uint32_t options, - const SymbolTable* symbols, - UErrorCode& status) { - if (U_FAILURE(status) || isFrozen()) { - return *this; - } - + // Equivalent to + // return applyPattern(pattern, USET_IGNORE_SPACE, NULL, status); + // but without dependency on closeOver(). ParsePosition pos(0); - applyPattern(pattern, pos, options, symbols, status); + applyPatternIgnoreSpace(pattern, pos, NULL, status); if (U_FAILURE(status)) return *this; int32_t i = pos.getIndex(); - - if (options & USET_IGNORE_SPACE) { - // Skip over trailing whitespace - ICU_Utility::skipWhitespace(pattern, i, TRUE); - } - + // Skip over trailing whitespace + ICU_Utility::skipWhitespace(pattern, i, TRUE); if (i != pattern.length()) { status = U_ILLEGAL_ARGUMENT_ERROR; } return *this; } -UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern, - ParsePosition& pos, - uint32_t options, - const SymbolTable* symbols, - UErrorCode& status) { - if (U_FAILURE(status) || isFrozen()) { - return *this; +void +UnicodeSet::applyPatternIgnoreSpace(const UnicodeString& pattern, + ParsePosition& pos, + const SymbolTable* symbols, + UErrorCode& status) { + if (U_FAILURE(status)) { + return; + } + if (isFrozen()) { + status = U_NO_WRITE_PERMISSION; + return; } // Need to build the pattern in a temporary string because // _applyPattern calls add() etc., which set pat to empty. 
UnicodeString rebuiltPat; RuleCharacterIterator chars(pattern, symbols, pos); - applyPattern(chars, symbols, rebuiltPat, options, status); - if (U_FAILURE(status)) return *this; + applyPattern(chars, symbols, rebuiltPat, USET_IGNORE_SPACE, NULL, status); + if (U_FAILURE(status)) return; if (chars.inVariable()) { // syntaxError(chars, "Extra chars in variable value"); status = U_MALFORMED_SET; - return *this; + return; } setPattern(rebuiltPat); - return *this; } /** @@ -511,6 +443,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars, const SymbolTable* symbols, UnicodeString& rebuiltPat, uint32_t options, + UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute), UErrorCode& ec) { if (U_FAILURE(ec)) return; @@ -647,7 +580,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars, } switch (setMode) { case 1: - nested->applyPattern(chars, symbols, patLocal, options, ec); + nested->applyPattern(chars, symbols, patLocal, options, caseClosure, ec); break; case 2: chars.skipIgnored(opts); @@ -879,10 +812,10 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars, * patterns like /[^abc]/i work. 
*/ if ((options & USET_CASE_INSENSITIVE) != 0) { - closeOver(USET_CASE_INSENSITIVE); + (this->*caseClosure)(USET_CASE_INSENSITIVE); } else if ((options & USET_ADD_CASE_MAPPINGS) != 0) { - closeOver(USET_ADD_CASE_MAPPINGS); + (this->*caseClosure)(USET_ADD_CASE_MAPPINGS); } if (invert) { complement(); @@ -1365,126 +1298,4 @@ void UnicodeSet::applyPropertyPattern(RuleCharacterIterator& chars, rebuiltPat.append(pattern, 0, pos.getIndex()); } -//---------------------------------------------------------------- -// Case folding API -//---------------------------------------------------------------- - -// add the result of a full case mapping to the set -// use str as a temporary string to avoid constructing one -static inline void -addCaseMapping(UnicodeSet &set, int32_t result, const UChar *full, UnicodeString &str) { - if(result >= 0) { - if(result > UCASE_MAX_STRING_LENGTH) { - // add a single-code point case mapping - set.add(result); - } else { - // add a string case mapping from full with length result - str.setTo((UBool)FALSE, full, result); - set.add(str); - } - } - // result < 0: the code point mapped to itself, no need to add it - // see ucase.h -} - -UnicodeSet& UnicodeSet::closeOver(int32_t attribute) { - if (isFrozen() || isBogus()) { - return *this; - } - if (attribute & (USET_CASE_INSENSITIVE | USET_ADD_CASE_MAPPINGS)) { - const UCaseProps *csp = ucase_getSingleton(); - { - UnicodeSet foldSet(*this); - UnicodeString str; - USetAdder sa = { - foldSet.toUSet(), - _set_add, - _set_addRange, - _set_addString, - NULL, // don't need remove() - NULL // don't need removeRange() - }; - - // start with input set to guarantee inclusion - // USET_CASE: remove strings because the strings will actually be reduced (folded); - // therefore, start with no strings and add only those needed - if (attribute & USET_CASE_INSENSITIVE) { - foldSet.strings->removeAllElements(); - } - - int32_t n = getRangeCount(); - UChar32 result; - const UChar *full; - int32_t locCache = 0; - - 
for (int32_t i=0; isize() > 0) { - if (attribute & USET_CASE_INSENSITIVE) { - for (int32_t j=0; jsize(); ++j) { - str = *(const UnicodeString *) strings->elementAt(j); - str.foldCase(); - if(!ucase_addStringCaseClosure(csp, str.getBuffer(), str.length(), &sa)) { - foldSet.add(str); // does not map to code points: add the folded string itself - } - } - } else { - Locale root(""); -#if !UCONFIG_NO_BREAK_ITERATION - UErrorCode status = U_ZERO_ERROR; - BreakIterator *bi = BreakIterator::createWordInstance(root, status); - if (U_SUCCESS(status)) { -#endif - const UnicodeString *pStr; - - for (int32_t j=0; jsize(); ++j) { - pStr = (const UnicodeString *) strings->elementAt(j); - (str = *pStr).toLower(root); - foldSet.add(str); -#if !UCONFIG_NO_BREAK_ITERATION - (str = *pStr).toTitle(bi, root); - foldSet.add(str); -#endif - (str = *pStr).toUpper(root); - foldSet.add(str); - (str = *pStr).foldCase(); - foldSet.add(str); - } -#if !UCONFIG_NO_BREAK_ITERATION - } - delete bi; -#endif - } - } - *this = foldSet; - } - } - return *this; -} - U_NAMESPACE_END diff --git a/icu4c/source/common/unistr.cpp b/icu4c/source/common/unistr.cpp index 67e8f28992e..f181cd3f018 100644 --- a/icu4c/source/common/unistr.cpp +++ b/icu4c/source/common/unistr.cpp @@ -1482,7 +1482,7 @@ UnicodeString::doHashCode() const { /* Delegate hash computation to uhash. This makes UnicodeString * hashing consistent with UChar* hashing. */ - int32_t hashCode = uhash_hashUCharsN(getArrayStart(), length()); + int32_t hashCode = ustr_hashUCharsN(getArrayStart(), length()); if (hashCode == kInvalidHashCode) { hashCode = kEmptyHashCode; } @@ -1674,6 +1674,29 @@ UnicodeStringAppendable::getAppendBuffer(int32_t minCapacity, U_NAMESPACE_END +U_NAMESPACE_USE + +U_CAPI int32_t U_EXPORT2 +uhash_hashUnicodeString(const UHashTok key) { + const UnicodeString *str = (const UnicodeString*) key.pointer; + return (str == NULL) ? 
0 : str->hashCode(); +} + +// Moved here from uhash_us.cpp so that using a UVector of UnicodeString* +// does not depend on hashtable code. +U_CAPI UBool U_EXPORT2 +uhash_compareUnicodeString(const UHashTok key1, const UHashTok key2) { + const UnicodeString *str1 = (const UnicodeString*) key1.pointer; + const UnicodeString *str2 = (const UnicodeString*) key2.pointer; + if (str1 == str2) { + return TRUE; + } + if (str1 == NULL || str2 == NULL) { + return FALSE; + } + return *str1 == *str2; +} + #ifdef U_STATIC_IMPLEMENTATION /* This should never be called. It is defined here to make sure that the @@ -1683,7 +1706,6 @@ but defining it here makes sure that it is included with this object file. This makes sure that static library dependencies are kept to a minimum. */ static void uprv_UnicodeStringDummy(void) { - U_NAMESPACE_USE delete [] (new UnicodeString[2]); } #endif diff --git a/icu4c/source/common/unistr_case.cpp b/icu4c/source/common/unistr_case.cpp index a048c04eff3..4dda68a1813 100644 --- a/icu4c/source/common/unistr_case.cpp +++ b/icu4c/source/common/unistr_case.cpp @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 1999-2010, International Business Machines +* Copyright (C) 1999-2011, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* @@ -18,13 +18,11 @@ #include "unicode/utypes.h" #include "unicode/putil.h" -#include "unicode/locid.h" #include "cstring.h" #include "cmemory.h" #include "unicode/ustring.h" #include "unicode/unistr.h" #include "unicode/uchar.h" -#include "unicode/ubrk.h" #include "ustr_imp.h" #include "uhash.h" @@ -84,23 +82,14 @@ UnicodeString::doCaseCompare(int32_t start, // Write implementation //======================================== -/* - * Implement argument checking and buffer handling - * for string case mapping as a common function. 
- */ - UnicodeString & -UnicodeString::caseMap(BreakIterator *titleIter, - const char *locale, - uint32_t options, - int32_t toWhichCase) { +UnicodeString::caseMap(const UCaseMap *csm, + UStringCaseMapper *stringCaseMapper) { if(isEmpty() || !isWritable()) { // nothing to do return *this; } - const UCaseProps *csp=ucase_getSingleton(); - // We need to allocate a new buffer for the internal string case mapping function. // This is very similar to how doReplace() keeps the old array pointer // and deletes the old array itself after it is done. @@ -135,28 +124,8 @@ UnicodeString::caseMap(BreakIterator *titleIter, int32_t newLength; do { errorCode = U_ZERO_ERROR; - if(toWhichCase==TO_LOWER) { - newLength = ustr_toLower(csp, getArrayStart(), getCapacity(), - oldArray, oldLength, - locale, &errorCode); - } else if(toWhichCase==TO_UPPER) { - newLength = ustr_toUpper(csp, getArrayStart(), getCapacity(), - oldArray, oldLength, - locale, &errorCode); - } else if(toWhichCase==TO_TITLE) { -#if UCONFIG_NO_BREAK_ITERATION - errorCode=U_UNSUPPORTED_ERROR; -#else - newLength = ustr_toTitle(csp, getArrayStart(), getCapacity(), - oldArray, oldLength, - (UBreakIterator *)titleIter, locale, options, &errorCode); -#endif - } else { - newLength = ustr_foldCase(csp, getArrayStart(), getCapacity(), - oldArray, oldLength, - options, - &errorCode); - } + newLength = stringCaseMapper(csm, getArrayStart(), getCapacity(), + oldArray, oldLength, &errorCode); setLength(newLength); } while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(newLength, newLength, FALSE)); @@ -169,49 +138,12 @@ UnicodeString::caseMap(BreakIterator *titleIter, return *this; } -UnicodeString & -UnicodeString::toLower() { - return caseMap(0, Locale::getDefault().getName(), 0, TO_LOWER); -} - -UnicodeString & -UnicodeString::toLower(const Locale &locale) { - return caseMap(0, locale.getName(), 0, TO_LOWER); -} - -UnicodeString & -UnicodeString::toUpper() { - return caseMap(0, Locale::getDefault().getName(), 0, 
TO_UPPER); -} - -UnicodeString & -UnicodeString::toUpper(const Locale &locale) { - return caseMap(0, locale.getName(), 0, TO_UPPER); -} - -#if !UCONFIG_NO_BREAK_ITERATION - -UnicodeString & -UnicodeString::toTitle(BreakIterator *titleIter) { - return caseMap(titleIter, Locale::getDefault().getName(), 0, TO_TITLE); -} - -UnicodeString & -UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale) { - return caseMap(titleIter, locale.getName(), 0, TO_TITLE); -} - -UnicodeString & -UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options) { - return caseMap(titleIter, locale.getName(), options, TO_TITLE); -} - -#endif - UnicodeString & UnicodeString::foldCase(uint32_t options) { - /* The Locale parameter isn't used. Use "" instead. */ - return caseMap(0, "", options, FOLD_CASE); + UCaseMap csm=UCASEMAP_INITIALIZER; + csm.csp=ucase_getSingleton(); + csm.options=options; + return caseMap(&csm, ustrcase_internalFold); } U_NAMESPACE_END @@ -244,4 +176,3 @@ uhash_compareCaselessUnicodeString(const UHashTok key1, const UHashTok key2) { } return str1->caseCompare(*str2, U_FOLD_CASE_DEFAULT) == 0; } - diff --git a/icu4c/source/common/unistr_case_locale.cpp b/icu4c/source/common/unistr_case_locale.cpp new file mode 100644 index 00000000000..a8d5704431e --- /dev/null +++ b/icu4c/source/common/unistr_case_locale.cpp @@ -0,0 +1,70 @@ +/* +******************************************************************************* +* Copyright (C) 2011, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* file name: unistr_case_locale.cpp +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2011may31 +* created by: Markus W. Scherer +* +* Locale-sensitive case mapping functions (ones that call uloc_getDefault()) +* were moved here to break dependency cycles among parts of the common library. 
+*/ + +#include "unicode/utypes.h" +#include "unicode/locid.h" +#include "unicode/unistr.h" +#include "cmemory.h" +#include "ustr_imp.h" + +U_NAMESPACE_BEGIN + +//======================================== +// Write implementation +//======================================== + +/* + * Set parameters on an empty UCaseMap, for UCaseMap-less API functions. + * Do this fast because it is called with every function call. + */ +static inline void +setTempCaseMap(UCaseMap *csm, const char *locale) { + if(csm->csp==NULL) { + csm->csp=ucase_getSingleton(); + } + if(locale!=NULL && locale[0]==0) { + csm->locale[0]=0; + } else { + ustrcase_setTempCaseMapLocale(csm, locale); + } +} + +UnicodeString & +UnicodeString::toLower() { + return toLower(Locale::getDefault()); +} + +UnicodeString & +UnicodeString::toLower(const Locale &locale) { + UCaseMap csm=UCASEMAP_INITIALIZER; + setTempCaseMap(&csm, locale.getName()); + return caseMap(&csm, ustrcase_internalToLower); +} + +UnicodeString & +UnicodeString::toUpper() { + return toUpper(Locale::getDefault()); +} + +UnicodeString & +UnicodeString::toUpper(const Locale &locale) { + UCaseMap csm=UCASEMAP_INITIALIZER; + setTempCaseMap(&csm, locale.getName()); + return caseMap(&csm, ustrcase_internalToUpper); +} + +U_NAMESPACE_END diff --git a/icu4c/source/common/unistr_titlecase_brkiter.cpp b/icu4c/source/common/unistr_titlecase_brkiter.cpp new file mode 100644 index 00000000000..9b60e2a0358 --- /dev/null +++ b/icu4c/source/common/unistr_titlecase_brkiter.cpp @@ -0,0 +1,90 @@ +/* +******************************************************************************* +* Copyright (C) 2011, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* file name: unistr_titlecase_brkiter.cpp +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:2 +* +* created on: 2011may30 +* created by: Markus W. 
Scherer +* +* Titlecasing functions that are based on BreakIterator +* were moved here to break dependency cycles among parts of the common library. +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_BREAK_ITERATION + +#include "unicode/brkiter.h" +#include "unicode/ubrk.h" +#include "unicode/unistr.h" +#include "unicode/ustring.h" +#include "cmemory.h" +#include "ustr_imp.h" + +static int32_t U_CALLCONV +unistr_case_internalToTitle(const UCaseMap *csm, + UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + UErrorCode *pErrorCode) { + ubrk_setText(csm->iter, src, srcLength, pErrorCode); + return ustrcase_internalToTitle(csm, dest, destCapacity, src, srcLength, pErrorCode); +} + +/* + * Set parameters on an empty UCaseMap, for UCaseMap-less API functions. + * Do this fast because it is called with every function call. + */ +static inline void +setTempCaseMap(UCaseMap *csm, const char *locale) { + if(csm->csp==NULL) { + csm->csp=ucase_getSingleton(); + } + if(locale!=NULL && locale[0]==0) { + csm->locale[0]=0; + } else { + ustrcase_setTempCaseMapLocale(csm, locale); + } +} + +U_NAMESPACE_BEGIN + +UnicodeString & +UnicodeString::toTitle(BreakIterator *titleIter) { + return toTitle(titleIter, Locale::getDefault(), 0); +} + +UnicodeString & +UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale) { + return toTitle(titleIter, locale, 0); +} + +UnicodeString & +UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options) { + UCaseMap csm=UCASEMAP_INITIALIZER; + csm.options=options; + setTempCaseMap(&csm, locale.getName()); + BreakIterator *bi=titleIter; + if(bi==NULL) { + UErrorCode errorCode=U_ZERO_ERROR; + bi=BreakIterator::createWordInstance(locale, errorCode); + if(U_FAILURE(errorCode)) { + setToBogus(); + return *this; + } + } + csm.iter=reinterpret_cast<UBreakIterator *>(bi); + caseMap(&csm, unistr_case_internalToTitle); + if(titleIter==NULL) { + delete bi; + } + return *this; +} + +U_NAMESPACE_END + +#endif //
!UCONFIG_NO_BREAK_ITERATION diff --git a/icu4c/source/common/uobject.cpp b/icu4c/source/common/uobject.cpp index 515e943e10a..bf31c573ce2 100644 --- a/icu4c/source/common/uobject.cpp +++ b/icu4c/source/common/uobject.cpp @@ -1,7 +1,7 @@ /* ****************************************************************************** * -* Copyright (C) 2002-2008, International Business Machines +* Copyright (C) 2002-2011, International Business Machines * Corporation and others. All Rights Reserved. * ****************************************************************************** @@ -111,4 +111,9 @@ UObject::~UObject() {} U_NAMESPACE_END +U_NAMESPACE_USE +U_CAPI void U_EXPORT2 +uprv_deleteUObject(void *obj) { + delete reinterpret_cast<UObject *>(obj); +} diff --git a/icu4c/source/common/uprops.cpp b/icu4c/source/common/uprops.cpp index 81818b75859..3d57a861a5e 100644 --- a/icu4c/source/common/uprops.cpp +++ b/icu4c/source/common/uprops.cpp @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 2002-2010, International Business Machines +* Copyright (C) 2002-2011, International Business Machines * Corporation and others. All Rights Reserved.
* ******************************************************************************* @@ -288,32 +288,6 @@ u_hasBinaryProperty(UChar32 c, UProperty which) { } } -#if !UCONFIG_NO_NORMALIZATION - -U_CAPI uint8_t U_EXPORT2 -u_getCombiningClass(UChar32 c) { - UErrorCode errorCode=U_ZERO_ERROR; - const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); - if(U_SUCCESS(errorCode)) { - return impl->getCC(impl->getNorm16(c)); - } else { - return 0; - } -} - -static uint16_t -getFCD16(UChar32 c) { - UErrorCode errorCode=U_ZERO_ERROR; - const UTrie2 *trie=Normalizer2Factory::getFCDTrie(errorCode); - if(U_SUCCESS(errorCode)) { - return UTRIE2_GET16(trie, c); - } else { - return 0; - } -} - -#endif - struct IntProperty; typedef int32_t IntPropertyGetValue(const IntProperty &prop, UChar32 c, UProperty which); @@ -427,7 +401,7 @@ static int32_t getLeadCombiningClass(const IntProperty &, UChar32, UProperty) { } #else static int32_t getLeadCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { - return getFCD16(c)>>8; + return unorm_getFCD16Simple(c)>>8; } #endif @@ -437,7 +411,7 @@ static int32_t getTrailCombiningClass(const IntProperty &, UChar32, UProperty) { } #else static int32_t getTrailCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { - return getFCD16(c)&0xff; + return unorm_getFCD16Simple(c)&0xff; } #endif diff --git a/icu4c/source/common/uscript.c b/icu4c/source/common/uscript.c index 2a7000431f5..6193c80baff 100644 --- a/icu4c/source/common/uscript.c +++ b/icu4c/source/common/uscript.c @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (C) 1997-2010, International Business Machines +* Copyright (C) 1997-2011, International Business Machines * Corporation and others. All Rights Reserved. 
********************************************************************** * @@ -95,16 +95,3 @@ uscript_getCode(const char* nameOrAbbrOrLocale, } return numFilled; } - -U_CAPI const char* U_EXPORT2 -uscript_getName(UScriptCode scriptCode){ - return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode, - U_LONG_PROPERTY_NAME); -} - -U_CAPI const char* U_EXPORT2 -uscript_getShortName(UScriptCode scriptCode){ - return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode, - U_SHORT_PROPERTY_NAME); -} - diff --git a/icu4c/source/common/uset.cpp b/icu4c/source/common/uset.cpp index 74ddba37c15..5648a115d49 100644 --- a/icu4c/source/common/uset.cpp +++ b/icu4c/source/common/uset.cpp @@ -149,11 +149,6 @@ uset_clear(USet* set) { ((UnicodeSet*) set)->UnicodeSet::clear(); } -U_CAPI void U_EXPORT2 -uset_closeOver(USet* set, int32_t attributes) { - ((UnicodeSet*) set)->UnicodeSet::closeOver(attributes); -} - U_CAPI void U_EXPORT2 uset_removeAllStrings(USet* set) { ((UnicodeSet*) set)->UnicodeSet::removeAllStrings(); diff --git a/icu4c/source/common/uset_props.cpp b/icu4c/source/common/uset_props.cpp index fb607390a2d..ed81d8b39dc 100644 --- a/icu4c/source/common/uset_props.cpp +++ b/icu4c/source/common/uset_props.cpp @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 2002-2006, International Business Machines +* Copyright (C) 2002-2011, International Business Machines * Corporation and others. All Rights Reserved. 
* ******************************************************************************* @@ -38,7 +38,7 @@ uset_openPattern(const UChar* pattern, int32_t patternLength, *ec = U_MEMORY_ALLOCATION_ERROR; return 0; } - + if (U_FAILURE(*ec)) { delete set; set = NULL; @@ -58,7 +58,7 @@ uset_openPatternOptions(const UChar* pattern, int32_t patternLength, *ec = U_MEMORY_ALLOCATION_ERROR; return 0; } - + if (U_FAILURE(*ec)) { delete set; set = NULL; @@ -134,3 +134,8 @@ uset_toPattern(const USet* set, ((const UnicodeSet*) set)->toPattern(pat, escapeUnprintable); return pat.extract(result, resultCapacity, *ec); } + +U_CAPI void U_EXPORT2 +uset_closeOver(USet* set, int32_t attributes) { + ((UnicodeSet*) set)->UnicodeSet::closeOver(attributes); +} diff --git a/icu4c/source/common/ustr_imp.h b/icu4c/source/common/ustr_imp.h index 43291d733dc..ee54d332ef0 100644 --- a/icu4c/source/common/ustr_imp.h +++ b/icu4c/source/common/ustr_imp.h @@ -19,7 +19,7 @@ #include "unicode/uiter.h" #include "ucase.h" -/** Simple declaration for u_strToTitle() to avoid including unicode/ubrk.h. */ +/** Simple declaration to avoid including unicode/ubrk.h. 
*/ #ifndef UBRK_TYPEDEF_UBREAK_ITERATOR # define UBRK_TYPEDEF_UBREAK_ITERATOR typedef struct UBreakIterator UBreakIterator; @@ -105,48 +105,108 @@ typedef struct UCaseMap UCaseMap; # define UCASEMAP_INITIALIZER { NULL, NULL, { 0 }, 0, 0 } #endif -enum { - TO_LOWER, - TO_UPPER, - TO_TITLE, - FOLD_CASE -}; +U_CFUNC void +ustrcase_setTempCaseMapLocale(UCaseMap *csm, const char *locale); -U_CFUNC int32_t -ustr_toLower(const UCaseProps *csp, - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - const char *locale, - UErrorCode *pErrorCode); +#ifndef U_STRING_CASE_MAPPER_DEFINED +#define U_STRING_CASE_MAPPER_DEFINED -U_CFUNC int32_t -ustr_toUpper(const UCaseProps *csp, - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - const char *locale, - UErrorCode *pErrorCode); +/** + * String case mapping function type, used by ustrcase_map(). + * All error checking must be done. + * The UCaseMap must be fully initialized, with locale and/or iter set as needed. + * src and dest must not overlap. + */ +typedef int32_t U_CALLCONV +UStringCaseMapper(const UCaseMap *csm, + UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + UErrorCode *pErrorCode); + +#endif + +/** Implements UStringCaseMapper. */ +U_CFUNC int32_t U_CALLCONV +ustrcase_internalToLower(const UCaseMap *csm, + UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + UErrorCode *pErrorCode); + +/** Implements UStringCaseMapper. */ +U_CFUNC int32_t U_CALLCONV +ustrcase_internalToUpper(const UCaseMap *csm, + UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + UErrorCode *pErrorCode); #if !UCONFIG_NO_BREAK_ITERATION +/** Implements UStringCaseMapper. */ +U_CFUNC int32_t U_CALLCONV +ustrcase_internalToTitle(const UCaseMap *csm, + UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + UErrorCode *pErrorCode); + +#endif + +/** Implements UStringCaseMapper. 
*/ +U_CFUNC int32_t U_CALLCONV +ustrcase_internalFold(const UCaseMap *csm, + UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + UErrorCode *pErrorCode); + +/** + * Implements argument checking and buffer handling + * for string case mapping as a common function. + */ U_CFUNC int32_t -ustr_toTitle(const UCaseProps *csp, +ustrcase_map(const UCaseMap *csm, UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, - UBreakIterator *titleIter, - const char *locale, uint32_t options, + UStringCaseMapper *stringCaseMapper, UErrorCode *pErrorCode); -#endif +/** + * UTF-8 string case mapping function type, used by ucasemap_mapUTF8(). + * UTF-8 version of UStringCaseMapper. + * All error checking must be done. + * The UCaseMap must be fully initialized, with locale and/or iter set as needed. + * src and dest must not overlap. + */ +typedef int32_t U_CALLCONV +UTF8CaseMapper(const UCaseMap *csm, + uint8_t *dest, int32_t destCapacity, + const uint8_t *src, int32_t srcLength, + UErrorCode *pErrorCode); + +/** Implements UTF8CaseMapper. */ +U_CFUNC int32_t U_CALLCONV +ucasemap_internalUTF8ToTitle(const UCaseMap *csm, + uint8_t *dest, int32_t destCapacity, + const uint8_t *src, int32_t srcLength, + UErrorCode *pErrorCode); /** - * Internal case folding function. + * Implements argument checking and buffer handling + * for UTF-8 string case mapping as a common function. 
*/ U_CFUNC int32_t -ustr_foldCase(const UCaseProps *csp, - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - uint32_t options, - UErrorCode *pErrorCode); +ucasemap_mapUTF8(const UCaseMap *csm, + uint8_t *dest, int32_t destCapacity, + const uint8_t *src, int32_t srcLength, + UTF8CaseMapper *stringCaseMapper, + UErrorCode *pErrorCode); + +U_CAPI int32_t U_EXPORT2 +ustr_hashUCharsN(const UChar *str, int32_t length); + +U_CAPI int32_t U_EXPORT2 +ustr_hashCharsN(const char *str, int32_t length); + +U_CAPI int32_t U_EXPORT2 +ustr_hashICharsN(const char *str, int32_t length); /** * NUL-terminate a UChar * string if possible. diff --git a/icu4c/source/common/ustr_titlecase_brkiter.cpp b/icu4c/source/common/ustr_titlecase_brkiter.cpp new file mode 100644 index 00000000000..4a2352eed50 --- /dev/null +++ b/icu4c/source/common/ustr_titlecase_brkiter.cpp @@ -0,0 +1,91 @@ +/* +******************************************************************************* +* Copyright (C) 2011, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* file name: ustr_titlecase_brkiter.cpp +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2011may30 +* created by: Markus W. Scherer +* +* Titlecasing functions that are based on BreakIterator +* were moved here to break dependency cycles among parts of the common library. +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_BREAK_ITERATION + +#include "unicode/brkiter.h" +#include "unicode/ubrk.h" +#include "unicode/ucasemap.h" +#include "cmemory.h" +#include "ucase.h" +#include "ustr_imp.h" + +/* functions available in the common library (for unistr_case.cpp) */ + +/* + * Set parameters on an empty UCaseMap, for UCaseMap-less API functions. + * Do this fast because it is called with every function call. + * Duplicate of the same function in ustrcase.cpp, to keep it inline. 
+ */ +static inline void +setTempCaseMap(UCaseMap *csm, const char *locale) { + if(csm->csp==NULL) { + csm->csp=ucase_getSingleton(); + } + if(locale!=NULL && locale[0]==0) { + csm->locale[0]=0; + } else { + ustrcase_setTempCaseMapLocale(csm, locale); + } +} + +/* public API functions */ + +U_CAPI int32_t U_EXPORT2 +u_strToTitle(UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + UBreakIterator *titleIter, + const char *locale, + UErrorCode *pErrorCode) { + UCaseMap csm=UCASEMAP_INITIALIZER; + setTempCaseMap(&csm, locale); + if(titleIter!=NULL) { + ubrk_setText(csm.iter=titleIter, src, srcLength, pErrorCode); + } else { + csm.iter=ubrk_open(UBRK_WORD, csm.locale, src, srcLength, pErrorCode); + } + int32_t length=ustrcase_map( + &csm, + dest, destCapacity, + src, srcLength, + ustrcase_internalToTitle, pErrorCode); + if(titleIter==NULL && csm.iter!=NULL) { + ubrk_close(csm.iter); + } + return length; +} + +U_CAPI int32_t U_EXPORT2 +ucasemap_toTitle(UCaseMap *csm, + UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + UErrorCode *pErrorCode) { + if(csm->iter!=NULL) { + ubrk_setText(csm->iter, src, srcLength, pErrorCode); + } else { + csm->iter=ubrk_open(UBRK_WORD, csm->locale, src, srcLength, pErrorCode); + } + return ustrcase_map( + csm, + dest, destCapacity, + src, srcLength, + ustrcase_internalToTitle, pErrorCode); +} + +#endif // !UCONFIG_NO_BREAK_ITERATION diff --git a/icu4c/source/common/ustrcase.cpp b/icu4c/source/common/ustrcase.cpp index 15740527db5..17468c9d493 100644 --- a/icu4c/source/common/ustrcase.cpp +++ b/icu4c/source/common/ustrcase.cpp @@ -19,7 +19,7 @@ */ #include "unicode/utypes.h" -#include "unicode/uloc.h" +#include "unicode/brkiter.h" #include "unicode/ustring.h" #include "unicode/ucasemap.h" #include "unicode/ubrk.h" @@ -29,9 +29,11 @@ #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) +U_NAMESPACE_USE + /* string casing ------------------------------------------------------------ 
*/ -/* append a full case mapping result, see UCASE_MAX_STRING_LENGTH */ +/* Appends a full case mapping result, see UCASE_MAX_STRING_LENGTH. */ static inline int32_t appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity, int32_t result, const UChar *s) { @@ -155,81 +157,31 @@ _caseMap(const UCaseMap *csm, UCaseMapFull *map, return destIndex; } -static void -setTempCaseMapLocale(UCaseMap *csm, const char *locale, UErrorCode * /*pErrorCode*/) { - /* - * We could call ucasemap_setLocale(), but here we really only care about - * the initial language subtag, we need not return the real string via - * ucasemap_getLocale(), and we don't care about only getting "x" from - * "x-some-thing" etc. - * - * We ignore locales with a longer-than-3 initial subtag. - * - * We also do not fill in the locCache because it is rarely used, - * and not worth setting unless we reuse it for many case mapping operations. - * (That's why UCaseMap was created.) - */ - int i; - char c; - - /* the internal functions require locale!=NULL */ - if(locale==NULL) { - locale=uloc_getDefault(); - } - for(i=0; i<4 && (c=locale[i])!=0 && c!='-' && c!='_'; ++i) { - csm->locale[i]=c; - } - if(i<=3) { - csm->locale[i]=0; /* Up to 3 non-separator characters. */ - } else { - csm->locale[0]=0; /* Longer-than-3 initial subtag: Ignore. */ - } -} - -/* - * Set parameters on an empty UCaseMap, for UCaseMap-less API functions. - * Do this fast because it is called with every function call. - */ -static inline void -setTempCaseMap(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) { - if(csm->csp==NULL) { - csm->csp=ucase_getSingleton(); - } - if(locale!=NULL && locale[0]==0) { - csm->locale[0]=0; - } else { - setTempCaseMapLocale(csm, locale, pErrorCode); - } -} - #if !UCONFIG_NO_BREAK_ITERATION -/* - * Internal titlecasing function. 
- */ -static int32_t -_toTitle(UCaseMap *csm, - UChar *dest, int32_t destCapacity, - const UChar *src, UCaseContext *csc, - int32_t srcLength, - UErrorCode *pErrorCode) { +U_CFUNC int32_t U_CALLCONV +ustrcase_internalToTitle(const UCaseMap *csm, + UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + UErrorCode *pErrorCode) { const UChar *s; UChar32 c; int32_t prev, titleStart, titleLimit, idx, destIndex, length; UBool isFirstIndex; - if(csm->iter!=NULL) { - ubrk_setText(csm->iter, src, srcLength, pErrorCode); - } else { - csm->iter=ubrk_open(UBRK_WORD, csm->locale, - src, srcLength, - pErrorCode); - } if(U_FAILURE(*pErrorCode)) { return 0; } + // Use the C++ abstract base class to minimize dependencies. + // TODO: Change UCaseMap.iter to store a BreakIterator directly. + BreakIterator *bi=reinterpret_cast<BreakIterator *>(csm->iter); + /* set up local variables */ + int32_t locCache=csm->locCache; + UCaseContext csc=UCASECONTEXT_INITIALIZER; + csc.p=(void *)src; + csc.limit=srcLength; destIndex=0; prev=0; isFirstIndex=TRUE; @@ -239,9 +191,9 @@ _toTitle(UCaseMap *csm, /* find next index where to titlecase */ if(isFirstIndex) { isFirstIndex=FALSE; - idx=ubrk_first(csm->iter); + idx=bi->first(); } else { - idx=ubrk_next(csm->iter); + idx=bi->next(); } if(idx==UBRK_DONE || idx>srcLength) { idx=srcLength; @@ -291,14 +243,14 @@ _toTitle(UCaseMap *csm, if(titleStart<titleLimit) { /* titlecase c which is from [titleStart..titleLimit[ */ - csc->cpStart=titleStart; - csc->cpLimit=titleLimit; - c=ucase_toFullTitle(csm->csp, c, utf16_caseContextIterator, csc, &s, csm->locale, &csm->locCache); + csc.cpStart=titleStart; + csc.cpLimit=titleLimit; + c=ucase_toFullTitle(csm->csp, c, utf16_caseContextIterator, &csc, &s, csm->locale, &locCache); destIndex=appendResult(dest, destIndex, destCapacity, c, s); /* Special case Dutch IJ titlecasing */ if ( titleStart+1 < idx && - ucase_getCaseLocale(csm->locale,&csm->locCache) == UCASE_LOC_DUTCH && + ucase_getCaseLocale(csm->locale,&locCache) == UCASE_LOC_DUTCH && ( src[titleStart] == (UChar32) 0x0049 ||
src[titleStart] == (UChar32) 0x0069 ) && ( src[titleStart+1] == (UChar32) 0x004A || src[titleStart+1] == (UChar32) 0x006A )) { c=(UChar32) 0x004A; @@ -314,7 +266,7 @@ _toTitle(UCaseMap *csm, _caseMap( csm, ucase_toFullLower, dest+destIndex, destCapacity-destIndex, - src, csc, + src, &csc, titleLimit, idx, pErrorCode); } else { @@ -338,83 +290,41 @@ _toTitle(UCaseMap *csm, return destIndex; } -#endif +#endif // !UCONFIG_NO_BREAK_ITERATION /* functions available in the common library (for unistr_case.cpp) */ -U_CFUNC int32_t -ustr_toLower(const UCaseProps *csp, - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - const char *locale, - UErrorCode *pErrorCode) { - UCaseMap csm=UCASEMAP_INITIALIZER; +U_CFUNC int32_t U_CALLCONV +ustrcase_internalToLower(const UCaseMap *csm, + UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + UErrorCode *pErrorCode) { UCaseContext csc=UCASECONTEXT_INITIALIZER; - - csm.csp=csp; - setTempCaseMap(&csm, locale, pErrorCode); csc.p=(void *)src; csc.limit=srcLength; - - return _caseMap(&csm, ucase_toFullLower, - dest, destCapacity, - src, &csc, 0, srcLength, - pErrorCode); + return _caseMap( + csm, ucase_toFullLower, + dest, destCapacity, + src, &csc, 0, srcLength, + pErrorCode); } -U_CFUNC int32_t -ustr_toUpper(const UCaseProps *csp, - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - const char *locale, - UErrorCode *pErrorCode) { - UCaseMap csm=UCASEMAP_INITIALIZER; +U_CFUNC int32_t U_CALLCONV +ustrcase_internalToUpper(const UCaseMap *csm, + UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + UErrorCode *pErrorCode) { UCaseContext csc=UCASECONTEXT_INITIALIZER; - - csm.csp=csp; - setTempCaseMap(&csm, locale, pErrorCode); csc.p=(void *)src; csc.limit=srcLength; - - return _caseMap(&csm, ucase_toFullUpper, - dest, destCapacity, - src, &csc, 0, srcLength, - pErrorCode); + return _caseMap( + csm, ucase_toFullUpper, + dest, destCapacity, + src, 
&csc, 0, srcLength, + pErrorCode); } -#if !UCONFIG_NO_BREAK_ITERATION - -U_CFUNC int32_t -ustr_toTitle(const UCaseProps *csp, - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - UBreakIterator *titleIter, - const char *locale, uint32_t options, - UErrorCode *pErrorCode) { - UCaseMap csm=UCASEMAP_INITIALIZER; - UCaseContext csc=UCASECONTEXT_INITIALIZER; - int32_t length; - - csm.csp=csp; - csm.iter=titleIter; - csm.options=options; - setTempCaseMap(&csm, locale, pErrorCode); - csc.p=(void *)src; - csc.limit=srcLength; - - length=_toTitle(&csm, - dest, destCapacity, - src, &csc, srcLength, - pErrorCode); - if(titleIter==NULL && csm.iter!=NULL) { - ubrk_close(csm.iter); - } - return length; -} - -#endif - -U_CFUNC int32_t +static int32_t ustr_foldCase(const UCaseProps *csp, UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, @@ -444,26 +354,27 @@ ustr_foldCase(const UCaseProps *csp, return destIndex; } -/* - * Implement argument checking and buffer handling - * for string case mapping as a common function. 
- */ - -/* common internal function for public API functions */ +U_CFUNC int32_t U_CALLCONV +ustrcase_internalFold(const UCaseMap *csm, + UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + UErrorCode *pErrorCode) { + return ustr_foldCase(csm->csp, dest, destCapacity, src, srcLength, csm->options, pErrorCode); +} -static int32_t -caseMap(const UCaseMap *csm, - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - int32_t toWhichCase, - UErrorCode *pErrorCode) { +U_CFUNC int32_t +ustrcase_map(const UCaseMap *csm, + UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + UStringCaseMapper *stringCaseMapper, + UErrorCode *pErrorCode) { UChar buffer[300]; UChar *temp; int32_t destLength; /* check argument values */ - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + if(U_FAILURE(*pErrorCode)) { return 0; } if( destCapacity<0 || @@ -501,40 +412,7 @@ caseMap(const UCaseMap *csm, temp=dest; } - destLength=0; - - if(toWhichCase==FOLD_CASE) { - destLength=ustr_foldCase(csm->csp, temp, destCapacity, src, srcLength, - csm->options, pErrorCode); - } else { - UCaseContext csc=UCASECONTEXT_INITIALIZER; - - csc.p=(void *)src; - csc.limit=srcLength; - - if(toWhichCase==TO_LOWER) { - destLength=_caseMap(csm, ucase_toFullLower, - temp, destCapacity, - src, &csc, - 0, srcLength, - pErrorCode); - } else if(toWhichCase==TO_UPPER) { - destLength=_caseMap(csm, ucase_toFullUpper, - temp, destCapacity, - src, &csc, - 0, srcLength, - pErrorCode); - } else /* if(toWhichCase==TO_TITLE) */ { -#if UCONFIG_NO_BREAK_ITERATION - *pErrorCode=U_UNSUPPORTED_ERROR; -#else - /* UCaseMap is actually non-const in toTitle() APIs. 
*/ - destLength=_toTitle((UCaseMap *)csm, temp, destCapacity, - src, &csc, srcLength, - pErrorCode); -#endif - } - } + destLength=stringCaseMapper(csm, temp, destCapacity, src, srcLength, pErrorCode); if(temp!=dest) { /* copy the result string to the destination buffer */ if(destLength>0) { @@ -553,68 +431,6 @@ caseMap(const UCaseMap *csm, /* public API functions */ -U_CAPI int32_t U_EXPORT2 -u_strToLower(UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - const char *locale, - UErrorCode *pErrorCode) { - UCaseMap csm=UCASEMAP_INITIALIZER; - setTempCaseMap(&csm, locale, pErrorCode); - return caseMap(&csm, - dest, destCapacity, - src, srcLength, - TO_LOWER, pErrorCode); -} - -U_CAPI int32_t U_EXPORT2 -u_strToUpper(UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - const char *locale, - UErrorCode *pErrorCode) { - UCaseMap csm=UCASEMAP_INITIALIZER; - setTempCaseMap(&csm, locale, pErrorCode); - return caseMap(&csm, - dest, destCapacity, - src, srcLength, - TO_UPPER, pErrorCode); -} - -#if !UCONFIG_NO_BREAK_ITERATION - -U_CAPI int32_t U_EXPORT2 -u_strToTitle(UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - UBreakIterator *titleIter, - const char *locale, - UErrorCode *pErrorCode) { - UCaseMap csm=UCASEMAP_INITIALIZER; - int32_t length; - - csm.iter=titleIter; - setTempCaseMap(&csm, locale, pErrorCode); - length=caseMap(&csm, - dest, destCapacity, - src, srcLength, - TO_TITLE, pErrorCode); - if(titleIter==NULL && csm.iter!=NULL) { - ubrk_close(csm.iter); - } - return length; -} - -U_CAPI int32_t U_EXPORT2 -ucasemap_toTitle(UCaseMap *csm, - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - UErrorCode *pErrorCode) { - return caseMap(csm, - dest, destCapacity, - src, srcLength, - TO_TITLE, pErrorCode); -} - -#endif - U_CAPI int32_t U_EXPORT2 u_strFoldCase(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, @@ -623,10 +439,11 @@ u_strFoldCase(UChar 
*dest, int32_t destCapacity, UCaseMap csm=UCASEMAP_INITIALIZER; csm.csp=ucase_getSingleton(); csm.options=options; - return caseMap(&csm, - dest, destCapacity, - src, srcLength, - FOLD_CASE, pErrorCode); + return ustrcase_map( + &csm, + dest, destCapacity, + src, srcLength, + ustrcase_internalFold, pErrorCode); } /* case-insensitive string comparisons -------------------------------------- */ diff --git a/icu4c/source/common/ustrcase_locale.cpp b/icu4c/source/common/ustrcase_locale.cpp new file mode 100644 index 00000000000..5707c5a5be1 --- /dev/null +++ b/icu4c/source/common/ustrcase_locale.cpp @@ -0,0 +1,110 @@ +/* +******************************************************************************* +* Copyright (C) 2011, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* file name: ustrcase_locale.cpp +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2011may31 +* created by: Markus W. Scherer +* +* Locale-sensitive case mapping functions (ones that call uloc_getDefault()) +* were moved here to break dependency cycles among parts of the common library. +*/ + +#include "unicode/utypes.h" +#include "unicode/ucasemap.h" +#include "unicode/uloc.h" +#include "unicode/ustring.h" +#include "ucase.h" +#include "ustr_imp.h" + +U_CFUNC void +ustrcase_setTempCaseMapLocale(UCaseMap *csm, const char *locale) { + /* + * We could call ucasemap_setLocale(), but here we really only care about + * the initial language subtag, we need not return the real string via + * ucasemap_getLocale(), and we don't care about only getting "x" from + * "x-some-thing" etc. + * + * We ignore locales with a longer-than-3 initial subtag. + * + * We also do not fill in the locCache because it is rarely used, + * and not worth setting unless we reuse it for many case mapping operations. + * (That's why UCaseMap was created.) 
+ */ + int i; + char c; + + /* the internal functions require locale!=NULL */ + if(locale==NULL) { + // Do not call uprv_getDefaultLocaleID() because that does not see + // changes to the default locale via uloc_setDefault(). + // It would also be inefficient if used frequently because uprv_getDefaultLocaleID() + // does not cache the locale ID. + // + // Unfortunately, uloc_getDefault() has many dependencies. + // We only care about a small set of language subtags, + // and we do not need the locale ID to be canonicalized. + // + // Best is to not call case mapping functions with a NULL locale ID. + locale=uloc_getDefault(); + } + for(i=0; i<4 && (c=locale[i])!=0 && c!='-' && c!='_'; ++i) { + csm->locale[i]=c; + } + if(i<=3) { + csm->locale[i]=0; /* Up to 3 non-separator characters. */ + } else { + csm->locale[0]=0; /* Longer-than-3 initial subtag: Ignore. */ + } +} + +/* + * Set parameters on an empty UCaseMap, for UCaseMap-less API functions. + * Do this fast because it is called with every function call. 
+ */ +static inline void +setTempCaseMap(UCaseMap *csm, const char *locale) { + if(csm->csp==NULL) { + csm->csp=ucase_getSingleton(); + } + if(locale!=NULL && locale[0]==0) { + csm->locale[0]=0; + } else { + ustrcase_setTempCaseMapLocale(csm, locale); + } +} + +/* public API functions */ + +U_CAPI int32_t U_EXPORT2 +u_strToLower(UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + const char *locale, + UErrorCode *pErrorCode) { + UCaseMap csm=UCASEMAP_INITIALIZER; + setTempCaseMap(&csm, locale); + return ustrcase_map( + &csm, + dest, destCapacity, + src, srcLength, + ustrcase_internalToLower, pErrorCode); +} + +U_CAPI int32_t U_EXPORT2 +u_strToUpper(UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + const char *locale, + UErrorCode *pErrorCode) { + UCaseMap csm=UCASEMAP_INITIALIZER; + setTempCaseMap(&csm, locale); + return ustrcase_map( + &csm, + dest, destCapacity, + src, srcLength, + ustrcase_internalToUpper, pErrorCode); +} diff --git a/icu4c/source/common/ustring.cpp b/icu4c/source/common/ustring.cpp index 2d181aa72b7..297b095d2d7 100644 --- a/icu4c/source/common/ustring.cpp +++ b/icu4c/source/common/ustring.cpp @@ -1463,3 +1463,47 @@ u_terminateWChars(wchar_t *dest, int32_t destCapacity, int32_t length, UErrorCod __TERMINATE_STRING(dest, destCapacity, length, pErrorCode); return length; } + +// Compute the hash code for a string -------------------------------------- *** + +// Moved here from uhash.c so that UnicodeString::hashCode() does not depend +// on UHashtable code. + +/* + Compute the hash by iterating sparsely over about 32 (up to 63) + characters spaced evenly through the string. For each character, + multiply the previous hash value by a prime number and add the new + character in, like a linear congruential random number generator, + producing a pseudorandom deterministic value well distributed over + the output range. 
[LIU] +*/ + +#define STRING_HASH(TYPE, STR, STRLEN, DEREF) \ + int32_t hash = 0; \ + const TYPE *p = (const TYPE*) STR; \ + if (p != NULL) { \ + int32_t len = (int32_t)(STRLEN); \ + int32_t inc = ((len - 32) / 32) + 1; \ + const TYPE *limit = p + len; \ + while (pUnicodeSet::add(0x20).add(0x85).add(0x200e, 0x200f).add(0x2028, 0x2029); - return set; -} - -//eof diff --git a/icu4c/source/common/util.h b/icu4c/source/common/util.h index ac88bd04317..7cb2c5a2e74 100644 --- a/icu4c/source/common/util.h +++ b/icu4c/source/common/util.h @@ -23,7 +23,6 @@ U_NAMESPACE_BEGIN class UnicodeMatcher; -class UnicodeSet; class U_COMMON_API ICU_Utility /* not : public UObject because all methods are static */ { public: @@ -236,15 +235,5 @@ private: U_NAMESPACE_END -/** - * Returns a new set with the Pattern_White_Space characters. - * The caller must close/delete the result. - * Stable set of characters, won't change. - * See UAX #31 Identifier and Pattern Syntax: http://www.unicode.org/reports/tr31/ - * @internal - */ -U_CAPI U_NAMESPACE_QUALIFIER UnicodeSet* U_EXPORT2 -uprv_openPatternWhiteSpaceSet(UErrorCode* ec); - #endif //eof diff --git a/icu4c/source/common/uts46.cpp b/icu4c/source/common/uts46.cpp index e08ee3566e4..59792e5e6aa 100644 --- a/icu4c/source/common/uts46.cpp +++ b/icu4c/source/common/uts46.cpp @@ -1,6 +1,6 @@ /* ******************************************************************************* -* Copyright (C) 2010, International Business Machines +* Copyright (C) 2010-2011, International Business Machines * Corporation and others. All Rights Reserved. 
******************************************************************************* * file name: uts46.cpp @@ -22,6 +22,7 @@ #include "cmemory.h" #include "cstring.h" #include "punycode.h" +#include "ubidi_props.h" #include "ustr_imp.h" #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) @@ -1102,6 +1103,7 @@ isASCIIOkBiDi(const char *s, int32_t length) { UBool UTS46::isLabelOkContextJ(const UChar *label, int32_t labelLength) const { + const UBiDiProps *bdp=ubidi_getSingleton(); // [IDNA2008-Tables] // 200C..200D ; CONTEXTJ # ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER for(int32_t i=0; i -#include +//#include U_NAMESPACE_BEGIN UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(AlphabeticIndex) @@ -47,15 +47,6 @@ sortCollateComparator(const void *context, const void *left, const void *right); static int32_t U_CALLCONV recordCompareFn(const void *context, const void *left, const void *right); -// -// UHash support function, delete a UnicodeSet -// TODO: move this function into uhash. -// -static void U_CALLCONV -uhash_deleteUnicodeSet(void *obj) { - delete static_cast(obj); -} - // UVector support function, delete a Bucket. static void U_CALLCONV alphaIndex_deleteBucket(void *obj) { @@ -183,7 +174,7 @@ void AlphabeticIndex::buildIndex(UErrorCode &status) { // that are the same according to the collator UVector preferenceSorting(status); // Vector of UnicodeStrings; owned by the vector. 
- preferenceSorting.setDeleter(uhash_deleteUnicodeString); + preferenceSorting.setDeleter(uprv_deleteUObject); appendUnicodeSetToUVector(preferenceSorting, *initialLabels_, status); preferenceSorting.sortWithUComparator(PreferenceComparator, &status, status); @@ -236,7 +227,7 @@ void AlphabeticIndex::buildIndex(UErrorCode &status) { const int32_t size = labelSet.size() - 1; if (size > maxLabelCount_) { UVector *newLabels = new UVector(status); - newLabels->setDeleter(uhash_deleteUnicodeString); + newLabels->setDeleter(uprv_deleteUObject); int32_t count = 0; int32_t old = -1; for (int32_t srcIndex=0; srcIndexsize(); srcIndex++) { @@ -580,13 +571,13 @@ void AlphabeticIndex::init(UErrorCode &status) { uhash_compareUnicodeString, // key Comparator, NULL, // value Comparator &status); - uhash_setKeyDeleter(alreadyIn_, uhash_deleteUnicodeString); - uhash_setValueDeleter(alreadyIn_, uhash_deleteUnicodeSet); + uhash_setKeyDeleter(alreadyIn_, uprv_deleteUObject); + uhash_setValueDeleter(alreadyIn_, uprv_deleteUObject); bucketList_ = new UVector(status); bucketList_->setDeleter(alphaIndex_deleteBucket); labels_ = new UVector(status); - labels_->setDeleter(uhash_deleteUnicodeString); + labels_->setDeleter(uprv_deleteUObject); labels_->setComparer(uhash_compareUnicodeString); inputRecords_ = new UVector(status); inputRecords_->setDeleter(alphaIndex_deleteRecord); @@ -839,7 +830,7 @@ UVector *AlphabeticIndex::firstStringsInScript(Collator *ruleBasedCollator, UErr } UVector *dest = new UVector(status); - dest->setDeleter(uhash_deleteUnicodeString); + dest->setDeleter(uprv_deleteUObject); for (uint32_t i = 0; i < sizeof(results) / sizeof(results[0]); ++i) { if (results[i].length() > 0) { dest->addElement(results[i].clone(), status); @@ -876,7 +867,7 @@ UVector *AlphabeticIndex::firstStringsInScript(UErrorCode &status) { return NULL; } UVector *dest = new UVector(status); - dest->setDeleter(uhash_deleteUnicodeString); + dest->setDeleter(uprv_deleteUObject); if (dest == NULL && 
U_SUCCESS(status)) { status = U_MEMORY_ALLOCATION_ERROR; } diff --git a/icu4c/source/i18n/calendar.cpp b/icu4c/source/i18n/calendar.cpp index d41180a5e0f..b6a904aff02 100644 --- a/icu4c/source/i18n/calendar.cpp +++ b/icu4c/source/i18n/calendar.cpp @@ -437,7 +437,7 @@ protected: } else { ret->append((UChar)0x40); // '@' is a variant character ret->append(UNICODE_STRING("calendar=", 9)); - ret->append(UnicodeString(gCalTypes[getCalendarTypeForLocale(loc.getName())])); + ret->append(UnicodeString(gCalTypes[getCalendarTypeForLocale(loc.getName())], -1, US_INV)); } return ret; } diff --git a/icu4c/source/i18n/currpinf.cpp b/icu4c/source/i18n/currpinf.cpp index d3c95b1f7e0..a0d37f13a22 100644 --- a/icu4c/source/i18n/currpinf.cpp +++ b/icu4c/source/i18n/currpinf.cpp @@ -308,7 +308,7 @@ CurrencyPluralInfo::setupCurrencyPluralPattern(const Locale& loc, UErrorCode& st std::cout << "pluralCount: " << pluralCount << "; pattern: " << result_1 << "\n"; #endif - fPluralCountToCurrencyUnitPattern->put(UnicodeString(pluralCount), pattern, status); + fPluralCountToCurrencyUnitPattern->put(UnicodeString(pluralCount, -1, US_INV), pattern, status); } } } diff --git a/icu4c/source/i18n/decContext.c b/icu4c/source/i18n/decContext.c index 513e0215971..3b162f26995 100644 --- a/icu4c/source/i18n/decContext.c +++ b/icu4c/source/i18n/decContext.c @@ -1,7 +1,7 @@ /* ------------------------------------------------------------------ */ /* Decimal Context module */ /* ------------------------------------------------------------------ */ -/* Copyright (c) IBM Corporation, 2000-2010. All rights reserved. */ +/* Copyright (c) IBM Corporation, 2000-2011. All rights reserved. */ /* */ /* This software is made available under the terms of the */ /* ICU License -- ICU 1.8.1 and later. */ @@ -25,10 +25,12 @@ #include "decContext.h" /* context and base types */ #include "decNumberLocal.h" /* decNumber local types, etc. */ +#if 0 /* ICU: No need to test endianness at runtime. 
*/ /* compile-time endian tester [assumes sizeof(Int)>1] */ static const Int mfcone=1; /* constant 1 */ static const Flag *mfctop=(Flag *)&mfcone; /* -> top byte */ #define LITEND *mfctop /* named flag; 1=little-endian */ +#endif /* ------------------------------------------------------------------ */ /* round-for-reround digits */ @@ -210,7 +212,9 @@ U_CAPI decContext * U_EXPORT2 uprv_decContextSetRounding(decContext *context, /* ------------------------------------------------------------------ */ U_CAPI decContext * U_EXPORT2 uprv_decContextSetStatus(decContext *context, uInt status) { context->status|=status; +#if 0 /* ICU: Do not raise signals. */ if (status & context->traps) raise(SIGFPE); +#endif return context;} /* decContextSetStatus */ /* ------------------------------------------------------------------ */ @@ -374,6 +378,7 @@ U_CAPI const char * U_EXPORT2 uprv_decContextStatusToString(const decContext *co /* */ /* No error is possible. */ /* ------------------------------------------------------------------ */ +#if 0 /* ICU: Unused function. Anyway, do not call printf(). */ U_CAPI Int U_EXPORT2 uprv_decContextTestEndian(Flag quiet) { Int res=0; /* optimist */ uInt dle=(uInt)DECLITEND; /* unsign */ @@ -391,6 +396,7 @@ U_CAPI Int U_EXPORT2 uprv_decContextTestEndian(Flag quiet) { } return res; } /* decContextTestEndian */ +#endif /* ------------------------------------------------------------------ */ /* decContextTestSavedStatus -- test bits in saved status */ diff --git a/icu4c/source/i18n/decimfmt.cpp b/icu4c/source/i18n/decimfmt.cpp index 45acd03d73b..1e2a9a09639 100644 --- a/icu4c/source/i18n/decimfmt.cpp +++ b/icu4c/source/i18n/decimfmt.cpp @@ -433,7 +433,7 @@ DecimalFormat::construct(UErrorCode& status, // For most locale, the patterns are probably the same for all // plural count. If not, the right pattern need to be re-applied // during format. 
- fCurrencyPluralInfo->getCurrencyPluralPattern("other", currencyPluralPatternForOther); + fCurrencyPluralInfo->getCurrencyPluralPattern(UNICODE_STRING("other", 5), currencyPluralPatternForOther); patternUsed = &currencyPluralPatternForOther; // TODO: not needed? setCurrencyForSymbols(); @@ -509,7 +509,7 @@ DecimalFormat::setupCurrencyAffixPatterns(UErrorCode& status) { *fPosPrefixPattern, *fPosSuffixPattern, UCURR_SYMBOL_NAME); - fAffixPatternsForCurrency->put("default", affixPtn, status); + fAffixPatternsForCurrency->put(UNICODE_STRING("default", 7), affixPtn, status); } // save the unique currency plural patterns of this locale. @@ -556,14 +556,13 @@ DecimalFormat::setupCurrencyAffixes(const UnicodeString& pattern, const PluralRules* pluralRules = fCurrencyPluralInfo->getPluralRules(); StringEnumeration* keywords = pluralRules->getKeywords(status); if (U_SUCCESS(status)) { - const char* pluralCountCh; - while ((pluralCountCh = keywords->next(NULL, status)) != NULL) { + const UnicodeString* pluralCount; + while ((pluralCount = keywords->snext(status)) != NULL) { if ( U_SUCCESS(status) ) { - UnicodeString pluralCount = UnicodeString(pluralCountCh); - expandAffixAdjustWidth(&pluralCount); + expandAffixAdjustWidth(pluralCount); AffixesForCurrency* affix = new AffixesForCurrency( fNegativePrefix, fNegativeSuffix, fPositivePrefix, fPositiveSuffix); - fAffixesForCurrency->put(pluralCount, affix, status); + fAffixesForCurrency->put(*pluralCount, affix, status); } } } @@ -584,16 +583,15 @@ DecimalFormat::setupCurrencyAffixes(const UnicodeString& pattern, const PluralRules* pluralRules = fCurrencyPluralInfo->getPluralRules(); StringEnumeration* keywords = pluralRules->getKeywords(status); if (U_SUCCESS(status)) { - const char* pluralCountCh; - while ((pluralCountCh = keywords->next(NULL, status)) != NULL) { + const UnicodeString* pluralCount; + while ((pluralCount = keywords->snext(status)) != NULL) { if ( U_SUCCESS(status) ) { - UnicodeString pluralCount =
UnicodeString(pluralCountCh); UnicodeString ptn; - fCurrencyPluralInfo->getCurrencyPluralPattern(pluralCount, ptn); - applyPatternInternally(pluralCount, ptn, false, parseErr, status); + fCurrencyPluralInfo->getCurrencyPluralPattern(*pluralCount, ptn); + applyPatternInternally(*pluralCount, ptn, false, parseErr, status); AffixesForCurrency* affix = new AffixesForCurrency( fNegativePrefix, fNegativeSuffix, fPositivePrefix, fPositiveSuffix); - fPluralAffixesForCurrency->put(pluralCount, affix, status); + fPluralAffixesForCurrency->put(*pluralCount, affix, status); } } } @@ -3281,17 +3279,13 @@ void DecimalFormat::expandAffix(const UnicodeString& pattern, // For other cases, pluralCount == null, // and plural names are not needed. int32_t len; - // TODO: num of char in plural count - char pluralCountChar[10]; - if (pluralCount->length() >= 10) { - break; - } - pluralCount->extract(0, pluralCount->length(), pluralCountChar); + CharString pluralCountChar; + pluralCountChar.appendInvariantChars(*pluralCount, ec); UBool isChoiceFormat; const UChar* s = ucurr_getPluralName(currencyUChars, fSymbols != NULL ? 
fSymbols->getLocale().getName() : Locale::getDefault().getName(), &isChoiceFormat, - pluralCountChar, &len, &ec); + pluralCountChar.data(), &len, &ec); affix += UnicodeString(s, len); handler.addAttribute(kCurrencyField, beginIdx, affix.length()); } else if(intl) { diff --git a/icu4c/source/i18n/dtitvfmt.cpp b/icu4c/source/i18n/dtitvfmt.cpp index 492fff73d08..6a5e84aeb7d 100644 --- a/icu4c/source/i18n/dtitvfmt.cpp +++ b/icu4c/source/i18n/dtitvfmt.cpp @@ -1349,7 +1349,8 @@ DateIntervalFormat::adjustFieldWidth(const UnicodeString& inputSkeleton, DateIntervalInfo::parseSkeleton(inputSkeleton, inputSkeletonFieldWidth); DateIntervalInfo::parseSkeleton(bestMatchSkeleton, bestMatchSkeletonFieldWidth); if ( differenceInfo == 2 ) { - adjustedPtn.findAndReplace("v", "z"); + adjustedPtn.findAndReplace(UnicodeString((UChar)0x76 /* v */), + UnicodeString((UChar)0x7a /* z */)); } UBool inQuote = false; diff --git a/icu4c/source/i18n/dtitvinf.cpp b/icu4c/source/i18n/dtitvinf.cpp index 2cdcd2fc9de..f5a57dd35ab 100644 --- a/icu4c/source/i18n/dtitvinf.cpp +++ b/icu4c/source/i18n/dtitvinf.cpp @@ -267,42 +267,39 @@ DateIntervalInfo::initializeData(const Locale& locale, UErrorCode& err) int32_t size = ures_getSize(itvDtPtnResource); int32_t index; for ( index = 0; index < size; ++index ) { - UResourceBundle* oneRes = ures_getByIndex(itvDtPtnResource, index, - NULL, &status); + LocalUResourceBundlePointer oneRes(ures_getByIndex(itvDtPtnResource, index, + NULL, &status)); if ( U_SUCCESS(status) ) { - const char* skeleton = ures_getKey(oneRes); - if ( skeleton == NULL || - skeletonSet.geti(UnicodeString(skeleton)) == 1 ) { - ures_close(oneRes); + const char* skeleton = ures_getKey(oneRes.getAlias()); + if (skeleton == NULL) { continue; } - skeletonSet.puti(UnicodeString(skeleton), 1, status); + UnicodeString skeletonUniStr(skeleton, -1, US_INV); + if ( skeletonSet.geti(skeletonUniStr) == 1 ) { + continue; + } + skeletonSet.puti(skeletonUniStr, 1, status); if ( uprv_strcmp(skeleton, 
gFallbackPatternTag) == 0 ) { - ures_close(oneRes); continue; // fallback } - - UResourceBundle* intervalPatterns = ures_getByKey( - itvDtPtnResource, skeleton, NULL, &status); - + + LocalUResourceBundlePointer intervalPatterns(ures_getByKey( + itvDtPtnResource, skeleton, NULL, &status)); + if ( U_FAILURE(status) ) { - ures_close(intervalPatterns); - ures_close(oneRes); break; } if ( intervalPatterns == NULL ) { - ures_close(intervalPatterns); - ures_close(oneRes); continue; } - + const UChar* pattern; const char* key; int32_t ptLength; - int32_t ptnNum = ures_getSize(intervalPatterns); + int32_t ptnNum = ures_getSize(intervalPatterns.getAlias()); int32_t ptnIndex; for ( ptnIndex = 0; ptnIndex < ptnNum; ++ptnIndex ) { - pattern = ures_getNextString(intervalPatterns, &ptLength, &key, + pattern = ures_getNextString(intervalPatterns.getAlias(), &ptLength, &key, &status); if ( U_FAILURE(status) ) { break; @@ -323,12 +320,10 @@ DateIntervalInfo::initializeData(const Locale& locale, UErrorCode& err) calendarField = UCAL_MINUTE; } if ( calendarField != UCAL_FIELD_COUNT ) { - setIntervalPatternInternally(skeleton, calendarField, pattern,status); + setIntervalPatternInternally(skeletonUniStr, calendarField, pattern,status); } } - ures_close(intervalPatterns); } - ures_close(oneRes); } } ures_close(itvDtPtnResource); diff --git a/icu4c/source/i18n/locdspnm.cpp b/icu4c/source/i18n/locdspnm.cpp index 83ce4ec4a61..d4f4121cd15 100644 --- a/icu4c/source/i18n/locdspnm.cpp +++ b/icu4c/source/i18n/locdspnm.cpp @@ -1,7 +1,7 @@ /* ******************************************************************************* -* Copyright (C) 2010, International Business Machines Corporation and * -* others. All Rights Reserved. * +* Copyright (C) 2010-2011, International Business Machines Corporation and +* others. All Rights Reserved. 
******************************************************************************* */ @@ -418,7 +418,7 @@ LocaleDisplayNamesImpl::localeDisplayName(const Locale& locale, while ((key = e->next((int32_t *)0, status)) != NULL) { locale.getKeywordValue(key, value, ULOC_KEYWORD_AND_VALUES_CAPACITY, status); appendWithSep(resultRemainder, keyDisplayName(key, temp)) - .append("=") + .append((UChar)0x3d /* = */) .append(keyValueDisplayName(key, value, temp2)); } delete e; diff --git a/icu4c/source/i18n/msgfmt.cpp b/icu4c/source/i18n/msgfmt.cpp index b1644e51ddb..4ed4526ada8 100644 --- a/icu4c/source/i18n/msgfmt.cpp +++ b/icu4c/source/i18n/msgfmt.cpp @@ -539,7 +539,7 @@ void MessageFormat::setArgStartFormat(int32_t argStart, delete formatter; return; } - uhash_setValueDeleter(cachedFormatters, uhash_deleteUObject); + uhash_setValueDeleter(cachedFormatters, uprv_deleteUObject); } if (formatter == NULL) { formatter = new DummyFormat(); @@ -841,7 +841,7 @@ MessageFormat::getFormatNames(UErrorCode& status) { status = U_MEMORY_ALLOCATION_ERROR; return NULL; } - fFormatNames->setDeleter(uhash_deleteUObject); + fFormatNames->setDeleter(uprv_deleteUObject); for (int32_t partIndex = 0; (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) { fFormatNames->addElement(new UnicodeString(getArgName(partIndex)), status); @@ -1201,7 +1201,7 @@ void MessageFormat::copyObjects(const MessageFormat& that, UErrorCode& ec) { if (U_FAILURE(ec)) { return; } - uhash_setValueDeleter(cachedFormatters, uhash_deleteUObject); + uhash_setValueDeleter(cachedFormatters, uprv_deleteUObject); } const int32_t count = uhash_count(that.cachedFormatters); diff --git a/icu4c/source/i18n/plurrule.cpp b/icu4c/source/i18n/plurrule.cpp index bd342b46874..3b8c114f47e 100644 --- a/icu4c/source/i18n/plurrule.cpp +++ b/icu4c/source/i18n/plurrule.cpp @@ -25,7 +25,6 @@ #include "plurrule_impl.h" #include "putilimp.h" #include "ucln_in.h" -#include "uhash.h" #include "ustrfmt.h" #include "locutil.h" @@ -1381,7 +1380,7 @@ 
PluralKeywordEnumeration::PluralKeywordEnumeration(RuleChain *header, UErrorCode if (U_FAILURE(status)) { return; } - fKeywordNames.setDeleter(uhash_deleteUObject); + fKeywordNames.setDeleter(uprv_deleteUObject); UBool addKeywordOther=TRUE; RuleChain *node=header; while(node!=NULL) { diff --git a/icu4c/source/i18n/rbt_data.cpp b/icu4c/source/i18n/rbt_data.cpp index beef99231f9..bdb7ead8c3b 100644 --- a/icu4c/source/i18n/rbt_data.cpp +++ b/icu4c/source/i18n/rbt_data.cpp @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (C) 1999-2008, International Business Machines +* Copyright (C) 1999-2011, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * Date Name Description @@ -28,7 +28,7 @@ TransliterationRuleData::TransliterationRuleData(UErrorCode& status) if (U_FAILURE(status)) { return; } - variableNames.setValueDeleter(uhash_deleteUnicodeString); + variableNames.setValueDeleter(uprv_deleteUObject); variables = 0; variablesLength = 0; } @@ -41,7 +41,7 @@ TransliterationRuleData::TransliterationRuleData(const TransliterationRuleData& { UErrorCode status = U_ZERO_ERROR; int32_t i = 0; - variableNames.setValueDeleter(uhash_deleteUnicodeString); + variableNames.setValueDeleter(uprv_deleteUObject); int32_t pos = -1; const UHashElement *e; while ((e = other.variableNames.nextElement(pos)) != 0) { diff --git a/icu4c/source/i18n/rbt_pars.cpp b/icu4c/source/i18n/rbt_pars.cpp index d05a0cc925b..54132e01eb4 100644 --- a/icu4c/source/i18n/rbt_pars.cpp +++ b/icu4c/source/i18n/rbt_pars.cpp @@ -825,11 +825,11 @@ idBlockVector(statusReturn), variablesVector(statusReturn), segmentObjects(statusReturn) { - idBlockVector.setDeleter(uhash_deleteUnicodeString); + idBlockVector.setDeleter(uprv_deleteUObject); curData = NULL; compoundFilter = NULL; parseData = NULL; - variableNames.setValueDeleter(uhash_deleteUnicodeString); + 
variableNames.setValueDeleter(uprv_deleteUObject); } /** diff --git a/icu4c/source/i18n/smpdtfmt.cpp b/icu4c/source/i18n/smpdtfmt.cpp index e42c5bab884..2c0d6caef59 100644 --- a/icu4c/source/i18n/smpdtfmt.cpp +++ b/icu4c/source/i18n/smpdtfmt.cpp @@ -3058,7 +3058,7 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC // next step. Otherwise, all time zone names starting with GMT/UT/UTC // (for example, "UTT") will fail. if (gmtLen > 0 && ((text.length() - start) == gmtLen)) { - TimeZone *tz = TimeZone::createTimeZone(UnicodeString("Etc/GMT")); + TimeZone *tz = TimeZone::createTimeZone(UNICODE_STRING("Etc/GMT", 7)); cal.adoptTimeZone(tz); return start + gmtLen; } @@ -3115,7 +3115,7 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC // Step 5 // If we saw standalone GMT zero pattern, then use GMT. if (gmtLen > 0) { - TimeZone *tz = TimeZone::createTimeZone(UnicodeString("Etc/GMT")); + TimeZone *tz = TimeZone::createTimeZone(UNICODE_STRING("Etc/GMT", 7)); cal.adoptTimeZone(tz); return start + gmtLen; } diff --git a/icu4c/source/i18n/smpdtfst.cpp b/icu4c/source/i18n/smpdtfst.cpp index ecae3e7b775..2e138b84fb8 100644 --- a/icu4c/source/i18n/smpdtfst.cpp +++ b/icu4c/source/i18n/smpdtfst.cpp @@ -33,27 +33,27 @@ SimpleDateFormatStaticSets::SimpleDateFormatStaticSets(UErrorCode *status) fTimeIgnorables(NULL), fOtherIgnorables(NULL) { - fDateIgnorables = new UnicodeSet("[-,./[:whitespace:]]", *status); - fTimeIgnorables = new UnicodeSet("[-.:[:whitespace:]]", *status); - fOtherIgnorables = new UnicodeSet("[:whitespace:]", *status); - + fDateIgnorables = new UnicodeSet(UNICODE_STRING("[-,./[:whitespace:]]", 20), *status); + fTimeIgnorables = new UnicodeSet(UNICODE_STRING("[-.:[:whitespace:]]", 19), *status); + fOtherIgnorables = new UnicodeSet(UNICODE_STRING("[:whitespace:]", 14), *status); + // Check for null pointers if (fDateIgnorables == NULL || fTimeIgnorables == NULL || fOtherIgnorables == NULL) { goto 
ExitConstrDeleteAll; } - + // Freeze all the sets fDateIgnorables->freeze(); fTimeIgnorables->freeze(); fOtherIgnorables->freeze(); - + return; // If we reached this point, everything is fine so just exit - + ExitConstrDeleteAll: // Remove all sets and return error delete fDateIgnorables; fDateIgnorables = NULL; delete fTimeIgnorables; fTimeIgnorables = NULL; delete fOtherIgnorables; fOtherIgnorables = NULL; - + *status = U_MEMORY_ALLOCATION_ERROR; } diff --git a/icu4c/source/i18n/tblcoll.cpp b/icu4c/source/i18n/tblcoll.cpp index e2e62b4fcef..04344e07019 100644 --- a/icu4c/source/i18n/tblcoll.cpp +++ b/icu4c/source/i18n/tblcoll.cpp @@ -70,6 +70,7 @@ #include "cmemory.h" #include "cstring.h" #include "putilimp.h" +#include "ustr_imp.h" /* public RuleBasedCollator constructor ---------------------------------- */ @@ -644,7 +645,7 @@ int32_t RuleBasedCollator::hashCode() const { int32_t length; const UChar *rules = ucol_getRules(ucollator, &length); - return uhash_hashUCharsN(rules, length); + return ustr_hashUCharsN(rules, length); } /** diff --git a/icu4c/source/i18n/tmutfmt.cpp b/icu4c/source/i18n/tmutfmt.cpp index 979f68029f0..dbd3ec6e4a7 100644 --- a/icu4c/source/i18n/tmutfmt.cpp +++ b/icu4c/source/i18n/tmutfmt.cpp @@ -11,6 +11,7 @@ #if !UCONFIG_NO_FORMATTING +#include "charstr.h" #include "cmemory.h" #include "cstring.h" #include "hash.h" @@ -495,12 +496,13 @@ TimeUnitFormat::readFromCurrentLocale(UTimeUnitFormatStyle style, const char* ke if (fNumberFormat != NULL) { messageFormat->setFormat(0, *fNumberFormat); } - MessageFormat** formatters = (MessageFormat**)countToPatterns->get(pluralCount); + UnicodeString pluralCountUniStr(pluralCount, -1, US_INV); + MessageFormat** formatters = (MessageFormat**)countToPatterns->get(pluralCountUniStr); if (formatters == NULL) { formatters = (MessageFormat**)uprv_malloc(UTMUTFMT_FORMAT_STYLE_COUNT*sizeof(MessageFormat*)); formatters[UTMUTFMT_FULL_STYLE] = NULL; formatters[UTMUTFMT_ABBREVIATED_STYLE] = NULL; - 
countToPatterns->put(pluralCount, formatters, err); + countToPatterns->put(pluralCountUniStr, formatters, err); if (U_FAILURE(err)) { uprv_free(formatters); } @@ -557,8 +559,8 @@ TimeUnitFormat::checkConsistency(UTimeUnitFormatStyle style, const char* key, UE // StringEnumeration* keywords = fPluralRules->getKeywords(err); if (U_SUCCESS(err)) { - const char* pluralCount; - while ((pluralCount = keywords->next(NULL, err)) != NULL) { + const UnicodeString* pluralCount; + while ((pluralCount = keywords->snext(err)) != NULL) { if ( U_SUCCESS(err) ) { for (int32_t i = 0; i < TimeUnit::UTIMEUNIT_FIELD_COUNT; ++i) { // for each time unit, @@ -572,13 +574,15 @@ TimeUnitFormat::checkConsistency(UTimeUnitFormatStyle style, const char* key, UE } fTimeUnitToCountToPatterns[i] = countToPatterns; } - MessageFormat** formatters = (MessageFormat**)countToPatterns->get(pluralCount); + MessageFormat** formatters = (MessageFormat**)countToPatterns->get(*pluralCount); if( formatters == NULL || formatters[style] == NULL ) { // look through parents const char* localeName = fLocale.getName(); + CharString pluralCountChars; + pluralCountChars.appendInvariantChars(*pluralCount, err); searchInLocaleChain(style, key, localeName, (TimeUnit::UTimeUnitFields)i, - pluralCount, pluralCount, + *pluralCount, pluralCountChars.data(), countToPatterns, err); } } @@ -601,7 +605,7 @@ TimeUnitFormat::checkConsistency(UTimeUnitFormatStyle style, const char* key, UE void TimeUnitFormat::searchInLocaleChain(UTimeUnitFormatStyle style, const char* key, const char* localeName, TimeUnit::UTimeUnitFields srcTimeUnitField, - const char* srcPluralCount, + const UnicodeString& srcPluralCount, const char* searchPluralCount, Hashtable* countToPatterns, UErrorCode& err) { diff --git a/icu4c/source/i18n/transreg.cpp b/icu4c/source/i18n/transreg.cpp index 8e1d4f431fd..90f0adafbeb 100644 --- a/icu4c/source/i18n/transreg.cpp +++ b/icu4c/source/i18n/transreg.cpp @@ -1,6 +1,6 @@ /* 
********************************************************************** -* Copyright (c) 2001-2010, International Business Machines +* Copyright (c) 2001-2011, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * Date Name Description @@ -517,7 +517,7 @@ TransliteratorRegistry::TransliteratorRegistry(UErrorCode& status) : availableIDs(status) { registry.setValueDeleter(deleteEntry); - availableIDs.setDeleter(uhash_deleteUnicodeString); + availableIDs.setDeleter(uprv_deleteUObject); availableIDs.setComparer(uhash_compareCaselessUnicodeString); specDAG.setValueDeleter(uhash_deleteHashtable); } @@ -936,12 +936,12 @@ void TransliteratorRegistry::registerSTV(const UnicodeString& source, if (U_FAILURE(status) || targets == 0) { return; } - targets->setValueDeleter(uhash_deleteUObject); + targets->setValueDeleter(uprv_deleteUObject); specDAG.put(source, targets, status); } UVector *variants = (UVector*) targets->get(target); if (variants == 0) { - variants = new UVector(uhash_deleteUnicodeString, + variants = new UVector(uprv_deleteUObject, uhash_compareCaselessUnicodeString, status); if (variants == 0) { return; diff --git a/icu4c/source/i18n/tridpars.cpp b/icu4c/source/i18n/tridpars.cpp index cf9fd3fb5b6..56cec9521d2 100644 --- a/icu4c/source/i18n/tridpars.cpp +++ b/icu4c/source/i18n/tridpars.cpp @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (c) 2002-2009, International Business Machines Corporation +* Copyright (c) 2002-2011, International Business Machines Corporation * and others. All Rights Reserved. 
********************************************************************** * Date Name Description @@ -907,7 +907,7 @@ void TransliteratorIDParser::init(UErrorCode &status) { status = U_MEMORY_ALLOCATION_ERROR; return; } - special_inverses->setValueDeleter(uhash_deleteUnicodeString); + special_inverses->setValueDeleter(uprv_deleteUObject); umtx_lock(&LOCK); if (SPECIAL_INVERSES == NULL) { diff --git a/icu4c/source/i18n/tzfmt.cpp b/icu4c/source/i18n/tzfmt.cpp index ff36195bbde..571d609a7f5 100644 --- a/icu4c/source/i18n/tzfmt.cpp +++ b/icu4c/source/i18n/tzfmt.cpp @@ -369,7 +369,7 @@ TimeZoneFormatDelegate::TimeZoneFormatDelegate(const Locale& locale, UErrorCode& if (!gTimeZoneFormatCacheInitialized) { gTimeZoneFormatCache = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &status); if (U_SUCCESS(status)) { - uhash_setKeyDeleter(gTimeZoneFormatCache, uhash_freeBlock); + uhash_setKeyDeleter(gTimeZoneFormatCache, uprv_free); uhash_setValueDeleter(gTimeZoneFormatCache, deleteTimeZoneFormatCacheEntry); gTimeZoneFormatCacheInitialized = TRUE; ucln_i18n_registerCleanup(UCLN_I18N_TIMEZONEFORMAT, timeZoneFormat_cleanup); diff --git a/icu4c/source/i18n/tzgnames.cpp b/icu4c/source/i18n/tzgnames.cpp index 3abcc36091b..9ace791007e 100644 --- a/icu4c/source/i18n/tzgnames.cpp +++ b/icu4c/source/i18n/tzgnames.cpp @@ -69,7 +69,7 @@ hashPartialLocationKey(const UHashTok key) { .append(p->mzID) .append((UChar)0x23) .append((UChar)(p->isLong ? 
0x4C : 0x53)); - return uhash_hashUCharsN(str.getBuffer(), str.length()); + return str.hashCode(); } /** @@ -209,7 +209,7 @@ GNameSearchHandler::handleMatch(int32_t matchLength, const CharacterNode *node, if ((nameinfo->type & fTypes) != 0) { // matches a requested type if (fResults == NULL) { - fResults = new UVector(uhash_freeBlock, NULL, status); + fResults = new UVector(uprv_free, NULL, status); if (fResults == NULL) { status = U_MEMORY_ALLOCATION_ERROR; } @@ -350,7 +350,7 @@ TimeZoneGenericNames::initialize(const Locale& locale, UErrorCode& status) { cleanup(); return; } - uhash_setKeyDeleter(fPartialLocationNamesMap, uhash_freeBlock); + uhash_setKeyDeleter(fPartialLocationNamesMap, uprv_free); // no value deleter // target region diff --git a/icu4c/source/i18n/tznames.cpp b/icu4c/source/i18n/tznames.cpp index 276807871a7..402db4daf77 100644 --- a/icu4c/source/i18n/tznames.cpp +++ b/icu4c/source/i18n/tznames.cpp @@ -132,7 +132,7 @@ TimeZoneNamesDelegate::TimeZoneNamesDelegate(const Locale& locale, UErrorCode& s if (!gTimeZoneNamesCacheInitialized) { gTimeZoneNamesCache = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &status); if (U_SUCCESS(status)) { - uhash_setKeyDeleter(gTimeZoneNamesCache, uhash_freeBlock); + uhash_setKeyDeleter(gTimeZoneNamesCache, uprv_free); uhash_setValueDeleter(gTimeZoneNamesCache, deleteTimeZoneNamesCacheEntry); gTimeZoneNamesCacheInitialized = TRUE; ucln_i18n_registerCleanup(UCLN_I18N_TIMEZONENAMES, timeZoneNames_cleanup); @@ -277,7 +277,8 @@ TimeZoneNames::getExemplarLocationName(const UnicodeString& tzID, UnicodeString& int32_t sep = tzID.lastIndexOf((UChar)0x2F /* '/' */); if (sep > 0 && sep + 1 < tzID.length()) { name.setTo(tzID, sep + 1); - name.findAndReplace("_", " "); + name.findAndReplace(UnicodeString((UChar)0x5f /* _ */), + UnicodeString((UChar)0x20 /* space */)); } else { name.setToBogus(); } diff --git a/icu4c/source/i18n/tznames_impl.cpp b/icu4c/source/i18n/tznames_impl.cpp index 55aa8cc8a75..d2682ea3757 100644 
--- a/icu4c/source/i18n/tznames_impl.cpp +++ b/icu4c/source/i18n/tznames_impl.cpp @@ -866,7 +866,7 @@ ZNameSearchHandler::handleMatch(int32_t matchLength, const CharacterNode *node, if ((nameinfo->type & fTypes) != 0) { // matches a requested type if (fResults == NULL) { - fResults = new UVector(uhash_freeBlock, NULL, status); + fResults = new UVector(uprv_free, NULL, status); if (fResults == NULL) { status = U_MEMORY_ALLOCATION_ERROR; } @@ -1090,7 +1090,7 @@ TimeZoneNamesImpl::getMetaZoneID(const UnicodeString& tzID, UDate date, UnicodeS UnicodeString& TimeZoneNamesImpl::getReferenceZoneID(const UnicodeString& mzID, const char* region, UnicodeString& tzID) const { - ZoneMeta::getZoneIdByMetazone(mzID, UnicodeString(region), tzID); + ZoneMeta::getZoneIdByMetazone(mzID, UnicodeString(region, -1, US_INV), tzID); return tzID; } diff --git a/icu4c/source/i18n/ucol_bld.cpp b/icu4c/source/i18n/ucol_bld.cpp index e6c5048d61c..cf7aed33d78 100644 --- a/icu4c/source/i18n/ucol_bld.cpp +++ b/icu4c/source/i18n/ucol_bld.cpp @@ -1397,7 +1397,7 @@ static const char* ReorderingTokenNames[] = { static void toUpper(const char* src, char* dst, uint32_t length) { for (uint32_t i = 0; *src != '\0' && i < length - 1; ++src, ++dst, ++i) { - *dst = toupper(*src); + *dst = uprv_toupper(*src); } *dst = '\0'; } diff --git a/icu4c/source/i18n/ucol_elm.cpp b/icu4c/source/i18n/ucol_elm.cpp index bb686eed6f3..0488324b3ee 100644 --- a/icu4c/source/i18n/ucol_elm.cpp +++ b/icu4c/source/i18n/ucol_elm.cpp @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 2001-2010, International Business Machines +* Copyright (C) 2001-2011, International Business Machines * Corporation and others. All Rights Reserved. 
* ******************************************************************************* @@ -157,7 +157,7 @@ uprv_uca_initTempTable(UCATableHeader *image, UColOptionSet *opts, const UCollat if (U_FAILURE(*status)) { goto allocation_failure; } - uhash_setValueDeleter(t->prefixLookup, uhash_freeBlock); + uhash_setValueDeleter(t->prefixLookup, uprv_free); t->contractions = uprv_cnttab_open(t->mapping, status); if (U_FAILURE(*status)) { diff --git a/icu4c/source/i18n/ucol_tok.cpp b/icu4c/source/i18n/ucol_tok.cpp index 377f7584592..b70c3145330 100644 --- a/icu4c/source/i18n/ucol_tok.cpp +++ b/icu4c/source/i18n/ucol_tok.cpp @@ -135,12 +135,6 @@ U_CDECL_END #endif -/*static inline void U_CALLCONV -uhash_freeBlockWrapper(void *obj) { - uhash_freeBlock(obj); -}*/ - - typedef struct { uint32_t startCE; uint32_t startContCE; @@ -2367,7 +2361,7 @@ void ucol_tok_initTokenList( if(U_FAILURE(*status)) { return; } - uhash_setValueDeleter(src->tailored, uhash_freeBlock); + uhash_setValueDeleter(src->tailored, uprv_free); src->opts = (UColOptionSet *)uprv_malloc(sizeof(UColOptionSet)); /* test for NULL */ diff --git a/icu4c/source/i18n/unicode/tmutfmt.h b/icu4c/source/i18n/unicode/tmutfmt.h index 217176b315f..35f5dc6c6c2 100644 --- a/icu4c/source/i18n/unicode/tmutfmt.h +++ b/icu4c/source/i18n/unicode/tmutfmt.h @@ -243,7 +243,7 @@ private: // fill in fTimeUnitToCountToPatterns from locale fall-back chain void searchInLocaleChain(UTimeUnitFormatStyle style, const char* key, const char* localeName, - TimeUnit::UTimeUnitFields field, const char*, + TimeUnit::UTimeUnitFields field, const UnicodeString&, const char*, Hashtable*, UErrorCode&); // initialize hash table diff --git a/icu4c/source/i18n/uspoof_conf.cpp b/icu4c/source/i18n/uspoof_conf.cpp index 199e0c4d769..c7a8c62ae50 100644 --- a/icu4c/source/i18n/uspoof_conf.cpp +++ b/icu4c/source/i18n/uspoof_conf.cpp @@ -233,19 +233,21 @@ void ConfusabledataBuilder::build(const char * confusables, int32_t confusablesL // Capture Group 8: A 
syntactically invalid line. Anything that didn't match before. // Example Line from the confusables.txt source file: // "1D702 ; 006E 0329 ; SL # MATHEMATICAL ITALIC SMALL ETA ... " - fParseLine = uregex_openC( + UnicodeString pattern( "(?m)^[ \\t]*([0-9A-Fa-f]+)[ \\t]+;" // Match the source char "[ \\t]*([0-9A-Fa-f]+" // Match the replacement char(s) "(?:[ \\t]+[0-9A-Fa-f]+)*)[ \\t]*;" // (continued) "\\s*(?:(SL)|(SA)|(ML)|(MA))" // Match the table type "[ \\t]*(?:#.*?)?$" // Match any trailing #comment "|^([ \\t]*(?:#.*?)?)$" // OR match empty lines or lines with only a #comment - "|^(.*?)$", // OR match any line, which catches illegal lines. - 0, NULL, &status); + "|^(.*?)$", -1, US_INV); // OR match any line, which catches illegal lines. + // TODO: Why are we using the regex C API here? C++ would just take UnicodeString... + fParseLine = uregex_open(pattern.getBuffer(), pattern.length(), 0, NULL, &status); // Regular expression for parsing a hex number out of a space-separated list of them. // Capture group 1 gets the number, with spaces removed. - fParseHexNum = uregex_openC("\\s*([0-9A-F]+)", 0, NULL, &status); + pattern = UNICODE_STRING_SIMPLE("\\s*([0-9A-F]+)"); + fParseHexNum = uregex_open(pattern.getBuffer(), pattern.length(), 0, NULL, &status); // Zap any Byte Order Mark at the start of input. Changing it to a space is benign // given the syntax of the input. diff --git a/icu4c/source/i18n/uspoof_wsconf.cpp b/icu4c/source/i18n/uspoof_wsconf.cpp index de6fa0652d5..2417512de2b 100644 --- a/icu4c/source/i18n/uspoof_wsconf.cpp +++ b/icu4c/source/i18n/uspoof_wsconf.cpp @@ -52,7 +52,6 @@ U_NAMESPACE_USE // The expression will match _all_ lines, including erroneous lines. // The result of the parse is returned via the contents of the (match) groups. static const char *parseExp = - "(?m)" // Multi-line mode "^([ \\t]*(?:#.*?)?)$" // A blank or comment line. Matches Group 1. 
"|^(?:" // OR @@ -115,7 +114,8 @@ void buildWSConfusableData(SpoofImpl *spImpl, const char * confusablesWS, anyCaseTrie = utrie2_open(0, 0, &status); lowerCaseTrie = utrie2_open(0, 0, &status); - + + UnicodeString pattern(parseExp, -1, US_INV); // The scriptSets vector provides a mapping from TRIE values to the set of scripts. // @@ -150,10 +150,8 @@ void buildWSConfusableData(SpoofImpl *spImpl, const char * confusablesWS, } u_strFromUTF8(input, inputLen+1, NULL, confusablesWS, confusablesWSLen, &status); + parseRegexp = uregex_open(pattern.getBuffer(), pattern.length(), 0, NULL, &status); - - parseRegexp = uregex_openC(parseExp, 0, NULL, &status); - // Zap any Byte Order Mark at the start of input. Changing it to a space is benign // given the syntax of the input. if (*input == 0xfeff) { diff --git a/icu4c/source/i18n/vtzone.cpp b/icu4c/source/i18n/vtzone.cpp index 51a6a81ef46..7d5eb0197b5 100644 --- a/icu4c/source/i18n/vtzone.cpp +++ b/icu4c/source/i18n/vtzone.cpp @@ -1,6 +1,6 @@ /* ******************************************************************************* -* Copyright (C) 2007-2010, International Business Machines Corporation and +* Copyright (C) 2007-2011, International Business Machines Corporation and * others. All Rights Reserved. 
******************************************************************************* */ @@ -18,7 +18,6 @@ #include "cmemory.h" #include "uvector.h" #include "gregoimp.h" -#include "uhash.h" U_NAMESPACE_BEGIN @@ -962,7 +961,7 @@ VTimeZone::VTimeZone(const VTimeZone& source) if (source.vtzlines != NULL) { UErrorCode status = U_ZERO_ERROR; int32_t size = source.vtzlines->size(); - vtzlines = new UVector(uhash_deleteUnicodeString, uhash_compareUnicodeString, size, status); + vtzlines = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, size, status); if (U_SUCCESS(status)) { for (int32_t i = 0; i < size; i++) { UnicodeString *line = (UnicodeString*)source.vtzlines->elementAt(i); @@ -1007,7 +1006,7 @@ VTimeZone::operator=(const VTimeZone& right) { if (right.vtzlines != NULL) { UErrorCode status = U_ZERO_ERROR; int32_t size = right.vtzlines->size(); - vtzlines = new UVector(uhash_deleteUnicodeString, uhash_compareUnicodeString, size, status); + vtzlines = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, size, status); if (U_SUCCESS(status)) { for (int32_t i = 0; i < size; i++) { UnicodeString *line = (UnicodeString*)right.vtzlines->elementAt(i); @@ -1242,7 +1241,7 @@ VTimeZone::getTimeZoneRules(const InitialTimeZoneRule*& initial, void VTimeZone::load(VTZReader& reader, UErrorCode& status) { - vtzlines = new UVector(uhash_deleteUnicodeString, uhash_compareUnicodeString, DEFAULT_VTIMEZONE_LINES, status); + vtzlines = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, DEFAULT_VTIMEZONE_LINES, status); if (U_FAILURE(status)) { return; } @@ -1378,7 +1377,7 @@ VTimeZone::parse(UErrorCode& status) { // Set the deleter to remove TimeZoneRule vectors to avoid memory leaks due to unowned TimeZoneRules. 
rules->setDeleter(deleteTimeZoneRule); - dates = new UVector(uhash_deleteUnicodeString, uhash_compareUnicodeString, status); + dates = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status); if (U_FAILURE(status)) { goto cleanupParse; } @@ -1741,7 +1740,7 @@ VTimeZone::write(VTZWriter& writer, UErrorCode& status) const { } else { UVector *customProps = NULL; if (olsonzid.length() > 0 && icutzver.length() > 0) { - customProps = new UVector(uhash_deleteUnicodeString, uhash_compareUnicodeString, status); + customProps = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status); if (U_FAILURE(status)) { return; } @@ -1769,7 +1768,7 @@ VTimeZone::write(UDate start, VTZWriter& writer, UErrorCode& status) /*const*/ { } InitialTimeZoneRule *initial = NULL; UVector *transitionRules = NULL; - UVector customProps(uhash_deleteUnicodeString, uhash_compareUnicodeString, status); + UVector customProps(uprv_deleteUObject, uhash_compareUnicodeString, status); UnicodeString tzid; // Extract rules applicable to dates after the start time @@ -1833,7 +1832,7 @@ VTimeZone::writeSimple(UDate time, VTZWriter& writer, UErrorCode& status) /*cons return; } - UVector customProps(uhash_deleteUnicodeString, uhash_compareUnicodeString, status); + UVector customProps(uprv_deleteUObject, uhash_compareUnicodeString, status); UnicodeString tzid; // Extract simple rules diff --git a/icu4c/source/i18n/zonemeta.cpp b/icu4c/source/i18n/zonemeta.cpp index 01de17cf062..79f8172ac7b 100644 --- a/icu4c/source/i18n/zonemeta.cpp +++ b/icu4c/source/i18n/zonemeta.cpp @@ -763,7 +763,7 @@ ZoneMeta::initAvailableMetaZoneIDs () { if (!gMetaZoneIDsInitialized) { UErrorCode status = U_ZERO_ERROR; UHashtable *metaZoneIDTable = uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, &status); - uhash_setKeyDeleter(metaZoneIDTable, uhash_deleteUnicodeString); + uhash_setKeyDeleter(metaZoneIDTable, uprv_deleteUObject); // No valueDeleter, because the vector maintain the value objects 
UVector *metaZoneIDs = NULL; if (U_SUCCESS(status)) { @@ -775,7 +775,7 @@ ZoneMeta::initAvailableMetaZoneIDs () { uhash_close(metaZoneIDTable); } if (U_SUCCESS(status)) { - metaZoneIDs->setDeleter(uhash_freeBlock); + metaZoneIDs->setDeleter(uprv_free); UResourceBundle *rb = ures_openDirect(NULL, gMetaZones, &status); UResourceBundle *bundle = ures_getByKey(rb, gMapTimezonesTag, NULL, &status); diff --git a/icu4c/source/test/depstest/dependencies.py b/icu4c/source/test/depstest/dependencies.py new file mode 100755 index 00000000000..448f685c511 --- /dev/null +++ b/icu4c/source/test/depstest/dependencies.py @@ -0,0 +1,194 @@ +#! /usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright (C) 2011, International Business Machines +# Corporation and others. All Rights Reserved. +# +# file name: dependencies.py +# +# created on: 2011may26 + +"""Reader module for dependency data for the ICU dependency tester. + +Reads dependencies.txt and makes the data available. + +Attributes: + files: Set of "library/filename.o" files mentioned in the dependencies file. + items: Map from library or group names to item maps. + Each item has a "type" ("library" or "group" or "system_symbols"). + A library or group item can have an optional set of "files" (as in the files attribute). + Each item can have an optional set of "deps" (libraries & groups). + A group item also has a "library" name unless it is a group of system symbols. + The one "system_symbols" item and its groups have sets of "system_symbols" + with standard-library system symbol names. + libraries: Set of library names mentioned in the dependencies file. +""" +__author__ = "Markus W. Scherer" + +# TODO: Support binary items. +# .txt syntax: binary: tools/genrb +# item contents: {"type": "binary"} with optional files & deps +# A binary must not be used as a dependency for anything else. 
+ +import sys + +files = set() +items = {} +libraries = set() + +_line_number = 0 +_groups_to_be_defined = set() + +def _CheckLibraryName(name): + global _line_number + if not name: + sys.exit("Error:%d: \"library: \" without name" % _line_number) + if name.endswith(".o"): + sys.exit("Error:%d: invalid library name %s" % (_line_number, name)) + +def _CheckGroupName(name): + global _line_number + if not name: + sys.exit("Error:%d: \"group: \" without name" % _line_number) + if "/" in name or name.endswith(".o"): + sys.exit("Error:%d: invalid group name %s" % (_line_number, name)) + +def _CheckFileName(name): + global _line_number + if "/" in name or not name.endswith(".o"): + sys.exit("Error:%d: invalid file name %s" % (_line_number, name)) + +def _RemoveComment(line): + global _line_number + _line_number = _line_number + 1 + index = line.find("#") # Remove trailing comment. + if index >= 0: line = line[:index] + return line.rstrip() # Remove trailing newlines etc. + +def _ReadLine(f): + while True: + line = _RemoveComment(f.next()) + if line: return line + +def _ReadFiles(deps_file, item, library_name): + global files + item_files = item.get("files") + while True: + line = _ReadLine(deps_file) + if not line: continue + if not line.startswith(" "): return line + if item_files == None: item_files = item["files"] = set() + for file_name in line.split(): + _CheckFileName(file_name) + file_name = library_name + "/" + file_name + if file_name in files: + sys.exit("Error:%d: file %s listed in multiple groups" % (_line_number, file_name)) + files.add(file_name) + item_files.add(file_name) + +def _IsLibrary(item): return item and item["type"] == "library" + +def _IsLibraryGroup(item): return item and "library" in item + +def _ReadDeps(deps_file, item, library_name): + global items, _line_number, _groups_to_be_defined + item_deps = item.get("deps") + while True: + line = _ReadLine(deps_file) + if not line: continue + if not line.startswith(" "): return line + if item_deps == 
None: item_deps = item["deps"] = set() + for dep in line.split(): + _CheckGroupName(dep) + dep_item = items.get(dep) + if item["type"] == "system_symbols" and (_IsLibraryGroup(dep_item) or _IsLibrary(dep_item)): + sys.exit(("Error:%d: system_symbols depend on previously defined " + + "library or library group %s") % (_line_number, dep)) + if dep_item == None: + # Add this dependency as a new group. + items[dep] = {"type": "group"} + if library_name: items[dep]["library"] = library_name + _groups_to_be_defined.add(dep) + item_deps.add(dep) + +def _AddSystemSymbol(item, symbol): + exports = item.get("system_symbols") + if exports == None: exports = item["system_symbols"] = set() + exports.add(symbol) + +def _ReadSystemSymbols(deps_file, item): + global _line_number + while True: + line = _ReadLine(deps_file) + if not line: continue + if not line.startswith(" "): return line + line = line.lstrip() + if '"' in line: + # One double-quote-enclosed symbol on the line, allows spaces in a symbol name. + symbol = line[1:-1] + if line.startswith('"') and line.endswith('"') and '"' not in symbol: + _AddSystemSymbol(item, symbol) + else: + sys.exit("Error:%d: invalid quoted symbol name %s" % (_line_number, line)) + else: + # One or more space-separate symbols. 
+ for symbol in line.split(): _AddSystemSymbol(item, symbol) + +def Load(): + """Reads "dependencies.txt" and populates the module attributes.""" + global items, libraries, _line_number, _groups_to_be_defined + deps_file = open("dependencies.txt") + try: + line = None + current_type = None + while True: + while not line: line = _RemoveComment(deps_file.next()) + + if line.startswith("library: "): + current_type = "library" + name = line[9:].lstrip() + _CheckLibraryName(name) + if name in items: + sys.exit("Error:%d: library definition using duplicate name %s" % (_line_number, name)) + libraries.add(name) + item = items[name] = {"type": "library"} + line = _ReadFiles(deps_file, item, name) + elif line.startswith("group: "): + current_type = "group" + name = line[7:].lstrip() + _CheckGroupName(name) + if name not in items: + sys.exit("Error:%d: group %s defined before mentioned as a dependency" % + (_line_number, name)) + if name not in _groups_to_be_defined: + sys.exit("Error:%d: group definition using duplicate name %s" % (_line_number, name)) + _groups_to_be_defined.remove(name) + item = items[name] + library_name = item.get("library") + if library_name: + line = _ReadFiles(deps_file, item, library_name) + else: + line = _ReadSystemSymbols(deps_file, item) + elif line == " deps": + if current_type == "library": + line = _ReadDeps(deps_file, items[name], name) + elif current_type == "group": + item = items[name] + line = _ReadDeps(deps_file, item, item.get("library")) + elif current_type == "system_symbols": + item = items[current_type] + line = _ReadDeps(deps_file, item, None) + else: + sys.exit("Error:%d: deps before any library or group" % _line_number) + elif line == "system_symbols:": + current_type = "system_symbols" + if current_type in items: + sys.exit("Error:%d: duplicate entry for system_symbols" % _line_number) + item = items[current_type] = {"type": current_type} + line = _ReadSystemSymbols(deps_file, item) + else: + sys.exit("Syntax error:%d: %s" % 
(_line_number, line)) + except StopIteration: + pass + if _groups_to_be_defined: + sys.exit("Error: some groups mentioned in dependencies are undefined: %s" % _groups_to_be_defined) diff --git a/icu4c/source/test/depstest/dependencies.txt b/icu4c/source/test/depstest/dependencies.txt new file mode 100644 index 00000000000..3e87fdfa394 --- /dev/null +++ b/icu4c/source/test/depstest/dependencies.txt @@ -0,0 +1,893 @@ +# Copyright (C) 2011, International Business Machines +# Corporation and others. All Rights Reserved. +# +# file name: dependencies.txt +# +# created on: 2011may26 +# created by: Markus W. Scherer + +# Standard library symbols used by ICU --------------------------------------- # + +system_symbols: + deps + # C + PIC system_debug errno_perror malloc_functions c_strings c_string_formatting + floating_point trigonometry + stdlib_qsort + pthread system_locale + stdio_input stdio_output file_io readlink_function dir_io mmap_functions dlfcn + # C++ + cplusplus iostream + +group: PIC + # Position-Independent Code (-fPIC) requires a Global Offset Table. + _GLOBAL_OFFSET_TABLE_ + +group: system_debug + __assert_fail __stack_chk_fail + +group: errno_perror + perror # putil.cpp uprv_dl_open() calls perror("dlopen") + +group: malloc_functions + free malloc realloc + +group: c_strings + isspace + __ctype_b_loc # for <ctype.h> + # We must not use tolower and toupper because they are system-locale-sensitive (Turkish i). + strlen strchr strrchr strstr strcmp strncmp strcpy strncpy strcat strncat + memcmp memcpy memmove memset + # Additional symbols in an optimized build. + __strcpy_chk __strncpy_chk __strcat_chk __strncat_chk + __rawmemchr __memcpy_chk __memmove_chk + +group: c_string_formatting + atoi atol strtod strtol strtoul + sprintf + # Additional symbols in an optimized build. + __sprintf_chk + +group: floating_point + floor ceil modf fmod log pow sqrt + +group: trigonometry + acos asin atan atan2 cos sin tan + # Additional symbols in an optimized build.
+ sincos + +group: stdlib_qsort + qsort + +group: pthread + pthread_mutex_init pthread_mutex_destroy pthread_mutex_lock pthread_mutex_unlock + +group: system_locale + getenv + nl_langinfo setlocale + gettimeofday localtime_r tzname tzset __timezone + +group: stdio_input + fopen fclose fgets fread fseek ftell rewind feof fileno + # Additional symbols in an optimized build. + __fgets_chk __fread_chk + +group: stdio_output + fflush fwrite + +group: file_io + open close stat + # Additional symbols in an optimized build. + __xstat + +group: readlink_function + readlink # putil.cpp uprv_tzname() calls this in a hack to get the time zone name + +group: dir_io + opendir closedir readdir # for a hack to get the time zone name + +group: mmap_functions # for memory-mapped data loading + mmap munmap + +group: dlfcn + dlopen dlclose dlsym # called by putil.o only for icuplug.o + +group: cplusplus + __dynamic_cast + # The compiler generates references to the global operator delete + # even when no code actually uses it. + # ICU must not _use_ the global operator delete. + "operator delete(void*)" + # ICU also must not use the global operator new. + # "operator new[](unsigned long)" + # _Unwind_Resume is related to exceptions: + # "A call to this routine is inserted as the end of a landing pad that performs cleanup, + # but does not resume normal execution. It causes unwinding to proceed further." + # (Linux Standard Base Specification 1.3) + # Even though ICU does not actually use (nor handle) exceptions. + _Unwind_Resume + +group: iostream + "std::basic_ios<char, std::char_traits<char> >::clear(std::_Ios_Iostate)" + "std::basic_ios<char, std::char_traits<char> >::eof() const" + "std::basic_ios<char, std::char_traits<char> >::fail() const" + "std::basic_ostream<char, std::char_traits<char> >& std::operator<< <std::char_traits<char> >(std::basic_ostream<char, std::char_traits<char> >&, char const*)" + std::istream::get() + std::istream::putback(char) + # Additional symbols in an optimized build.
+ "std::basic_ostream<char, std::char_traits<char> >& std::__ostream_insert<char, std::char_traits<char> >(std::basic_ostream<char, std::char_traits<char> >&, char const*, long)" + +# ICU common library --------------------------------------------------------- # + +library: stubdata + stubdata.o # Exports icudt48_dat. + +library: common + # All files in the common library are listed in its dependencies. + deps + # Libraries and groups that the common library depends on. + date_interval + breakiterator + uts46 filterednormalizer2 normalizer2 canonical_iterator + normlzr unormcmp unorm_it unorm + idna2003 stringprep + stringenumeration + unistr_core unistr_props unistr_case unistr_case_locale unistr_titlecase_brkiter unistr_cnv + uniset_core uniset_props uniset_closure usetiter uset uset_props + uiter + ucasemap ucasemap_titlecase_brkiter script_runs + uprops ubidi_props ucase uscript + ubidi ushape + resourcebundle service_registration resbund_cnv ures_cnv icudataver ucat + loclikely + conversion converter_selector ucnv_set ucnvdisp + messagepattern + icu_utility icu_utility_with_props + ustr_wcs + ucharstriebuilder ucharstrieiterator + bytestriebuilder bytestrieiterator + hashtable uhash uvector uvector32 uvector64 ulist + propsvec utrie2 utrie2_builder + sort + uinit utypes errorcode + icuplug + platform + +group: date_interval # class DateInterval + dtintrv.o + deps + platform + +group: breakiterator + # We could try to split off a breakiterator_builder group, + # but we still need uniset_props for code like in the ThaiBreakEngine constructor + # which does + # fThaiWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Thai:]&[:LineBreak=SA:]]"), status) + brkiter.o brkeng.o ubrk.o + rbbi.o rbbinode.o rbbiscan.o rbbisetb.o rbbistbl.o rbbitblb.o + rbbidata.o rbbirb.o + triedict.o dictbe.o + deps + resourcebundle service_registration + schriter utext uniset_core uniset_props + uhash ustack utrie + uvector32 # for triedict.o + +group: unormcmp # unorm_compare() + unormcmp.o + deps + filterednormalizer2 + uniset_props # for uniset_getUnicode32Instance() +
ucase + +group: unorm_it # UNormIterator + unorm_it.o + deps + unorm uiter + +group: unorm # old normalization C API + unorm.o + deps + filterednormalizer2 + uniset_props # for uniset_getUnicode32Instance() + uiter + +group: normlzr # old Normalizer C++ class + normlzr.o + deps + filterednormalizer2 + uniset_props # for uniset_getUnicode32Instance() + schriter + +group: uts46 + uts46.o + deps + normalizer2 punycode + uchar # for u_charType() (via U_GET_GC_MASK(c)) + ubidi_props # for u_charDirection() & ubidi_getJoiningType() + unistr_core + stringpiece bytestream + +group: filterednormalizer2 + filterednormalizer2.o + deps + normalizer2 + +group: idna2003 + uidna.o + deps + stringprep punycode + +group: stringprep + usprep.o + deps + unorm # could change to use filterednormalizer2 directly for Unicode 3.2 normalization + normalizer2 + ubidi_props + +group: canonical_iterator + caniter.o + deps + normalizer2 usetiter + +group: normalizer2 + normalizer2.o + normalizer2impl.o + deps + uniset_core + unistr_core + utrie2_builder # for building CanonIterData & FCD + uvector # for building CanonIterData + uhash # for the instance cache + udata + +group: punycode + punycode.o + deps + platform + +group: uset_props + uset_props.o + deps + uniset_closure uniset_props uniset_core + +group: uset + uset.o + deps + uniset_core + +group: uniset_closure + uniset_closure.o + deps + uniset_core unistr_case_locale unistr_titlecase_brkiter + +group: uniset_props + uniset_props.o ruleiter.o + deps + uniset_core uprops unistr_case + parsepos + resourcebundle + propname unames + +group: parsepos + parsepos.o + deps + platform + +group: usetiter # UnicodeSetIterator + usetiter.o + deps + uniset_core + +group: uniset_core + unifilt.o unifunct.o + uniset.o bmpset.o unisetspan.o + deps + patternprops + unistr_core icu_utility + uvector + +group: icu_utility_with_props + util_props.o + deps + icu_utility uchar ucase + +group: icu_utility + util.o + deps + unistr_core patternprops + +group: 
utext + utext.o + deps + unistr_core ucase + +group: stringenumeration + ustrenum.o uenum.o + deps + unistr_core + +group: schriter + schriter.o + # The UCharCharacterIterator implements virtual void getText(UnicodeString& result) + # so it depends on UnicodeString, therefore it makes little sense to split + # schriter and uchriter into separate groups. + uchriter.o + deps + chariter unistr_core + +group: chariter + chariter.o + deps + platform + +group: uiter + uiter.o + deps + platform + +group: unistr_cnv + unistr_cnv.o + deps + conversion unistr_core + +group: unistr_core + unistr.o + deps + ustrtrns appendable + +group: uscript + uscript.o # uscript_getCode() accepts a locale ID and loads its script code data + deps + propname resourcebundle + +group: uprops + uprops.o + deps + normalizer2 + uchar + ubidi_props + unistr_case ustring_case # only for case folding + ucase + +group: propname + propname.o + deps + bytestrie + +group: unames + unames.o + deps + uchar udata + +group: script_runs + usc_impl.o + deps + uchar + +group: uchar + uchar.o + deps + utrie2 + +group: messagepattern # for MessageFormat and tools + messagepattern.o + deps + patternprops unistr_core + +group: patternprops + patternprops.o + deps + PIC + +group: ushape + ushape.o + deps + ubidi_props + +group: ubidi + ubidi.o ubidiln.o ubidiwrt.o + deps + ubidi_props + uchar # for doWriteReverse() which uses IS_COMBINING(u_charType(c)) + +group: ubidi_props + ubidi_props.o + deps + utrie2 + +group: unistr_props + unistr_props.o + deps + unistr_core uchar + +group: unistr_case_locale + unistr_case_locale.o + deps + unistr_case ustring_case_locale + +group: unistr_case + unistr_case.o + deps + unistr_core + ustring_case + +group: unistr_titlecase_brkiter + unistr_titlecase_brkiter.o + deps + ustr_titlecase_brkiter + +group: ustr_titlecase_brkiter + ustr_titlecase_brkiter.o + deps + breakiterator + ustring_case_locale ucase + +group: ucasemap_titlecase_brkiter + ucasemap_titlecase_brkiter.o + deps + 
ucasemap breakiterator utext + +group: ucasemap + ucasemap.o + deps + ustring_case + resourcebundle # uloc_getName() etc. + +group: ustring_case_locale + ustrcase_locale.o + deps + ustring_case + resourcebundle # for uloc_getDefault() + +group: ustring_case + ustrcase.o + deps + ucase + +group: ucase + ucase.o + deps + utrie2 + +group: uinit + uinit.o + deps + ucnv_io icuplug + +group: converter_selector + ucnvsel.o + deps + conversion propsvec utrie2_builder uset ucnv_set + +group: ucnvdisp # ucnv_getDisplayName() + ucnvdisp.o + deps + conversion resourcebundle + +group: ucnv_set # ucnv_getUnicodeSet + ucnv_set.o + deps + uset + +group: conversion + ustr_cnv.o + ucnv.o ucnv_cnv.o ucnv_bld.o ucnv_cb.o ucnv_err.o + ucnv_ct.o + ucnvmbcs.o ucnv_ext.o + ucnvhz.o ucnvisci.o ucnv_lmb.o ucnv2022.o + ucnvlat1.o ucnv_u7.o ucnv_u8.o ucnv_u16.o ucnv_u32.o + ucnvbocu.o ucnvscsu.o + deps + ucnv_io + +group: ucnv_io + ucnv_io.o + deps + sort stringenumeration udata + +group: service_registration + serv.o servnotf.o servlkf.o servlk.o servls.o servrbf.o servslkf.o + locutil.o + deps + locale_display_names resourcebundle + hashtable uvector + +group: ucat # message-catalog-like API + ucat.o + deps + resourcebundle + +group: locale_display_names + locdispnames.o + deps + locresdata + +group: icudataver # u_getDataVersion() + icudataver.o + deps + resourcebundle + +group: loclikely + loclikely.o + deps + resourcebundle + +group: locresdata + # This was intended to collect locale functions that load resource bundle data. + # See the resourcebundle group about what else loads data. 
+ locresdata.o + deps + resourcebundle + +group: resbund_cnv # paths are Unicode strings + resbund_cnv.o + deps + conversion resourcebundle ures_cnv + +group: ures_cnv # ures_openU, path is a Unicode string + ures_cnv.o + deps + conversion resourcebundle + +group: resourcebundle + resbund.o uresbund.o uresdata.o + locavailable.o + # uloc_tag.c converts between old ICU/LDML/CLDR locale IDs and newer BCP 47 IDs. + # It uses data from resource bundles for some of the mappings. + # We might want to generate .c files for that data, to #include rather than load, + # to minimize dependencies from this code. + # Then we could separate this higher-level locale ID code from the resource bundle code. + uloc.o uloc_tag.o + # Even basic locid.cpp via Locale constructors and Locale::getDefault() + # depend on canonicalization and data loading. + # We can probably only disentangle basic locale ID handling from resource bundle code + # by hardcoding all of the locale ID data. + locid.o locmap.o wintz.o + # Do we need class LocaleBased? 
http://bugs.icu-project.org/trac/ticket/8608 + locbased.o + deps + udata ucol_swp + sort stringenumeration uhash + +group: udata + udata.o ucmndata.o udatamem.o + umapfile.o + deps + uhash charstr stringpiece platform stubdata + file_io mmap_functions + +group: ucharstriebuilder + ucharstriebuilder.o + deps + ucharstrie stringtriebuilder sort + unistr_core + +group: ucharstrieiterator + ucharstrieiterator.o + deps + ucharstrie unistr_core uvector32 + +group: ucharstrie + ucharstrie.o + deps + platform + +group: bytestriebuilder + bytestriebuilder.o + deps + bytestrie stringtriebuilder sort + charstr stringpiece + +group: bytestrieiterator + bytestrieiterator.o + deps + bytestrie charstr uvector32 + +group: bytestrie + bytestrie.o + deps + platform + +group: stringtriebuilder + stringtriebuilder.o + deps + uhash + +group: propsvec + propsvec.o + deps + sort utrie2_builder + +group: utrie2_builder + utrie2_builder.o + deps + platform + utrie2 + utrie # for utrie2_fromUTrie() + ucol_swp # for utrie_swap() + +group: utrie2 + utrie2.o + deps + platform + +group: utrie # Callers should use utrie2 instead. + utrie.o + deps + platform + +group: hashtable # Maps UnicodeString to value. 
+ uhash_us.o + deps + unistr_core + uhash + +group: uhash + uhash.o + deps + platform + +group: ustack + ustack.o + deps + uvector + +group: uvector + uvector.o + deps + platform + sort # for UVector::sort() + +group: uvector32 + uvectr32.o + deps + platform + +group: uvector64 + uvectr64.o + deps + platform + +group: ulist + ulist.o + deps + platform + +group: sort + uarrsort.o + deps + platform + +group: ustr_wcs + ustr_wcs.o + deps + ustrtrns # on platforms where wchar_t is UTF-32 + # platform -- on other platforms + +group: ustrtrns + ustrtrns.o + deps + platform + +group: charstr + charstr.o + deps + unistr_core # for CharString::appendInvariantChars(const UnicodeString &s, UErrorCode &errorCode) + platform + +group: stringpiece + stringpiece.o + deps + PIC c_strings + +group: bytestream + bytestream.o + deps + platform + +group: appendable + appendable.o + deps + platform + +group: icuplug + icuplug.o + deps + platform + +group: ucol_swp + ucol_swp.o + deps + platform + +group: errorcode # ErrorCode base class + errorcode.o + deps + utypes + PIC + +group: utypes # u_errorName() + utypes.o + +group: platform + # Files in the "platform" group. + cmemory.o uobject.o + cstring.o cwchar.o uinvchar.o + ustring.o # Other platform files really just need u_strlen + ustrfmt.o # uprv_itou + utf_impl.o + putil.o + ucln_cmn.o # for putil.o which calls ucln_common_registerCleanup + udataswp.o # for uinvchar.o; TODO: move uinvchar.o swapper functions to udataswp.o? + umath.o + mutex.o umutex.o + utrace.o + deps + # The "platform" group has no ICU dependencies. + PIC system_debug malloc_functions c_strings c_string_formatting + floating_point pthread system_locale + stdio_input readlink_function dir_io + errno_perror dlfcn # Move related code into icuplug.c? 
+ cplusplus + +# ICU i18n library ----------------------------------------------------------- # + +library: i18n + deps + localedata charset_detector spoof_detection + alphabetic_index collation formatting formattable_cnv regex regex_cnv translit + universal_time_scale + uclean_i18n + +group: localedata + ulocdata.o + deps + uniset_props resourcebundle + uset_props # TODO: change to using C++ UnicodeSet, remove this dependency + +group: charset_detector + csdetect.o csmatch.o csr2022.o csrecog.o csrmbcs.o csrsbcs.o csrucode.o csrutf8.o inputext.o ucsdet.o + deps + conversion + uclean_i18n + +group: spoof_detection + uspoof.o uspoof_build.o uspoof_conf.o uspoof_impl.o uspoof_wsconf.o + deps + uniset_props regex unorm uscript + +group: alphabetic_index + alphaindex.o + deps + collation localedata + uclean_i18n + +group: collation + bocsu.o coleitr.o coll.o colldata.o sortkey.o tblcoll.o ucol.o + ucol_bld.o ucol_cnt.o ucol_elm.o ucol_res.o ucol_sit.o ucol_tok.o ucol_wgt.o ucoleitr.o + bms.o bmsearch.o search.o stsearch.o usearch.o + deps + common # TODO: Could be narrower. + uclean_i18n + +group: formatting + # TODO: Try to subdivide this ball of wax. 
+ # locale_display_names2 + locdspnm.o + # currency + ucurr.o + # currencyformat + curramt.o currfmt.o currpinf.o currunit.o + # decimalformat + dcfmtsym.o decfmtst.o decimfmt.o + numfmt.o numsys.o unum.o winnmfmt.o + # rbnf + nfrs.o nfrule.o nfsubs.o rbnf.o + # measureformat + measfmt.o + # dateformat + astro.o buddhcal.o calendar.o cecal.o chnsecal.o coptccal.o ethpccal.o + gregocal.o gregoimp.o hebrwcal.o indiancal.o islamcal.o japancal.o persncal.o taiwncal.o + ucal.o + basictz.o olsontz.o rbtz.o simpletz.o timezone.o tzrule.o tztrans.o + vtzone.o vzone.o wintzimpl.o zonemeta.o zrule.o ztrans.o + tzfmt.o tzgnames.o tznames.o tznames_impl.o + datefmt.o dtfmtsym.o dtitvfmt.o dtitvinf.o dtptngen.o dtrule.o reldtfmt.o + smpdtfmt.o smpdtfst.o udateintervalformat.o udatpg.o windtfmt.o + udat.o + tmunit.o tmutamt.o tmutfmt.o + # messageformat + choicfmt.o msgfmt.o plurfmt.o selfmt.o umsg.o + deps + digitlist formattable format + pluralrules + collation # for rbnf + common + floating_point # sqrt() for astro.o + trigonometry # for astro.o + stdlib_qsort # for ucurr.o (which does not use ICU's uarrsort.o) + uclean_i18n + +group: digitlist + digitlst.o decContext.o decNumber.o + deps + charstr stringpiece unistr_core + +group: formattable + fmtable.o + measure.o + deps + unistr_core digitlist stringpiece charstr + +group: formattable_cnv + fmtable_cnv.o + deps + formattable unistr_cnv conversion + +group: format + format.o fphdlimp.o fpositer.o + deps + resourcebundle parsepos unistr_core uvector32 + +group: pluralrules + plurrule.o upluralrules.o + deps + patternprops resourcebundle uvector + unistr_case_locale + +group: regex_cnv + uregexc.o + deps + regex unistr_cnv + +group: regex + regexcmp.o regexst.o regextxt.o rematch.o repattrn.o uregex.o + deps + uniset_closure utext uvector32 uvector64 ustack + breakiterator + unistr_core + uinit # TODO: Really needed? 
+ uclean_i18n + +group: translit + anytrans.o brktrans.o casetrn.o cpdtrans.o name2uni.o uni2name.o nortrans.o remtrans.o titletrn.o tolowtrn.o toupptrn.o + esctrn.o unesctrn.o nultrans.o + funcrepl.o quant.o rbt.o rbt_data.o rbt_pars.o rbt_rule.o rbt_set.o strmatch.o strrepl.o translit.o transreg.o tridpars.o utrans.o + deps + common + formatting # for Transliterator::getDisplayName() + uclean_i18n + +group: universal_time_scale + utmscale.o + +group: uclean_i18n + ucln_in.o + deps + platform + +# ICU io library ------------------------------------------------------------- # + +library: io + deps + ustdio ustream uclean_io + +group: ustdio + locbund.o sprintf.o sscanf.o ufile.o ufmt_cmn.o uprintf.o uprntf_p.o uscanf.o uscanf_p.o ustdio.o + deps + formatting conversion translit + uclean_io + stdio_output + +group: ustream + ustream.o + deps + unistr_cnv + uchar # for u_isWhitespace() + iostream + +group: uclean_io + ucln_io.o + deps + platform diff --git a/icu4c/source/test/depstest/depstest.py b/icu4c/source/test/depstest/depstest.py new file mode 100755 index 00000000000..f25c8c737d4 --- /dev/null +++ b/icu4c/source/test/depstest/depstest.py @@ -0,0 +1,167 @@ +#! /usr/bin/python +# -*- coding: utf-8 -*- +# +# Copyright (C) 2011, International Business Machines +# Corporation and others. All Rights Reserved. +# +# file name: depstest.py +# +# created on: 2011may24 + +"""ICU dependency tester. + +This probably works only on Linux. + +The exit code is 0 if everything is fine, 1 for errors, 2 for only warnings. + +Sample invocation: + ~/svn.icu/trunk/src/source/test/depstest$ ./depstest.py ~/svn.icu/trunk/dbg +""" + +__author__ = "Markus W. 
Scherer" + +import glob +import os.path +import subprocess +import sys + +import dependencies + +_ignored_symbols = set() +_obj_files = {} +_symbols_to_files = {} +_return_value = 0 + +def _ReadObjFile(root_path, library_name, obj_name): + global _ignored_symbols, _obj_files, _symbols_to_files, _return_value + lib_obj_name = library_name + "/" + obj_name + if lib_obj_name in _obj_files: + print "Warning: duplicate .o file " + lib_obj_name + _return_value = 2 + return + + path = os.path.join(root_path, library_name, obj_name) + nm_result = subprocess.Popen(["nm", "--demangle", "--format=sysv", + "--extern-only", "--no-sort", path], + stdout=subprocess.PIPE).communicate()[0] + obj_imports = set() + obj_exports = set() + for line in nm_result.splitlines(): + fields = line.split("|") + if len(fields) == 1: continue + name = fields[0].strip() + # Ignore symbols like '__cxa_pure_virtual', + # 'vtable for __cxxabiv1::__si_class_type_info' or + # 'DW.ref.__gxx_personality_v0'. + if name.startswith("__cxa") or "__cxxabi" in name or "__gxx" in name: + _ignored_symbols.add(name) + continue + type = fields[2].strip() + if type == "U": + obj_imports.add(name) + else: + # TODO: Investigate weak symbols (V, W) with or without values. + obj_exports.add(name) + _symbols_to_files[name] = lib_obj_name + _obj_files[lib_obj_name] = {"imports": obj_imports, "exports": obj_exports} + +def _ReadLibrary(root_path, library_name): + obj_paths = glob.glob(os.path.join(root_path, library_name, "*.o")) + for path in obj_paths: + _ReadObjFile(root_path, library_name, os.path.basename(path)) + +def _Resolve(name, parents): + global _ignored_symbols, _obj_files, _symbols_to_files, _return_value + item = dependencies.items[name] + item_type = item["type"] + if name in parents: + sys.exit("Error: %s %s has a circular dependency on itself: %s" % + (item_type, name, parents)) + # Check if already cached. + exports = item.get("exports") + if exports is not None: return item + # Calculate recursively.
+ parents.append(name) + imports = set() + exports = set() + system_symbols = item.get("system_symbols") + if system_symbols is None: system_symbols = item["system_symbols"] = set() + files = item.get("files") + if files: + for file_name in files: + obj_file = _obj_files[file_name] + imports |= obj_file["imports"] + exports |= obj_file["exports"] + imports -= exports | _ignored_symbols + deps = item.get("deps") + if deps: + for dep in deps: + dep_item = _Resolve(dep, parents) + # Detect whether this item needs to depend on dep, + # except when this item has no files, that is, when it is just + # a deliberate umbrella group or library. + dep_exports = dep_item["exports"] + dep_system_symbols = dep_item["system_symbols"] + if files and imports.isdisjoint(dep_exports) and imports.isdisjoint(dep_system_symbols): + print "Info: %s %s does not need to depend on %s\n" % (item_type, name, dep) + # We always include the dependency's exports, even if we do not need them + # to satisfy local imports. + exports |= dep_exports + system_symbols |= dep_system_symbols + item["exports"] = exports + item["system_symbols"] = system_symbols + imports -= exports | system_symbols + for symbol in imports: + for file_name in files: + if symbol in _obj_files[file_name]["imports"]: + sys.stderr.write("Error: %s %s file %s imports %s but %s does not depend on %s\n" % + (item_type, name, file_name, symbol, name, _symbols_to_files.get(symbol))) + _return_value = 1 + del parents[-1] + return item + +def Process(root_path): + """Loads dependencies.txt, reads the libraries' .o files, and processes them. + + Modifies dependencies.items: Recursively builds each item's system_symbols and exports.
+ """ + global _ignored_symbols, _obj_files, _return_value + dependencies.Load() + for name_and_item in dependencies.items.iteritems(): + name = name_and_item[0] + item = name_and_item[1] + system_symbols = item.get("system_symbols") + if system_symbols: + for symbol in system_symbols: + _symbols_to_files[symbol] = name + for library_name in dependencies.libraries: + _ReadLibrary(root_path, library_name) + o_files_set = set(_obj_files.keys()) + files_missing_from_deps = o_files_set - dependencies.files + files_missing_from_build = dependencies.files - o_files_set + if files_missing_from_deps: + sys.stderr.write("Error: files missing from dependencies.txt:\n%s\n" % + sorted(files_missing_from_deps)) + _return_value = 1 + if files_missing_from_build: + sys.stderr.write("Error: files in dependencies.txt but not built:\n%s\n" % + sorted(files_missing_from_build)) + _return_value = 1 + if _return_value != 1: # A warning (2) must not skip the dependency check. + for library_name in dependencies.libraries: + _Resolve(library_name, []) + +def main(): + global _return_value + if len(sys.argv) <= 1: + sys.exit(("Command line error: " + + "need one argument with the root path to the built ICU libraries/*.o files.")) + Process(sys.argv[1]) + if _ignored_symbols: + print "Info: ignored symbols:\n%s" % sorted(_ignored_symbols) + if not _return_value: + print "OK: Specified and actual dependencies match." + return _return_value + +if __name__ == "__main__": + sys.exit(main()) diff --git a/icu4c/source/test/intltest/canittst.cpp b/icu4c/source/test/intltest/canittst.cpp index a1b9a2ad318..fe85b1fac3f 100644 --- a/icu4c/source/test/intltest/canittst.cpp +++ b/icu4c/source/test/intltest/canittst.cpp @@ -1,6 +1,6 @@ /******************************************************************** * COPYRIGHT: - * Copyright (c) 2002-2010, International Business Machines Corporation and + * Copyright (c) 2002-2011, International Business Machines Corporation and * others. All Rights Reserved.
******************************************************************** * @@ -130,7 +130,7 @@ void CanonicalIteratorTest::TestBasic() { // NOTE: we use a TreeSet below to sort the output, which is not guaranteed to be sorted! Hashtable *permutations = new Hashtable(FALSE, status); - permutations->setValueDeleter(uhash_deleteUnicodeString); + permutations->setValueDeleter(uprv_deleteUObject); UnicodeString toPermute("ABC"); CanonicalIterator::permute(toPermute, FALSE, permutations, status); @@ -144,7 +144,7 @@ void CanonicalIteratorTest::TestBasic() { // try samples logln("testing samples"); Hashtable *set = new Hashtable(FALSE, status); - set->setValueDeleter(uhash_deleteUnicodeString); + set->setValueDeleter(uprv_deleteUObject); int32_t i = 0; CanonicalIterator it("", status); if(U_SUCCESS(status)) { diff --git a/icu4c/source/test/intltest/icusvtst.cpp b/icu4c/source/test/intltest/icusvtst.cpp index 1a2e15abfe4..c45c68c9dbf 100644 --- a/icu4c/source/test/intltest/icusvtst.cpp +++ b/icu4c/source/test/intltest/icusvtst.cpp @@ -1,6 +1,6 @@ /** ******************************************************************************* - * Copyright (C) 2001-2010, International Business Machines Corporation and + * Copyright (C) 2001-2011, International Business Machines Corporation and * others. All Rights Reserved. 
******************************************************************************* */ @@ -458,7 +458,7 @@ ICUServiceTest::testAPI_One() // should not be able to locate invisible services { UErrorCode status = U_ZERO_ERROR; - UVector ids(uhash_deleteUnicodeString, uhash_compareUnicodeString, status); + UVector ids(uprv_deleteUObject, uhash_compareUnicodeString, status); service.getVisibleIDs(ids, status); UnicodeString target = "en_US_BAR"; confirmBoolean("18) find invisible", !ids.contains(&target)); @@ -551,7 +551,7 @@ class TestMultipleKeyStringFactory : public ICUServiceFactory { public: TestMultipleKeyStringFactory(const UnicodeString ids[], int32_t count, const UnicodeString& factoryID) : _status(U_ZERO_ERROR) - , _ids(uhash_deleteUnicodeString, uhash_compareUnicodeString, count, _status) + , _ids(uprv_deleteUObject, uhash_compareUnicodeString, count, _status) , _factoryID(factoryID + ": ") { for (int i = 0; i < count; ++i) { @@ -680,7 +680,7 @@ ICUServiceTest::testAPI_Two() // iterate over the visual ids returned by the multiple factory { UErrorCode status = U_ZERO_ERROR; - UVector ids(uhash_deleteUnicodeString, uhash_compareUnicodeString, 0, status); + UVector ids(uprv_deleteUObject, uhash_compareUnicodeString, 0, status); service.getVisibleIDs(ids, status); for (int i = 0; i < ids.size(); ++i) { const UnicodeString* id = (const UnicodeString*)ids[i]; @@ -801,7 +801,7 @@ ICUServiceTest::testAPI_Two() { UErrorCode status = U_ZERO_ERROR; - UVector ids(uhash_deleteUnicodeString, uhash_compareUnicodeString, 0, status); + UVector ids(uprv_deleteUObject, uhash_compareUnicodeString, 0, status); service.getVisibleIDs(ids, status); for (int i = 0; i < ids.size(); ++i) { const UnicodeString* id = (const UnicodeString*)ids[i]; @@ -897,7 +897,7 @@ ICUServiceTest::testRBF() // list all of the resources { UErrorCode status = U_ZERO_ERROR; - UVector ids(uhash_deleteUnicodeString, uhash_compareUnicodeString, 0, status); + UVector ids(uprv_deleteUObject, 
uhash_compareUnicodeString, 0, status); service.getVisibleIDs(ids, status); logln("all visible ids:"); for (int i = 0; i < ids.size(); ++i) { @@ -1164,7 +1164,7 @@ void ICUServiceTest::testLocale() { { UErrorCode status = U_ZERO_ERROR; - UVector ids(uhash_deleteUnicodeString, uhash_compareUnicodeString, 0, status); + UVector ids(uprv_deleteUObject, uhash_compareUnicodeString, 0, status); service.getVisibleIDs(ids, status); logln("all visible ids:"); for (int i = 0; i < ids.size(); ++i) { @@ -1176,7 +1176,7 @@ void ICUServiceTest::testLocale() { Locale::setDefault(loc, status); { UErrorCode status = U_ZERO_ERROR; - UVector ids(uhash_deleteUnicodeString, uhash_compareUnicodeString, 0, status); + UVector ids(uprv_deleteUObject, uhash_compareUnicodeString, 0, status); service.getVisibleIDs(ids, status); logln("all visible ids:"); for (int i = 0; i < ids.size(); ++i) { @@ -1368,7 +1368,7 @@ void ICUServiceTest::testCoverage() } } - UVector ids(uhash_deleteUnicodeString, uhash_compareUnicodeString, status); + UVector ids(uprv_deleteUObject, uhash_compareUnicodeString, status); // yuck, this is awkward to use. All because we pass null in an overload. // TODO: change this. UnicodeString str("Greet"); diff --git a/icu4c/source/test/intltest/svccoll.cpp b/icu4c/source/test/intltest/svccoll.cpp index 3f0d6cc8a52..a781d4a8139 100644 --- a/icu4c/source/test/intltest/svccoll.cpp +++ b/icu4c/source/test/intltest/svccoll.cpp @@ -1,6 +1,6 @@ /* ******************************************************************************* - * Copyright (C) 2003-2010, International Business Machines Corporation and * + * Copyright (C) 2003-2011, International Business Machines Corporation and * * others. All Rights Reserved. 
* ******************************************************************************* */ @@ -353,7 +353,7 @@ void CollationServiceTest::TestRegisterFactory(void) errln("memory allocation error"); return; } - fuFUNames->setValueDeleter(uhash_deleteUnicodeString); + fuFUNames->setValueDeleter(uprv_deleteUObject); fuFUNames->put(fu_FU.getName(), new UnicodeString("ze leetle bunny Fu-Fu"), status); fuFUNames->put(fu_FU_FOO.getName(), new UnicodeString("zee leetel bunny Foo-Foo"), status); diff --git a/icu4c/source/test/intltest/usettest.cpp b/icu4c/source/test/intltest/usettest.cpp index c3d98a3c067..6c40121d930 100644 --- a/icu4c/source/test/intltest/usettest.cpp +++ b/icu4c/source/test/intltest/usettest.cpp @@ -1573,7 +1573,7 @@ public: Hashtable contents; TokenSymbolTable(UErrorCode& ec) : contents(FALSE, ec) { - contents.setValueDeleter(uhash_deleteUnicodeString); + contents.setValueDeleter(uprv_deleteUObject); } ~TokenSymbolTable() {} diff --git a/icu4c/source/tools/genrb/reslist.c b/icu4c/source/tools/genrb/reslist.c index 66d370405d1..5b1a433be77 100644 --- a/icu4c/source/tools/genrb/reslist.c +++ b/icu4c/source/tools/genrb/reslist.c @@ -886,7 +886,7 @@ struct SResource* array_open(struct SRBRoot *bundle, const char *tag, const stru static int32_t U_CALLCONV string_hash(const UHashTok key) { const struct SResource *res = (struct SResource *)key.pointer; - return uhash_hashUCharsN(res->u.fString.fChars, res->u.fString.fLength); + return ustr_hashUCharsN(res->u.fString.fChars, res->u.fString.fLength); } static UBool U_CALLCONV