bytestrie.o bytestrieiterator.o \
ucharstrie.o ucharstriebuilder.o ucharstrieiterator.o \
appendable.o ustr_cnv.o unistr_cnv.o unistr.o unistr_case.o unistr_props.o \
-utf_impl.o ustring.o ustrcase.o ucasemap.o cstring.o ustrfmt.o ustrtrns.o ustr_wcs.o utext.o \
+utf_impl.o ustring.o ustrcase.o ucasemap.o ucasemap_titlecase_brkiter.o cstring.o ustrfmt.o ustrtrns.o ustr_wcs.o utext.o \
+unistr_case_locale.o ustrcase_locale.o unistr_titlecase_brkiter.o ustr_titlecase_brkiter.o \
normalizer2impl.o normalizer2.o filterednormalizer2.o normlzr.o unorm.o unormcmp.o unorm_it.o \
chariter.o schriter.o uchriter.o uiter.o \
patternprops.o uchar.o uprops.o ucase.o propname.o ubidi_props.o ubidi.o ubidiwrt.o ubidiln.o ushape.o \
uscript.o usc_impl.o unames.o \
-utrie.o utrie2.o utrie2_builder.o bmpset.o unisetspan.o uset_props.o uniset_props.o uset.o uniset.o usetiter.o ruleiter.o caniter.o unifilt.o unifunct.o \
+utrie.o utrie2.o utrie2_builder.o bmpset.o unisetspan.o uset_props.o uniset_props.o uniset_closure.o uset.o uniset.o usetiter.o ruleiter.o caniter.o unifilt.o unifunct.o \
uarrsort.o brkiter.o ubrk.o brkeng.o dictbe.o triedict.o \
rbbi.o rbbidata.o rbbinode.o rbbirb.o rbbiscan.o rbbisetb.o rbbistbl.o rbbitblb.o \
serv.o servnotf.o servls.o servlk.o servlkf.o servrbf.o servslkf.o \
#include "uhash.h"
#include "uarrsort.h"
#include "uassert.h"
+#include "ustr_imp.h"
U_NAMESPACE_BEGIN
BytesTrieBuilder::BTLinearMatchNode::BTLinearMatchNode(const char *bytes, int32_t len, Node *nextNode)
: LinearMatchNode(len, nextNode), s(bytes) {
- hash=hash*37+uhash_hashCharsN(bytes, len);
+ hash=hash*37+ustr_hashCharsN(bytes, len);
}
UBool
/*
*****************************************************************************
- * Copyright (C) 1996-2010, International Business Machines Corporation and *
+ * Copyright (C) 1996-2011, International Business Machines Corporation and *
* others. All Rights Reserved. *
*****************************************************************************
*/
if(U_FAILURE(status)) {
return;
}
- subpermute.setValueDeleter(uhash_deleteUnicodeString);
+ subpermute.setValueDeleter(uprv_deleteUObject);
for (i = 0; i < source.length(); i += UTF16_CHAR_LENGTH(cp)) {
cp = source.char32At(i);
if (U_FAILURE(status)) {
return 0;
}
- result.setValueDeleter(uhash_deleteUnicodeString);
- permutations.setValueDeleter(uhash_deleteUnicodeString);
- basic.setValueDeleter(uhash_deleteUnicodeString);
+ result.setValueDeleter(uprv_deleteUObject);
+ permutations.setValueDeleter(uprv_deleteUObject);
+ basic.setValueDeleter(uprv_deleteUObject);
UChar USeg[256];
int32_t segLen = segment.extract(USeg, 256, status);
while (iter.next()) {
UChar32 cp2 = iter.getCodepoint();
Hashtable remainder(status);
- remainder.setValueDeleter(uhash_deleteUnicodeString);
+ remainder.setValueDeleter(uprv_deleteUObject);
if (extract(&remainder, cp2, segment, segLen, i, status) == NULL) {
continue;
}
U_CFUNC UBool
cmemory_cleanup(void);
+/**
+ * A function called by <TT>uhash_remove</TT>,
+ * <TT>uhash_close</TT>, or <TT>uhash_put</TT> to delete
+ * an existing key or value.
+ * @param obj A key or value stored in a hashtable
+ * @see uprv_deleteUObject
+ */
+typedef void U_CALLCONV UObjectDeleter(void* obj);
+
+/**
+ * Deleter for UObject instances.
+ * Works for all subclasses of UObject because it has a virtual destructor.
+ */
+U_CAPI void U_EXPORT2
+uprv_deleteUObject(void *obj);
+
#ifdef XP_CPLUSPLUS
U_NAMESPACE_BEGIN
<ClCompile Include="unifilt.cpp" />\r
<ClCompile Include="unifunct.cpp" />\r
<ClCompile Include="uniset.cpp" />\r
+ <ClCompile Include="uniset_closure.cpp" />\r
<ClCompile Include="uniset_props.cpp" />\r
<ClCompile Include="unisetspan.cpp" />\r
<ClCompile Include="uprops.cpp" />\r
<ClCompile Include="stringpiece.cpp" />\r
<ClCompile Include="stringtriebuilder.cpp" />\r
<ClCompile Include="ucasemap.cpp" />\r
+ <ClCompile Include="ucasemap_titlecase_brkiter.cpp" />\r
<ClCompile Include="ucharstrie.cpp" />\r
<ClCompile Include="ucharstriebuilder.cpp" />\r
<ClCompile Include="ucharstrieiterator.cpp" />\r
<ClCompile Include="uiter.cpp" />\r
<ClCompile Include="unistr.cpp" />\r
<ClCompile Include="unistr_case.cpp" />\r
+ <ClCompile Include="unistr_case_locale.cpp" />\r
<ClCompile Include="unistr_cnv.cpp" />\r
<ClCompile Include="unistr_props.cpp" />\r
+ <ClCompile Include="unistr_titlecase_brkiter.cpp" />\r
<ClCompile Include="ustr_cnv.c" />\r
+ <ClCompile Include="ustr_titlecase_brkiter.cpp" />\r
<ClCompile Include="ustr_wcs.cpp" />\r
<ClCompile Include="ustrcase.cpp" />\r
+ <ClCompile Include="ustrcase_locale.cpp" />\r
<ClCompile Include="ustring.cpp" />\r
<ClCompile Include="ustrtrns.cpp" />\r
<ClCompile Include="utext.cpp" />\r
<ClCompile Include="uniset.cpp">\r
<Filter>properties & sets</Filter>\r
</ClCompile>\r
+ <ClCompile Include="uniset_closure.cpp">\r
+ <Filter>properties & sets</Filter>\r
+ </ClCompile>\r
<ClCompile Include="uniset_props.cpp">\r
<Filter>properties & sets</Filter>\r
</ClCompile>\r
<ClCompile Include="ucasemap.cpp">\r
<Filter>strings</Filter>\r
</ClCompile>\r
+ <ClCompile Include="ucasemap_titlecase_brkiter.cpp">\r
+ <Filter>strings</Filter>\r
+ </ClCompile>\r
<ClCompile Include="uchriter.cpp">\r
<Filter>strings</Filter>\r
</ClCompile>\r
<ClCompile Include="unistr_case.cpp">\r
<Filter>strings</Filter>\r
</ClCompile>\r
+ <ClCompile Include="unistr_case_locale.cpp">\r
+ <Filter>strings</Filter>\r
+ </ClCompile>\r
<ClCompile Include="unistr_cnv.cpp">\r
<Filter>strings</Filter>\r
</ClCompile>\r
<ClCompile Include="unistr_props.cpp">\r
<Filter>strings</Filter>\r
</ClCompile>\r
+ <ClCompile Include="unistr_titlecase_brkiter.cpp">\r
+ <Filter>strings</Filter>\r
+ </ClCompile>\r
<ClCompile Include="ustr_cnv.c">\r
<Filter>strings</Filter>\r
</ClCompile>\r
+ <ClCompile Include="ustr_titlecase_brkiter.cpp">\r
+ <Filter>strings</Filter>\r
+ </ClCompile>\r
<ClCompile Include="ustr_wcs.cpp">\r
<Filter>strings</Filter>\r
</ClCompile>\r
<ClCompile Include="ustrcase.cpp">\r
<Filter>strings</Filter>\r
</ClCompile>\r
+ <ClCompile Include="ustrcase_locale.cpp">\r
+ <Filter>strings</Filter>\r
+ </ClCompile>\r
<ClCompile Include="ustring.cpp">\r
<Filter>strings</Filter>\r
</ClCompile>\r
/*
******************************************************************************
-* Copyright (C) 1997-2010, International Business Machines
+* Copyright (C) 1997-2011, International Business Machines
* Corporation and others. All Rights Reserved.
******************************************************************************
* Date Name Description
#include "unicode/unistr.h"
#include "unicode/uobject.h"
+#include "cmemory.h"
#include "uhash.h"
U_NAMESPACE_BEGIN
uhash_init(&hashObj, keyHash, keyComp, valueComp, &status);
if (U_SUCCESS(status)) {
hash = &hashObj;
- uhash_setKeyDeleter(hash, uhash_deleteUnicodeString);
+ uhash_setKeyDeleter(hash, uprv_deleteUObject);
}
}
// Some properties APIs ---------------------------------------------------- ***
-U_CFUNC UNormalizationCheckResult U_EXPORT2
+U_CAPI uint8_t U_EXPORT2
+u_getCombiningClass(UChar32 c) {
+    // The combining class is read from the NFC normalizer implementation.
+    UErrorCode status=U_ZERO_ERROR;
+    const Normalizer2Impl *nfcImpl=Normalizer2Factory::getNFCImpl(status);
+    if(U_FAILURE(status)) {
+        // getNFCImpl() reported an error: fall back to class 0.
+        return 0;
+    }
+    return nfcImpl->getCC(nfcImpl->getNorm16(c));
+}
+
+U_CFUNC UNormalizationCheckResult
unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
if(mode<=UNORM_NONE || UNORM_FCD<=mode) {
return UNORM_YES;
}
}
+U_CFUNC uint16_t
+unorm_getFCD16Simple(UChar32 c) {
+    // Look c up directly in the FCD trie; yield 0 if getFCDTrie() reports an error.
+    UErrorCode status=U_ZERO_ERROR;
+    const UTrie2 *fcdTrie=Normalizer2Factory::getFCDTrie(status);
+    if(U_FAILURE(status)) {
+        return 0;
+    }
+    return UTRIE2_GET16(fcdTrie, c);
+}
+
U_CAPI const uint16_t * U_EXPORT2
unorm_getFCDTrieIndex(UChar32 &fcdHighStart, UErrorCode *pErrorCode) {
const UTrie2 *trie=Normalizer2Factory::getFCDTrie(*pErrorCode);
#include "mutex.h"
#include "normalizer2impl.h"
#include "uassert.h"
-#include "uhash.h"
#include "uset_imp.h"
#include "utrie2.h"
#include "uvector.h"
CanonIterData::CanonIterData(UErrorCode &errorCode) :
trie(utrie2_open(0, 0, &errorCode)),
- canonStartSets(uhash_deleteUObject, NULL, errorCode) {}
+ canonStartSets(uprv_deleteUObject, NULL, errorCode) {}
CanonIterData::~CanonIterData() {
utrie2_close(trie);
* Get the NF*_QC property for a code point, for u_getIntPropertyValue().
* @internal
*/
-U_CFUNC UNormalizationCheckResult U_EXPORT2
+U_CFUNC UNormalizationCheckResult
unorm_getQuickCheck(UChar32 c, UNormalizationMode mode);
+/**
+ * Get the 16-bit FCD value (lead & trail CCs) for a code point, for u_getIntPropertyValue().
+ * @internal
+ */
+U_CFUNC uint16_t
+unorm_getFCD16Simple(UChar32 c);
+
/**
* Internal API, used by collation code.
* Get access to the internal FCD trie table to be able to perform
#include "propname.h"
#include "unicode/uchar.h"
#include "unicode/udata.h"
+#include "unicode/uscript.h"
#include "umutex.h"
#include "cmemory.h"
#include "cstring.h"
U_NAMESPACE_USE
return PropNameData::getPropertyValueEnum(property, alias);
}
+
+U_CAPI const char* U_EXPORT2
+uscript_getName(UScriptCode scriptCode){
+    // A script's name is the long value name of the UCHAR_SCRIPT property.
+    const char *longName=u_getPropertyValueName(UCHAR_SCRIPT, scriptCode, U_LONG_PROPERTY_NAME);
+    return longName;
+}
+
+U_CAPI const char* U_EXPORT2
+uscript_getShortName(UScriptCode scriptCode){
+    // A script's short name is the short value name of the UCHAR_SCRIPT property.
+    const char *shortName=u_getPropertyValueName(UCHAR_SCRIPT, scriptCode, U_SHORT_PROPERTY_NAME);
+    return shortName;
+}
#include "unicode/uchriter.h"
#include "unicode/parsepos.h"
#include "unicode/parseerr.h"
-#include "util.h"
#include "cmemory.h"
#include "cstring.h"
// and the time to build these few sets should be small compared to a
// full break iterator build.
fRuleSets[kRuleSet_rule_char-128] = UnicodeSet(gRuleSet_rule_char_pattern, *rb->fStatus);
- UnicodeSet *whitespaceSet = uprv_openPatternWhiteSpaceSet(rb->fStatus);
- if (U_FAILURE(*rb->fStatus)) {
- return;
- }
- fRuleSets[kRuleSet_white_space-128] = *whitespaceSet;
- delete whitespaceSet;
+ // fRuleSets[kRuleSet_white_space-128] = [:Pattern_White_Space:]
+ fRuleSets[kRuleSet_white_space-128].add(9, 0xd).add(0x20).add(0x85).add(0x200e, 0x200f).add(0x2028, 0x2029);
fRuleSets[kRuleSet_name_char-128] = UnicodeSet(gRuleSet_name_char_pattern, *rb->fStatus);
fRuleSets[kRuleSet_name_start_char-128] = UnicodeSet(gRuleSet_name_start_char_pattern, *rb->fStatus);
fRuleSets[kRuleSet_digit_char-128] = UnicodeSet(gRuleSet_digit_char_pattern, *rb->fStatus);
if (*rb->fStatus == U_ILLEGAL_ARGUMENT_ERROR) {
// This case happens if ICU's data is missing. UnicodeSet tries to look up property
- // names from the init string, can't find them, and claims an illegal arguement.
+ // names from the init string, can't find them, and claims an illegal argument.
// Change the error so that the actual problem will be clearer to users.
*rb->fStatus = U_BRK_INIT_ERROR;
}
pos.setIndex(fScanIndex);
startPos = fScanIndex;
UErrorCode localStatus = U_ZERO_ERROR;
- uset = new UnicodeSet(fRB->fRules, pos, USET_IGNORE_SPACE,
- fSymbolTable,
- localStatus);
+ uset = new UnicodeSet();
if (uset == NULL) {
localStatus = U_MEMORY_ALLOCATION_ERROR;
}
+ // Only parse the pattern when the allocation above succeeded. Calling through
+ // a NULL uset would crash instead of letting the U_MEMORY_ALLOCATION_ERROR
+ // recorded in localStatus reach the failure handling below.
+ if (uset != NULL) {
+     uset->applyPatternIgnoreSpace(fRB->fRules, pos, fSymbolTable, localStatus);
+ }
if (U_FAILURE(localStatus)) {
// TODO: Get more accurate position of the error from UnicodeSet's return info.
// UnicodeSet appears to not be reporting correctly at this time.
DNCache(const Locale& _locale)
: cache(), locale(_locale)
{
- // cache.setKeyDeleter(uhash_deleteUnicodeString);
+ // cache.setKeyDeleter(uprv_deleteUObject);
}
};
// fallback to the one that succeeded, we want to hit the
// cache the first time next goaround.
if (cacheDescriptorList._obj == NULL) {
- cacheDescriptorList._obj = new UVector(uhash_deleteUnicodeString, NULL, 5, status);
+ cacheDescriptorList._obj = new UVector(uprv_deleteUObject, NULL, 5, status);
if (U_FAILURE(status)) {
return NULL;
}
#include "cmemory.h"
#include "servloc.h"
#include "ustrfmt.h"
-#include "uhash.h"
#include "charstr.h"
#include "ucln_cmn.h"
#include "uassert.h"
ServiceEnumeration(const ICULocaleService* service, UErrorCode &status)
: _service(service)
, _timestamp(service->getTimestamp())
- , _ids(uhash_deleteUnicodeString, NULL, status)
+ , _ids(uprv_deleteUObject, NULL, status)
, _pos(0)
{
_service->getVisibleIDs(_ids, status);
ServiceEnumeration(const ServiceEnumeration &other, UErrorCode &status)
: _service(other._service)
, _timestamp(other._timestamp)
- , _ids(uhash_deleteUnicodeString, NULL, status)
+ , _ids(uprv_deleteUObject, NULL, status)
, _pos(0)
{
if(U_SUCCESS(status)) {
errorCode=U_MEMORY_ALLOCATION_ERROR;
}
if(U_SUCCESS(errorCode)) {
- uhash_setKeyDeleter(nodes, uhash_deleteUObject);
+ uhash_setKeyDeleter(nodes, uprv_deleteUObject);
}
}
*/
#include "unicode/utypes.h"
+#include "unicode/brkiter.h"
+#include "unicode/ubrk.h"
#include "unicode/uloc.h"
#include "unicode/ustring.h"
#include "unicode/ucasemap.h"
#if !UCONFIG_NO_BREAK_ITERATION
-#include "unicode/ubrk.h"
#include "unicode/utext.h"
#endif
#include "cmemory.h"
#include "ucase.h"
#include "ustr_imp.h"
+U_NAMESPACE_USE
+
/* UCaseMap service object -------------------------------------------------- */
U_CAPI UCaseMap * U_EXPORT2
ucasemap_close(UCaseMap *csm) {
if(csm!=NULL) {
#if !UCONFIG_NO_BREAK_ITERATION
- ubrk_close(csm->iter);
+ // Do not call ubrk_close() so that we do not depend on all of the BreakIterator code.
+ delete reinterpret_cast<BreakIterator *>(csm->iter);
#endif
uprv_free(csm);
}
csm->options=options;
}
-#if !UCONFIG_NO_BREAK_ITERATION
-
-U_CAPI const UBreakIterator * U_EXPORT2
-ucasemap_getBreakIterator(const UCaseMap *csm) {
- return csm->iter;
-}
-
-U_CAPI void U_EXPORT2
-ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode * /*pErrorCode*/) {
- ubrk_close(csm->iter);
- csm->iter=iterToAdopt;
-}
-
-#endif
-
/* UTF-8 string case mappings ----------------------------------------------- */
/* TODO(markus): Move to a new, separate utf8case.c file. */
#if !UCONFIG_NO_BREAK_ITERATION
-/*
- * Internal titlecasing function.
- */
-static int32_t
-_toTitle(UCaseMap *csm,
+U_CFUNC int32_t U_CALLCONV
+ucasemap_internalUTF8ToTitle(const UCaseMap *csm,
uint8_t *dest, int32_t destCapacity,
- const uint8_t *src, UCaseContext *csc,
- int32_t srcLength,
+ const uint8_t *src, int32_t srcLength,
UErrorCode *pErrorCode) {
- UText utext=UTEXT_INITIALIZER;
const UChar *s;
UChar32 c;
int32_t prev, titleStart, titleLimit, idx, destIndex, length;
UBool isFirstIndex;
- utext_openUTF8(&utext, (const char *)src, srcLength, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
return 0;
}
- if(csm->iter==NULL) {
- csm->iter=ubrk_open(UBRK_WORD, csm->locale,
- NULL, 0,
- pErrorCode);
- }
- ubrk_setUText(csm->iter, &utext, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- utext_close(&utext);
- return 0;
- }
+
+ // Use the C++ abstract base class to minimize dependencies.
+ // TODO: Change UCaseMap.iter to store a BreakIterator directly.
+ BreakIterator *bi=reinterpret_cast<BreakIterator *>(csm->iter);
/* set up local variables */
+ int32_t locCache=csm->locCache;
+ UCaseContext csc=UCASECONTEXT_INITIALIZER;
+ csc.p=(void *)src;
+ csc.limit=srcLength;
destIndex=0;
prev=0;
isFirstIndex=TRUE;
/* find next index where to titlecase */
if(isFirstIndex) {
isFirstIndex=FALSE;
- idx=ubrk_first(csm->iter);
+ idx=bi->first();
} else {
- idx=ubrk_next(csm->iter);
+ idx=bi->next();
}
if(idx==UBRK_DONE || idx>srcLength) {
idx=srcLength;
if(titleStart<titleLimit) {
/* titlecase c which is from [titleStart..titleLimit[ */
- csc->cpStart=titleStart;
- csc->cpLimit=titleLimit;
- c=ucase_toFullTitle(csm->csp, c, utf8_caseContextIterator, csc, &s, csm->locale, &csm->locCache);
+ csc.cpStart=titleStart;
+ csc.cpLimit=titleLimit;
+ c=ucase_toFullTitle(csm->csp, c, utf8_caseContextIterator, &csc, &s, csm->locale, &locCache);
destIndex=appendResult(dest, destIndex, destCapacity, c, s);
-
/* Special case Dutch IJ titlecasing */
if ( titleStart+1 < idx &&
- ucase_getCaseLocale(csm->locale,&csm->locCache) == UCASE_LOC_DUTCH &&
+ ucase_getCaseLocale(csm->locale, &locCache) == UCASE_LOC_DUTCH &&
( src[titleStart] == 0x0049 || src[titleStart] == 0x0069 ) &&
( src[titleStart+1] == 0x004A || src[titleStart+1] == 0x006A )) {
c=0x004A;
_caseMap(
csm, ucase_toFullLower,
dest+destIndex, destCapacity-destIndex,
- src, csc,
+ src, &csc,
titleLimit, idx,
pErrorCode);
} else {
if(destIndex>destCapacity) {
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
}
- utext_close(&utext);
return destIndex;
}
#endif
+/* Lowercasing mapper that is handed to ucasemap_mapUTF8() as its stringCaseMapper. */
+static int32_t U_CALLCONV
+ucasemap_internalUTF8ToLower(const UCaseMap *csm,
+                             uint8_t *dest, int32_t destCapacity,
+                             const uint8_t *src, int32_t srcLength,
+                             UErrorCode *pErrorCode) {
+    /* Case context: points at src and is limited to srcLength. */
+    UCaseContext context=UCASECONTEXT_INITIALIZER;
+    context.limit=srcLength;
+    context.p=(void *)src;
+    /* Map the whole range [0..srcLength[ with the full lowercase mapping. */
+    int32_t mappedLength=_caseMap(csm, ucase_toFullLower,
+                                  dest, destCapacity,
+                                  src, &context, 0, srcLength,
+                                  pErrorCode);
+    return mappedLength;
+}
+
+/* Uppercasing mapper that is handed to ucasemap_mapUTF8() as its stringCaseMapper. */
+static int32_t U_CALLCONV
+ucasemap_internalUTF8ToUpper(const UCaseMap *csm,
+                             uint8_t *dest, int32_t destCapacity,
+                             const uint8_t *src, int32_t srcLength,
+                             UErrorCode *pErrorCode) {
+    /* Case context: points at src and is limited to srcLength. */
+    UCaseContext context=UCASECONTEXT_INITIALIZER;
+    context.limit=srcLength;
+    context.p=(void *)src;
+    /* Map the whole range [0..srcLength[ with the full uppercase mapping. */
+    int32_t mappedLength=_caseMap(csm, ucase_toFullUpper,
+                                  dest, destCapacity,
+                                  src, &context, 0, srcLength,
+                                  pErrorCode);
+    return mappedLength;
+}
+
static int32_t
utf8_foldCase(const UCaseProps *csp,
uint8_t *dest, int32_t destCapacity,
return destIndex;
}
-/*
- * Implement argument checking and buffer handling
- * for string case mapping as a common function.
- */
-
-/* common internal function for public API functions */
+/* Case-folding mapper: forwards to utf8_foldCase() with the case map's properties and options. */
+static int32_t U_CALLCONV
+ucasemap_internalUTF8Fold(const UCaseMap *csm,
+                          uint8_t *dest, int32_t destCapacity,
+                          const uint8_t *src, int32_t srcLength,
+                          UErrorCode *pErrorCode) {
+    int32_t foldedLength=utf8_foldCase(csm->csp,
+                                       dest, destCapacity,
+                                       src, srcLength,
+                                       csm->options, pErrorCode);
+    return foldedLength;
+}
-static int32_t
-caseMap(const UCaseMap *csm,
- uint8_t *dest, int32_t destCapacity,
- const uint8_t *src, int32_t srcLength,
- int32_t toWhichCase,
- UErrorCode *pErrorCode) {
+U_CFUNC int32_t
+ucasemap_mapUTF8(const UCaseMap *csm,
+ uint8_t *dest, int32_t destCapacity,
+ const uint8_t *src, int32_t srcLength,
+ UTF8CaseMapper *stringCaseMapper,
+ UErrorCode *pErrorCode) {
int32_t destLength;
/* check argument values */
return 0;
}
- destLength=0;
-
- if(toWhichCase==FOLD_CASE) {
- destLength=utf8_foldCase(csm->csp, dest, destCapacity, src, srcLength,
- csm->options, pErrorCode);
- } else {
- UCaseContext csc=UCASECONTEXT_INITIALIZER;
-
- csc.p=(void *)src;
- csc.limit=srcLength;
-
- if(toWhichCase==TO_LOWER) {
- destLength=_caseMap(csm, ucase_toFullLower,
- dest, destCapacity,
- src, &csc,
- 0, srcLength,
- pErrorCode);
- } else if(toWhichCase==TO_UPPER) {
- destLength=_caseMap(csm, ucase_toFullUpper,
- dest, destCapacity,
- src, &csc,
- 0, srcLength,
- pErrorCode);
- } else /* if(toWhichCase==TO_TITLE) */ {
-#if UCONFIG_NO_BREAK_ITERATION
- *pErrorCode=U_UNSUPPORTED_ERROR;
-#else
- /* UCaseMap is actually non-const in toTitle() APIs. */
- UCaseMap *tmp = (UCaseMap *)csm;
- destLength=_toTitle(tmp, dest, destCapacity,
- src, &csc, srcLength,
- pErrorCode);
-#endif
- }
- }
-
+ destLength=stringCaseMapper(csm, dest, destCapacity, src, srcLength, pErrorCode);
return u_terminateChars((char *)dest, destCapacity, destLength, pErrorCode);
}
char *dest, int32_t destCapacity,
const char *src, int32_t srcLength,
UErrorCode *pErrorCode) {
- return caseMap(csm,
+ return ucasemap_mapUTF8(csm,
(uint8_t *)dest, destCapacity,
(const uint8_t *)src, srcLength,
- TO_LOWER, pErrorCode);
+ ucasemap_internalUTF8ToLower, pErrorCode);
}
U_CAPI int32_t U_EXPORT2
char *dest, int32_t destCapacity,
const char *src, int32_t srcLength,
UErrorCode *pErrorCode) {
- return caseMap(csm,
- (uint8_t *)dest, destCapacity,
- (const uint8_t *)src, srcLength,
- TO_UPPER, pErrorCode);
-}
-
-#if !UCONFIG_NO_BREAK_ITERATION
-
-U_CAPI int32_t U_EXPORT2
-ucasemap_utf8ToTitle(UCaseMap *csm,
- char *dest, int32_t destCapacity,
- const char *src, int32_t srcLength,
- UErrorCode *pErrorCode) {
- return caseMap(csm,
+ return ucasemap_mapUTF8(csm,
(uint8_t *)dest, destCapacity,
(const uint8_t *)src, srcLength,
- TO_TITLE, pErrorCode);
+ ucasemap_internalUTF8ToUpper, pErrorCode);
}
-#endif
-
U_CAPI int32_t U_EXPORT2
ucasemap_utf8FoldCase(const UCaseMap *csm,
char *dest, int32_t destCapacity,
const char *src, int32_t srcLength,
UErrorCode *pErrorCode) {
- return caseMap(csm,
+ return ucasemap_mapUTF8(csm,
(uint8_t *)dest, destCapacity,
(const uint8_t *)src, srcLength,
- FOLD_CASE, pErrorCode);
+ ucasemap_internalUTF8Fold, pErrorCode);
}
--- /dev/null
+/*
+*******************************************************************************
+* Copyright (C) 2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: ucasemap_titlecase_brkiter.cpp
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2011jun02
+* created by: Markus W. Scherer
+*
+* Titlecasing functions that are based on BreakIterator
+* were moved here to break dependency cycles among parts of the common library.
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+#include "unicode/brkiter.h"
+#include "unicode/ubrk.h"
+#include "unicode/ucasemap.h"
+#include "cmemory.h"
+#include "ucase.h"
+#include "ustr_imp.h"
+
+U_NAMESPACE_USE
+
+// Returns the break iterator pointer currently stored in csm->iter (no copy is made).
+U_CAPI const UBreakIterator * U_EXPORT2
+ucasemap_getBreakIterator(const UCaseMap *csm) {
+ return csm->iter;
+}
+
+U_CAPI void U_EXPORT2
+ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode * /*pErrorCode*/) {
+    // Take the previously stored iterator, install the adopted one, then dispose of the old one.
+    BreakIterator *previous=reinterpret_cast<BreakIterator *>(csm->iter);
+    csm->iter=iterToAdopt;
+    // Do not call ubrk_close() so that we do not depend on all of the BreakIterator code.
+    delete previous;
+}
+
+U_CAPI int32_t U_EXPORT2
+ucasemap_utf8ToTitle(UCaseMap *csm,
+                     char *dest, int32_t destCapacity,
+                     const char *src, int32_t srcLength,
+                     UErrorCode *pErrorCode) {
+    // Wrap the UTF-8 source in a UText so that the break iterator can read it.
+    UText srcText=UTEXT_INITIALIZER;
+    utext_openUTF8(&srcText, src, srcLength, pErrorCode);
+    if(U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    // Open a word break iterator for the case map's locale if none is stored yet.
+    if(NULL==csm->iter) {
+        csm->iter=ubrk_open(UBRK_WORD, csm->locale, NULL, 0, pErrorCode);
+    }
+    ubrk_setUText(csm->iter, &srcText, pErrorCode);
+    // The shared UTF-8 driver does the mapping, with the titlecasing mapper.
+    int32_t titleLength=ucasemap_mapUTF8(csm,
+                                         (uint8_t *)dest, destCapacity,
+                                         (const uint8_t *)src, srcLength,
+                                         ucasemap_internalUTF8ToTitle, pErrorCode);
+    utext_close(&srcText);
+    return titleLength;
+}
+
+#endif // !UCONFIG_NO_BREAK_ITERATION
#include "uarrsort.h"
#include "uassert.h"
#include "uhash.h"
+#include "ustr_imp.h"
U_NAMESPACE_BEGIN
UCharsTrieBuilder::UCTLinearMatchNode::UCTLinearMatchNode(const UChar *units, int32_t len, Node *nextNode)
: LinearMatchNode(len, nextNode), s(units) {
- hash=hash*37+uhash_hashUCharsN(units, len);
+ hash=hash*37+ustr_hashUCharsN(units, len);
}
UBool
/*
******************************************************************************
-* Copyright (C) 1998-2010, International Business Machines Corporation and
+* Copyright (C) 1998-2011, International Business Machines Corporation and
* others. All Rights Reserved.
******************************************************************************
*/
#include "unicode/uchriter.h"
#include "unicode/ustring.h"
-#include "uhash.h"
+#include "ustr_imp.h"
U_NAMESPACE_BEGIN
int32_t
UCharCharacterIterator::hashCode() const {
- return uhash_hashUCharsN(text, textLength) ^ pos ^ begin ^ end;
+ return ustr_hashUCharsN(text, textLength) ^ pos ^ begin ^ end;
}
CharacterIterator*
/*
******************************************************************************
-* *
-* Copyright (C) 2001-2010, International Business Machines *
-* Corporation and others. All Rights Reserved. *
-* *
+*
+* Copyright (C) 2001-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
******************************************************************************
-* file name: ucln_cmn.h
+* file name: ucln.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
#define __UCLN_H__
#include "unicode/utypes.h"
+#include "umutex.h"
/** These are the functions used to register a library's memory cleanup
* functions. Each library should define a single library register function
/**
* Request cleanup for one specific library.
* Not thread safe.
- * Calling this with UCLN_COMMON just calls u_cleanup();
* @param type which library to cleanup
*/
U_CAPI void U_EXPORT2 ucln_cleanupOne(ECleanupLibraryType type);
+/* ucln_cmn.c variables shared with uinit.c */
+U_CDECL_BEGIN
+
+extern UBool gICUInitialized;
+extern UMTX gICUInitMutex;
+
+U_CDECL_END
+
#endif
/*
******************************************************************************
-* Copyright (C) 2001-2010, International Business Machines
+* Copyright (C) 2001-2011, International Business Machines
* Corporation and others. All Rights Reserved.
******************************************************************************
* file name: ucln_cmn.c
#include "unicode/utypes.h"
#include "unicode/uclean.h"
#include "utracimp.h"
-#include "ustr_imp.h"
#include "ucln_cmn.h"
#include "umutex.h"
#include "ucln.h"
#include "uassert.h"
/** Auto-client for UCLN_COMMON **/
-#define UCLN_TYPE UCLN_COMMON
+#define UCLN_TYPE_IS_COMMON
#include "ucln_imp.h"
+U_CDECL_BEGIN
+
+UBool gICUInitialized = FALSE;
+UMTX gICUInitMutex = NULL;
+
+U_CDECL_END
+
static cleanupFunc *gCommonCleanupFunctions[UCLN_COMMON_COUNT];
static cleanupFunc *gLibCleanupFunctions[UCLN_COMMON];
+/************************************************
+ The cleanup order is important in this function.
+ Please be sure that you have read ucln.h
+
+ Sequence: library cleanup, then the ICU init mutex,
+ then the mutex and memory layers, then tracing.
+ ************************************************/
+U_CAPI void U_EXPORT2
+u_cleanup(void)
+{
+ UTRACE_ENTRY_OC(UTRACE_U_CLEANUP);
+ umtx_lock(NULL); /* Force a memory barrier, so that we are sure to see */
+ umtx_unlock(NULL); /* all state left around by any other threads. */
-/* Enables debugging information about when a library is cleaned up. */
-#ifndef UCLN_DEBUG_CLEANUP
-#define UCLN_DEBUG_CLEANUP 0
-#endif
-
+ /* Library-level cleanup runs first, before the mutex and memory layers are torn down. */
+ ucln_lib_cleanup();
-#if defined(UCLN_DEBUG_CLEANUP)
-#include <stdio.h>
-#endif
+ /* gICUInitMutex and gICUInitialized are the variables shared with uinit.c. */
+ umtx_destroy(&gICUInitMutex);
+ umtx_cleanup();
+ cmemory_cleanup(); /* undo any heap functions set by u_setMemoryFunctions(). */
+ gICUInitialized = FALSE;
+ UTRACE_EXIT(); /* Must be before utrace_cleanup(), which turns off tracing. */
+/*#if U_ENABLE_TRACING*/
+ utrace_cleanup();
+/*#endif*/
+}
-static void ucln_cleanup_internal(ECleanupLibraryType libType)
+U_CAPI void U_EXPORT2 ucln_cleanupOne(ECleanupLibraryType libType)
{
if (gLibCleanupFunctions[libType])
{
}
}
-U_CAPI void U_EXPORT2 ucln_cleanupOne(ECleanupLibraryType libType)
-{
- if(libType==UCLN_COMMON) {
-#if UCLN_DEBUG_CLEANUP
- fprintf(stderr, "Cleaning up: UCLN_COMMON with u_cleanup, type %d\n", (int)libType);
-#endif
- u_cleanup();
- } else {
-#if UCLN_DEBUG_CLEANUP
- fprintf(stderr, "Cleaning up: using ucln_cleanup_internal, type %d\n", (int)libType);
-#endif
- ucln_cleanup_internal(libType);
- }
-}
-
-
U_CFUNC void
ucln_common_registerCleanup(ECleanupCommonType type,
cleanupFunc *func)
ECleanupCommonType commonFunc = UCLN_COMMON_START;
for (libType++; libType<UCLN_COMMON; libType++) {
- ucln_cleanup_internal(libType);
+ ucln_cleanupOne(libType);
}
for (commonFunc++; commonFunc<UCLN_COMMON_COUNT; commonFunc++) {
/*
******************************************************************************
-* *
-* Copyright (C) 2009, International Business Machines *
-* Corporation and others. All Rights Reserved. *
-* *
+*
+* Copyright (C) 2009-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
******************************************************************************
* file name: ucln_imp.h
* encoding: US-ASCII
*/
/*static void ucln_unRegisterAutomaticCleanup();*/
+#ifdef UCLN_TYPE_IS_COMMON
+# define UCLN_CLEAN_ME_UP u_cleanup()
+#else
+# define UCLN_CLEAN_ME_UP ucln_cleanupOne(UCLN_TYPE)
+#endif
+
/* ------------ automatic cleanup: registration. Choose ONE ------- */
#if defined(UCLN_AUTO_LOCAL)
/* To use:
static void ucln_atexit_handler()
{
- ucln_cleanupOne(UCLN_TYPE);
+ UCLN_CLEAN_ME_UP;
}
static void ucln_registerAutomaticCleanup()
U_CAPI void U_EXPORT2 UCLN_FINI ()
{
/* This function must be defined, if UCLN_FINI is defined, else link error. */
- ucln_cleanupOne(UCLN_TYPE);
+ UCLN_CLEAN_ME_UP;
}
#elif defined(__GNUC__)
/* GCC - use __attribute((destructor)) */
static void ucln_destructor()
{
- ucln_cleanupOne(UCLN_TYPE);
+ UCLN_CLEAN_ME_UP;
}
/* Windows: DllMain */
case DLL_PROCESS_DETACH:
/* Here is the one we actually care about. */
- ucln_cleanupOne(UCLN_TYPE);
+ UCLN_CLEAN_ME_UP;
break;
/*
**********************************************************************
-* Copyright (C) 2000-2009, International Business Machines
+* Copyright (C) 2000-2011, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: ucnvisci.c
#include "unicode/ucnv.h"
#include "ucnv_cnv.h"
#include "unicode/ucnv_cb.h"
-#include "unicode/uset.h"
#include "cstring.h"
#define UCNV_OPTIONS_VERSION_MASK 0xf
#define PNJ_HA 0x0A39
#define PNJ_RRA 0x0A5C
-static USet* PNJ_BINDI_TIPPI_SET= NULL;
-static USet* PNJ_CONSONANT_SET= NULL;
-
typedef enum {
DEVANAGARI =0,
BENGALI,
{ MALAYALAM, MLM_MASK, MLM }
};
-static void initializeSets() {
- /* TODO: Replace the following two lines with PNJ_CONSONANT_SET = uset_openEmpty(); */
- PNJ_CONSONANT_SET = uset_open(0,0);
- uset_clear(PNJ_CONSONANT_SET);
-
- uset_addRange(PNJ_CONSONANT_SET, 0x0A15, 0x0A28);
- uset_addRange(PNJ_CONSONANT_SET, 0x0A2A, 0x0A30);
- uset_addRange(PNJ_CONSONANT_SET, 0x0A35, 0x0A36);
- uset_addRange(PNJ_CONSONANT_SET, 0x0A38, 0x0A39);
-
- PNJ_BINDI_TIPPI_SET = uset_clone(PNJ_CONSONANT_SET);
- uset_add(PNJ_BINDI_TIPPI_SET, 0x0A05);
- uset_add(PNJ_BINDI_TIPPI_SET, 0x0A07);
- uset_add(PNJ_BINDI_TIPPI_SET, 0x0A3F);
- uset_addRange(PNJ_BINDI_TIPPI_SET, 0x0A41, 0x0A42);
-
- uset_compact(PNJ_CONSONANT_SET);
- uset_compact(PNJ_BINDI_TIPPI_SET);
+/*
+ * For special handling of certain Gurmukhi characters.
+ * Lookup table with 80 entries, indexed by (c - 0x0A00), i.e. one entry
+ * for each code point in 0A00..0A4F. isPNJConsonant() and isPNJBindiTippi()
+ * range-check c before indexing into it.
+ * Bit 0 (value 1): PNJ consonant
+ * Bit 1 (value 2): PNJ Bindi Tippi
+ */
+static const uint8_t pnjMap[80] = {
+ /* 0A00..0A0F */
+ 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 0A10..0A1F */
+ 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ /* 0A20..0A2F */
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3,
+ /* 0A30..0A3F */
+ 3, 0, 0, 0, 0, 3, 3, 0, 3, 3, 0, 0, 0, 0, 0, 2,
+ /* 0A40..0A4F */
+ 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+/* Returns the "PNJ consonant" flag (bit 0 of pnjMap) for c; FALSE outside 0A00..0A4F. */
+static UBool
+isPNJConsonant(UChar32 c) {
+    if (0xa00 <= c && c < 0xa50) {
+        return (UBool)(pnjMap[c - 0xa00] & 1);
+    }
+    return FALSE;
+}
+
+/* Returns the pnjMap entry for c shifted down to its "PNJ Bindi Tippi" bit; FALSE outside 0A00..0A4F. */
+static UBool
+isPNJBindiTippi(UChar32 c) {
+    if (0xa00 <= c && c < 0xa50) {
+        return (UBool)(pnjMap[c - 0xa00] >> 1);
+    }
+    return FALSE;
}
static void _ISCIIOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode) {
return;
}
- /* Ensure that the sets used in special handling of certain Gurmukhi characters are initialized. */
- initializeSets();
-
cnv->extraInfo = uprv_malloc(sizeof(UConverterDataISCII));
if (cnv->extraInfo != NULL) {
}
cnv->extraInfo=NULL;
}
- if (PNJ_CONSONANT_SET != NULL) {
- uset_close(PNJ_CONSONANT_SET);
- PNJ_CONSONANT_SET = NULL;
- }
- if (PNJ_BINDI_TIPPI_SET != NULL) {
- uset_close(PNJ_BINDI_TIPPI_SET);
- PNJ_BINDI_TIPPI_SET = NULL;
- }
}
static const char* _ISCIIgetName(const UConverter* cnv) {
converterData->contextCharFromUnicode = 0x00;
break;
}
- if (converterData->currentDeltaFromUnicode == PNJ_DELTA && tempContextFromUnicode == PNJ_ADHAK && uset_contains(PNJ_CONSONANT_SET, (sourceChar + PNJ_DELTA))) {
+ if (converterData->currentDeltaFromUnicode == PNJ_DELTA && tempContextFromUnicode == PNJ_ADHAK && isPNJConsonant((sourceChar + PNJ_DELTA))) {
/* If the previous codepoint is Adhak and the current codepoint is a consonant, the targetByteUnit should be C + Halant + C. */
/* reset context char */
converterData->contextCharFromUnicode = 0x0000;
if (*toUnicodeStatus != missingCharMarker) {
/* Check to make sure that consonant clusters are handled correct for Gurmukhi script. */
- if (data->currentDeltaToUnicode == PNJ_DELTA && data->prevToUnicodeStatus != 0 && uset_contains(PNJ_CONSONANT_SET, data->prevToUnicodeStatus) &&
+ if (data->currentDeltaToUnicode == PNJ_DELTA && data->prevToUnicodeStatus != 0 && isPNJConsonant(data->prevToUnicodeStatus) &&
(*toUnicodeStatus + PNJ_DELTA) == PNJ_SIGN_VIRAMA && (targetUniChar + PNJ_DELTA) == data->prevToUnicodeStatus) {
/* Consonant clusters C + HALANT + C should be encoded as ADHAK + C */
offset = (int)(source-args->source - 3);
/* Check to make sure that Bindi and Tippi are handled correctly for Gurmukhi script.
* If 0xA2 is preceded by a codepoint in the PNJ_BINDI_TIPPI_SET then the target codepoint should be Tippi instead of Bindi.
*/
- if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_BINDI && uset_contains(PNJ_BINDI_TIPPI_SET, (*toUnicodeStatus + PNJ_DELTA))) {
+ if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_BINDI && isPNJBindiTippi((*toUnicodeStatus + PNJ_DELTA))) {
targetUniChar = PNJ_TIPPI - PNJ_DELTA;
WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,PNJ_DELTA,err);
- } else if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_SIGN_VIRAMA && uset_contains(PNJ_CONSONANT_SET, (*toUnicodeStatus + PNJ_DELTA))) {
+ } else if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_SIGN_VIRAMA && isPNJConsonant((*toUnicodeStatus + PNJ_DELTA))) {
/* Store the current toUnicodeStatus code point for later handling of consonant cluster in Gurmukhi. */
data->prevToUnicodeStatus = *toUnicodeStatus + PNJ_DELTA;
} else {
/*
*******************************************************************************
*
-* Copyright (C) 2008-2009, International Business Machines
+* Copyright (C) 2008-2011, International Business Machines
* Corporation, Google and others. All Rights Reserved.
*
*******************************************************************************
#include "unicode/ucnvsel.h"
+#if !UCONFIG_NO_CONVERSION
+
#include <string.h>
#include "unicode/uchar.h"
}
return selectForMask(sel, mask, status);
}
+
+#endif // !UCONFIG_NO_CONVERSION
/*
******************************************************************************
-* Copyright (C) 1997-2010, International Business Machines
+* Copyright (C) 1997-2011, International Business Machines
* Corporation and others. All Rights Reserved.
******************************************************************************
* Date Name Description
#include "cstring.h"
#include "cmemory.h"
#include "uassert.h"
+#include "ustr_imp.h"
/* This hashtable is implemented as a double hash. All elements are
* stored in a single array with no secondary storage for collision
* PUBLIC Key Hash Functions
********************************************************************/
-/*
- Compute the hash by iterating sparsely over about 32 (up to 63)
- characters spaced evenly through the string. For each character,
- multiply the previous hash value by a prime number and add the new
- character in, like a linear congruential random number generator,
- producing a pseudorandom deterministic value well distributed over
- the output range. [LIU]
-*/
-
-#define STRING_HASH(TYPE, STR, STRLEN, DEREF) \
- int32_t hash = 0; \
- const TYPE *p = (const TYPE*) STR; \
- if (p != NULL) { \
- int32_t len = (int32_t)(STRLEN); \
- int32_t inc = ((len - 32) / 32) + 1; \
- const TYPE *limit = p + len; \
- while (p<limit) { \
- hash = (hash * 37) + DEREF; \
- p += inc; \
- } \
- } \
- return hash
-
U_CAPI int32_t U_EXPORT2
uhash_hashUChars(const UHashTok key) {
- STRING_HASH(UChar, key.pointer, u_strlen(p), *p);
-}
-
-/* Used by UnicodeString to compute its hashcode - Not public API. */
-U_CAPI int32_t U_EXPORT2
-uhash_hashUCharsN(const UChar *str, int32_t length) {
- STRING_HASH(UChar, str, length, *p);
-}
-
-U_CAPI int32_t U_EXPORT2
-uhash_hashCharsN(const char *str, int32_t length) {
- STRING_HASH(char, str, length, *p);
+ const UChar *s = (const UChar *)key.pointer; /* the key is read as a UChar string whose length is measured with u_strlen() below */
+ return s == NULL ? 0 : ustr_hashUCharsN(s, u_strlen(s)); /* a NULL key hashes to 0; otherwise hashing is delegated to ustr_hashUCharsN() */
}
U_CAPI int32_t U_EXPORT2
uhash_hashChars(const UHashTok key) {
- STRING_HASH(uint8_t, key.pointer, uprv_strlen((char*)p), *p);
+    /*
+     * Hashes the char string that key.pointer refers to; a NULL key hashes to 0.
+     * The string length is measured with uprv_strlen(), so the key has to be
+     * terminated. The length is cast to int32_t explicitly, as the replaced
+     * STRING_HASH macro did, instead of relying on an implicit narrowing
+     * conversion at the call.
+     * NOTE(review): the replaced macro read the bytes as uint8_t; whether
+     * ustr_hashCharsN() treats bytes >= 0x80 the same way is not visible
+     * here -- confirm.
+     */
+    const char *s = (const char *)key.pointer;
+    return s == NULL ? 0 : ustr_hashCharsN(s, (int32_t)uprv_strlen(s));
}
U_CAPI int32_t U_EXPORT2
uhash_hashIChars(const UHashTok key) {
- STRING_HASH(uint8_t, key.pointer, uprv_strlen((char*)p), uprv_tolower(*p));
+    /*
+     * Case-insensitive counterpart of uhash_hashChars(): a NULL key hashes
+     * to 0, otherwise hashing is delegated to ustr_hashICharsN().
+     * The length is cast to int32_t explicitly, as the replaced STRING_HASH
+     * macro did, instead of relying on an implicit narrowing conversion.
+     * NOTE(review): the replaced macro lowercased each byte with
+     * uprv_tolower(); presumably ustr_hashICharsN() does the same -- confirm.
+     */
+    const char *s = (const char *)key.pointer;
+    return s == NULL ? 0 : ustr_hashICharsN(s, (int32_t)uprv_strlen(s));
}
U_CAPI UBool U_EXPORT2
uhash_equals(const UHashtable* hash1, const UHashtable* hash2){
-
int32_t count1, count2, pos, i;
if(hash1==hash2){
uhash_compareLong(const UHashTok key1, const UHashTok key2) {
return (UBool)(key1.integer == key2.integer);
}
-
-/********************************************************************
- * PUBLIC Deleter Functions
- ********************************************************************/
-
-U_CAPI void U_EXPORT2
-uhash_freeBlock(void *obj) {
- uprv_free(obj);
-}
-
/*
******************************************************************************
-* Copyright (C) 1997-2010, International Business Machines
+* Copyright (C) 1997-2011, International Business Machines
* Corporation and others. All Rights Reserved.
******************************************************************************
* Date Name Description
#define UHASH_H
#include "unicode/utypes.h"
+#include "cmemory.h"
/**
* UHashtable stores key-value pairs and does moderately fast lookup
*/
typedef UBool U_CALLCONV UValueComparator(const UHashTok val1,
const UHashTok val2);
-/**
- * A function called by <TT>uhash_remove</TT>,
- * <TT>uhash_close</TT>, or <TT>uhash_put</TT> to delete
- * an existing key or value.
- * @param obj A key or value stored in a hashtable
- * @see uhash_deleteUObject
- */
-typedef void U_CALLCONV UObjectDeleter(void* obj);
+
+/* see cmemory.h for UObjectDeleter and uprv_deleteUObject() */
/**
* This specifies whether or not, and how, the hastable resizes itself.
U_CAPI int32_t U_EXPORT2
uhash_hashChars(const UHashTok key);
-/* Used by UnicodeString to compute its hashcode - Not public API. */
-U_CAPI int32_t U_EXPORT2
-uhash_hashUCharsN(const UChar *key, int32_t length);
-
-U_CAPI int32_t U_EXPORT2
-uhash_hashCharsN(const char *key, int32_t length);
-
/**
* Generate a case-insensitive hash code for a null-terminated char*
* string. If the string is not null-terminated do not use this
U_CAPI UBool U_EXPORT2
uhash_compareCaselessUnicodeString(const UHashTok key1, const UHashTok key2);
-/**
- * Deleter function for UnicodeString* keys or values.
- * @param obj The object to be deleted
- */
-U_CAPI void U_EXPORT2
-uhash_deleteUnicodeString(void *obj);
-
/********************************************************************
* int32_t Support Functions
********************************************************************/
U_CAPI void U_EXPORT2
uhash_deleteHashtable(void *obj);
-/**
- * Deleter for UObject instances.
- * @param obj The object to be deleted
- */
-U_CAPI void U_EXPORT2
-uhash_deleteUObject(void *obj);
-
-/**
- * Deleter for any key or value allocated using uprv_malloc. Calls
- * uprv_free.
- * @param obj The object to be deleted
- */
-U_CAPI void U_EXPORT2
-uhash_freeBlock(void *obj);
+/* Use uprv_free() itself as a deleter for any key or value allocated using uprv_malloc. */
/**
* Checks if the given hash tables are equal or not.
/*
******************************************************************************
-* Copyright (C) 1997-2010, International Business Machines
+* Copyright (C) 1997-2011, International Business Machines
* Corporation and others. All Rights Reserved.
******************************************************************************
* Date Name Description
******************************************************************************
*/
-#include "uhash.h"
#include "hash.h"
-#include "uvector.h"
-#include "unicode/unistr.h"
-#include "unicode/uchar.h"
-
-/********************************************************************
- * PUBLIC UnicodeString support functions for UHashtable
- ********************************************************************/
-
-U_CAPI int32_t U_EXPORT2
-uhash_hashUnicodeString(const UHashTok key) {
- U_NAMESPACE_USE
- const UnicodeString *str = (const UnicodeString*) key.pointer;
- return (str == NULL) ? 0 : str->hashCode();
-}
-
-U_CAPI void U_EXPORT2
-uhash_deleteUnicodeString(void *obj) {
- U_NAMESPACE_USE
- delete (UnicodeString*) obj;
-}
-
-U_CAPI UBool U_EXPORT2
-uhash_compareUnicodeString(const UHashTok key1, const UHashTok key2) {
- U_NAMESPACE_USE
- const UnicodeString *str1 = (const UnicodeString*) key1.pointer;
- const UnicodeString *str2 = (const UnicodeString*) key2.pointer;
- if (str1 == str2) {
- return TRUE;
- }
- if (str1 == NULL || str2 == NULL) {
- return FALSE;
- }
- return *str1 == *str2;
-}
/**
* Deleter for Hashtable objects.
delete (Hashtable*) obj;
}
-/**
- * Deleter for UObject instances.
- */
-U_CAPI void U_EXPORT2
-uhash_deleteUObject(void *obj) {
- U_NAMESPACE_USE
- delete (UObject*) obj;
-}
-
//eof
/*
******************************************************************************
* *
-* Copyright (C) 2001-2010, International Business Machines *
+* Copyright (C) 2001-2011, International Business Machines *
* Corporation and others. All Rights Reserved. *
* *
******************************************************************************
#include "unicode/uclean.h"
#include "cmemory.h"
#include "icuplugimp.h"
-#include "uassert.h"
#include "ucln.h"
-#include "ucln_cmn.h"
#include "ucnv_io.h"
#include "umutex.h"
#include "utracimp.h"
-static UBool gICUInitialized = FALSE;
-static UMTX gICUInitMutex = NULL;
-
-
-/************************************************
- The cleanup order is important in this function.
- Please be sure that you have read ucln.h
- ************************************************/
-U_CAPI void U_EXPORT2
-u_cleanup(void)
-{
- UTRACE_ENTRY_OC(UTRACE_U_CLEANUP);
- umtx_lock(NULL); /* Force a memory barrier, so that we are sure to see */
- umtx_unlock(NULL); /* all state left around by any other threads. */
-
- ucln_lib_cleanup();
-
- umtx_destroy(&gICUInitMutex);
- umtx_cleanup();
- cmemory_cleanup(); /* undo any heap functions set by u_setMemoryFunctions(). */
- gICUInitialized = FALSE;
- UTRACE_EXIT(); /* Must be before utrace_cleanup(), which turns off tracing. */
-/*#if U_ENABLE_TRACING*/
- utrace_cleanup();
-/*#endif*/
-}
-
/*
* ICU Initialization Function. Need not be called.
*/
/*
*******************************************************************************
*
-* Copyright (C) 2008-2010, International Business Machines
+* Copyright (C) 2008-2011, International Business Machines
* Corporation, Google and others. All Rights Reserved.
*
*******************************************************************************
#ifndef __ICU_UCNV_SEL_H__
#define __ICU_UCNV_SEL_H__
-#include "unicode/uset.h"
#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "unicode/uset.h"
#include "unicode/utf16.h"
#include "unicode/uenum.h"
#include "unicode/ucnv.h"
ucnvsel_selectForUTF8(const UConverterSelector* sel,
const char *s, int32_t length, UErrorCode *status);
+#endif /* !UCONFIG_NO_CONVERSION */
+
#endif /* __ICU_UCNV_SEL_H__ */
class BMPSet;
class ParsePosition;
+class RBBIRuleScanner;
class SymbolTable;
class UnicodeSetStringSpan;
class UVector;
virtual UBool matchesIndexValue(uint8_t v) const;
private:
+ friend class RBBIRuleScanner;
//----------------------------------------------------------------
// Implementation: Clone as thawed (see ICU4J Freezable)
// Implementation: Pattern parsing
//----------------------------------------------------------------
+ void applyPatternIgnoreSpace(const UnicodeString& pattern,
+ ParsePosition& pos,
+ const SymbolTable* symbols,
+ UErrorCode& status);
+
void applyPattern(RuleCharacterIterator& chars,
const SymbolTable* symbols,
UnicodeString& rebuiltPat,
uint32_t options,
+ UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
UErrorCode& ec);
//----------------------------------------------------------------
#include "unicode/std_string.h"
#include "unicode/stringpiece.h"
#include "unicode/bytestream.h"
+#include "unicode/ucasemap.h"
struct UConverter; // unicode/ucnv.h
class StringThreadTest;
u_strlen(const UChar *s);
#endif
+#ifndef U_STRING_CASE_MAPPER_DEFINED
+#define U_STRING_CASE_MAPPER_DEFINED
+
+/**
+ * Internal string case mapping function type.
+ * UnicodeString::caseMap() takes a pointer to a function of this type
+ * together with the UCaseMap that is passed on as csm.
+ * The U_STRING_CASE_MAPPER_DEFINED guard keeps the typedef from being
+ * defined twice; presumably ustr_imp.h declares the same type under the
+ * same guard -- TODO confirm.
+ * NOTE(review): the parameter contract is not visible here. Presumably
+ * src/srcLength is the input, dest/destCapacity the output buffer, and
+ * the return value the length of the result -- confirm against the
+ * implementations.
+ * @internal
+ */
+typedef int32_t U_CALLCONV
+UStringCaseMapper(const UCaseMap *csm,
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+#endif
+
U_NAMESPACE_BEGIN
class BreakIterator; // unicode/brkiter.h
int32_t **pBufferToDelete = 0,
UBool forceClone = FALSE);
- // common function for case mappings
+ /**
+ * Common function for UnicodeString case mappings.
+ * The stringCaseMapper has the same type UStringCaseMapper
+ * as in ustr_imp.h for ustrcase_map().
+ */
UnicodeString &
- caseMap(BreakIterator *titleIter,
- const char *locale,
- uint32_t options,
- int32_t toWhichCase);
+ caseMap(const UCaseMap *csm, UStringCaseMapper *stringCaseMapper);
// ref counting
void addRef(void);
if (U_FAILURE(status)) {
return FALSE;
}
- strings = new UVector(uhash_deleteUnicodeString,
+ strings = new UVector(uprv_deleteUObject,
uhash_compareUnicodeString, 1, status);
if (strings == NULL) { // Check for memory allocation error.
status = U_MEMORY_ALLOCATION_ERROR;
--- /dev/null
+/*
+*******************************************************************************
+*
+* Copyright (C) 2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: uniset_closure.cpp
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2011may30
+* created by: Markus W. Scherer
+*
+* UnicodeSet::closeOver() and related methods moved here from uniset_props.cpp
+* to simplify dependencies.
+* In particular, this depends on the BreakIterator, but the BreakIterator
+* code also builds UnicodeSets from patterns and needs uniset_props.
+*/
+
+#include "unicode/brkiter.h"
+#include "unicode/locid.h"
+#include "unicode/parsepos.h"
+#include "unicode/uniset.h"
+#include "cmemory.h"
+#include "ruleiter.h"
+#include "ucase.h"
+#include "util.h"
+#include "uvector.h"
+
+// initial storage. Must be >= 0
+// *** same as in uniset.cpp ! ***
+#define START_EXTRA 16
+
+U_NAMESPACE_BEGIN
+
+// TODO memory debugging provided inside uniset.cpp
+// could be made available here but probably obsolete with use of modern
+// memory leak checker tools
+#define _dbgct(me)
+
+//----------------------------------------------------------------
+// Constructors &c
+//----------------------------------------------------------------
+
+/**
+ * Constructs a set from the given pattern, applying the given options.
+ * Nothing is allocated or parsed if status already indicates a failure.
+ * @param pattern a string specifying what characters are in the set
+ * @param options bitmask for options to apply to the pattern
+ * @param symbols symbol table passed through to applyPattern()
+ * @param status  set to U_MEMORY_ALLOCATION_ERROR if the range list cannot
+ *                be allocated; otherwise whatever applyPattern() reports
+ */
+UnicodeSet::UnicodeSet(const UnicodeString& pattern,
+                       uint32_t options,
+                       const SymbolTable* symbols,
+                       UErrorCode& status) :
+    len(0), capacity(START_EXTRA), list(0), bmpSet(0), buffer(0),
+    bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL),
+    fFlags(0)
+{
+    if (U_SUCCESS(status)) {
+        list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
+        if (list != NULL) {
+            allocateStrings(status);
+            applyPattern(pattern, options, symbols, status);
+        } else {
+            // out of memory for the initial range list
+            status = U_MEMORY_ALLOCATION_ERROR;
+        }
+    }
+    _dbgct(this);
+}
+
+/**
+ * Constructs a set from the given pattern, parsing from the position pos
+ * and applying the given options.
+ * Nothing is allocated or parsed if status already indicates a failure.
+ * @param pattern a string specifying what characters are in the set
+ * @param pos     parse position handed to applyPattern()
+ * @param options bitmask for options to apply to the pattern
+ * @param symbols symbol table passed through to applyPattern()
+ * @param status  set to U_MEMORY_ALLOCATION_ERROR if the range list cannot
+ *                be allocated; otherwise whatever applyPattern() reports
+ */
+UnicodeSet::UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
+                       uint32_t options,
+                       const SymbolTable* symbols,
+                       UErrorCode& status) :
+    len(0), capacity(START_EXTRA), list(0), bmpSet(0), buffer(0),
+    bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL),
+    fFlags(0)
+{
+    if (U_SUCCESS(status)) {
+        list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
+        if (list != NULL) {
+            allocateStrings(status);
+            applyPattern(pattern, pos, options, symbols, status);
+        } else {
+            // out of memory for the initial range list
+            status = U_MEMORY_ALLOCATION_ERROR;
+        }
+    }
+    _dbgct(this);
+}
+
+//----------------------------------------------------------------
+// Public API
+//----------------------------------------------------------------
+
+/**
+ * Modifies this set to represent the set specified by the whole pattern.
+ * Parsing is delegated to the ParsePosition overload; afterwards the entire
+ * pattern must have been consumed (trailing whitespace is tolerated when
+ * USET_IGNORE_SPACE is set), otherwise status becomes
+ * U_ILLEGAL_ARGUMENT_ERROR.
+ * @param pattern a string specifying what characters are in the set
+ * @param options bitmask for options to apply to the pattern
+ * @param symbols symbol table passed through to the parser
+ * @param status  error code, updated as described above
+ * @return a reference to this set
+ */
+UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern,
+                                     uint32_t options,
+                                     const SymbolTable* symbols,
+                                     UErrorCode& status) {
+    ParsePosition pos(0);
+    applyPattern(pattern, pos, options, symbols, status);
+    if (U_SUCCESS(status)) {
+        int32_t consumed = pos.getIndex();
+        if (options & USET_IGNORE_SPACE) {
+            // Skip over trailing whitespace
+            ICU_Utility::skipWhitespace(pattern, consumed, TRUE);
+        }
+        if (consumed != pattern.length()) {
+            // unparsed text is left after the set pattern
+            status = U_ILLEGAL_ARGUMENT_ERROR;
+        }
+    }
+    return *this;
+}
+
+/**
+ * Parses a set pattern starting at pos and makes this set represent it.
+ * Case closure requested through options is performed with closeOver().
+ * @param pattern a string specifying what characters are in the set
+ * @param pos     parse position used by the rule character iterator
+ * @param options bitmask for options to apply to the pattern
+ * @param symbols symbol table used while parsing
+ * @param status  left untouched if already a failure; set to
+ *                U_NO_WRITE_PERMISSION if this set is frozen and to
+ *                U_MALFORMED_SET if parsing ends inside a variable value
+ * @return a reference to this set
+ */
+UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern,
+                                     ParsePosition& pos,
+                                     uint32_t options,
+                                     const SymbolTable* symbols,
+                                     UErrorCode& status) {
+    if (U_SUCCESS(status)) {
+        if (isFrozen()) {
+            status = U_NO_WRITE_PERMISSION;
+        } else {
+            // Need to build the pattern in a temporary string because
+            // _applyPattern calls add() etc., which set pat to empty.
+            UnicodeString rebuiltPat;
+            RuleCharacterIterator chars(pattern, symbols, pos);
+            applyPattern(chars, symbols, rebuiltPat, options, &UnicodeSet::closeOver, status);
+            if (U_SUCCESS(status)) {
+                if (chars.inVariable()) {
+                    // syntaxError(chars, "Extra chars in variable value");
+                    status = U_MALFORMED_SET;
+                } else {
+                    setPattern(rebuiltPat);
+                }
+            }
+        }
+    }
+    return *this;
+}
+
+// USetAdder implementation
+// Does not use uset.h to reduce code dependencies
+// USetAdder callback: adds the single code point c to the UnicodeSet
+// that the USet handle actually refers to.
+static void U_CALLCONV
+_set_add(USet *set, UChar32 c) {
+    UnicodeSet *target = (UnicodeSet *)set;
+    target->add(c);
+}
+
+// USetAdder callback: adds the code point range start..end to the
+// UnicodeSet that the USet handle actually refers to.
+static void U_CALLCONV
+_set_addRange(USet *set, UChar32 start, UChar32 end) {
+    UnicodeSet *target = (UnicodeSet *)set;
+    target->add(start, end);
+}
+
+// USetAdder callback: adds the string str/length to the UnicodeSet that
+// the USet handle actually refers to.
+// The first UnicodeString constructor argument is TRUE exactly when
+// length is negative (presumably the "is NUL-terminated" flag of the
+// aliasing constructor -- confirm against unistr.h).
+static void U_CALLCONV
+_set_addString(USet *set, const UChar *str, int32_t length) {
+    UnicodeSet *target = (UnicodeSet *)set;
+    const UBool lengthIsNegative = (UBool)(length<0);
+    target->add(UnicodeString(lengthIsNegative, str, length));
+}
+
+//----------------------------------------------------------------
+// Case folding API
+//----------------------------------------------------------------
+
+// add the result of a full case mapping to the set
+// use str as a temporary string to avoid constructing one
+// Adds one full-case-mapping result to the set.
+//   result < 0                           the code point mapped to itself: nothing to add (see ucase.h)
+//   result > UCASE_MAX_STRING_LENGTH     result is itself the mapped code point
+//   otherwise                            full points to a mapped string of length result
+// str is scratch storage supplied by the caller so that no UnicodeString
+// has to be constructed per call.
+static inline void
+addCaseMapping(UnicodeSet &set, int32_t result, const UChar *full, UnicodeString &str) {
+    if(result < 0) {
+        return;
+    }
+    if(result <= UCASE_MAX_STRING_LENGTH) {
+        // string case mapping: full holds result code units
+        str.setTo((UBool)FALSE, full, result);
+        set.add(str);
+    } else {
+        // single-code point case mapping
+        set.add(result);
+    }
+}
+
+/**
+ * Closes this set over case, as selected by attribute.
+ *  - USET_CASE_INSENSITIVE: adds the full case closure of every code point
+ *    and of every (case-folded) string in the set.
+ *  - otherwise, USET_ADD_CASE_MAPPINGS: adds the lower-, title- and
+ *    uppercase mappings and the case folding of every code point and string.
+ * Does nothing if the set is frozen or bogus, or if neither flag is set.
+ * The result is built in a copy of this set and assigned back at the end.
+ * @param attribute bitmask of USET_CASE_INSENSITIVE and/or USET_ADD_CASE_MAPPINGS
+ * @return a reference to this set
+ */
+UnicodeSet& UnicodeSet::closeOver(int32_t attribute) {
+    if (isFrozen() || isBogus()) {
+        return *this;
+    }
+    if (attribute & (USET_CASE_INSENSITIVE | USET_ADD_CASE_MAPPINGS)) {
+        const UCaseProps *csp = ucase_getSingleton();
+        {
+            UnicodeSet foldSet(*this);
+            UnicodeString str;
+            USetAdder sa = {
+                foldSet.toUSet(),
+                _set_add,
+                _set_addRange,
+                _set_addString,
+                NULL, // don't need remove()
+                NULL // don't need removeRange()
+            };
+
+            // start with input set to guarantee inclusion
+            // USET_CASE_INSENSITIVE: remove strings because the strings will actually be reduced (folded);
+            // therefore, start with no strings and add only those needed
+            // The strings vector is checked for NULL before use, like this->strings
+            // further below, so that a copy without a strings vector is not dereferenced.
+            if ((attribute & USET_CASE_INSENSITIVE) && foldSet.strings != NULL) {
+                foldSet.strings->removeAllElements();
+            }
+
+            int32_t n = getRangeCount();
+            UChar32 result;
+            const UChar *full;
+            int32_t locCache = 0;
+
+            for (int32_t i=0; i<n; ++i) {
+                UChar32 start = getRangeStart(i);
+                UChar32 end   = getRangeEnd(i);
+
+                if (attribute & USET_CASE_INSENSITIVE) {
+                    // full case closure
+                    for (UChar32 cp=start; cp<=end; ++cp) {
+                        ucase_addCaseClosure(csp, cp, &sa);
+                    }
+                } else {
+                    // add case mappings
+                    // (does not add long s for regular s, or Kelvin for k, for example)
+                    for (UChar32 cp=start; cp<=end; ++cp) {
+                        result = ucase_toFullLower(csp, cp, NULL, NULL, &full, "", &locCache);
+                        addCaseMapping(foldSet, result, full, str);
+
+                        result = ucase_toFullTitle(csp, cp, NULL, NULL, &full, "", &locCache);
+                        addCaseMapping(foldSet, result, full, str);
+
+                        result = ucase_toFullUpper(csp, cp, NULL, NULL, &full, "", &locCache);
+                        addCaseMapping(foldSet, result, full, str);
+
+                        result = ucase_toFullFolding(csp, cp, &full, 0);
+                        addCaseMapping(foldSet, result, full, str);
+                    }
+                }
+            }
+            if (strings != NULL && strings->size() > 0) {
+                if (attribute & USET_CASE_INSENSITIVE) {
+                    for (int32_t j=0; j<strings->size(); ++j) {
+                        str = *(const UnicodeString *) strings->elementAt(j);
+                        str.foldCase();
+                        if(!ucase_addStringCaseClosure(csp, str.getBuffer(), str.length(), &sa)) {
+                            foldSet.add(str); // does not map to code points: add the folded string itself
+                        }
+                    }
+                } else {
+                    Locale root("");
+#if !UCONFIG_NO_BREAK_ITERATION
+                    // The word break iterator is needed for titlecasing. If it cannot be
+                    // created, none of the string mappings below are added.
+                    UErrorCode status = U_ZERO_ERROR;
+                    BreakIterator *bi = BreakIterator::createWordInstance(root, status);
+                    if (U_SUCCESS(status)) {
+#endif
+                        const UnicodeString *pStr;
+
+                        for (int32_t j=0; j<strings->size(); ++j) {
+                            pStr = (const UnicodeString *) strings->elementAt(j);
+                            (str = *pStr).toLower(root);
+                            foldSet.add(str);
+#if !UCONFIG_NO_BREAK_ITERATION
+                            (str = *pStr).toTitle(bi, root);
+                            foldSet.add(str);
+#endif
+                            (str = *pStr).toUpper(root);
+                            foldSet.add(str);
+                            (str = *pStr).foldCase();
+                            foldSet.add(str);
+                        }
+#if !UCONFIG_NO_BREAK_ITERATION
+                    }
+                    delete bi;
+#endif
+                }
+            }
+            *this = foldSet;
+        }
+    }
+    return *this;
+}
+
+U_NAMESPACE_END
len(0), capacity(START_EXTRA), list(0), bmpSet(0), buffer(0),
bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL),
fFlags(0)
-{
- if(U_SUCCESS(status)){
- list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
- /* test for NULL */
- if(list == NULL) {
- status = U_MEMORY_ALLOCATION_ERROR;
- }else{
- allocateStrings(status);
- applyPattern(pattern, USET_IGNORE_SPACE, NULL, status);
- }
- }
- _dbgct(this);
-}
-
-/**
- * Constructs a set from the given pattern, optionally ignoring
- * white space. See the class description for the syntax of the
- * pattern language.
- * @param pattern a string specifying what characters are in the set
- * @param options bitmask for options to apply to the pattern.
- * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
- */
-UnicodeSet::UnicodeSet(const UnicodeString& pattern,
- uint32_t options,
- const SymbolTable* symbols,
- UErrorCode& status) :
- len(0), capacity(START_EXTRA), list(0), bmpSet(0), buffer(0),
- bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL),
- fFlags(0)
-{
- if(U_SUCCESS(status)){
- list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
- /* test for NULL */
- if(list == NULL) {
- status = U_MEMORY_ALLOCATION_ERROR;
- }else{
- allocateStrings(status);
- applyPattern(pattern, options, symbols, status);
- }
- }
- _dbgct(this);
-}
-
-UnicodeSet::UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
- uint32_t options,
- const SymbolTable* symbols,
- UErrorCode& status) :
- len(0), capacity(START_EXTRA), list(0), bmpSet(0), buffer(0),
- bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL),
- fFlags(0)
{
if(U_SUCCESS(status)){
list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
/* test for NULL */
if(list == NULL) {
- status = U_MEMORY_ALLOCATION_ERROR;
+ status = U_MEMORY_ALLOCATION_ERROR;
}else{
allocateStrings(status);
- applyPattern(pattern, pos, options, symbols, status);
+ applyPattern(pattern, status);
}
}
_dbgct(this);
UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern,
UErrorCode& status) {
- return applyPattern(pattern, USET_IGNORE_SPACE, NULL, status);
-}
-
-
-/**
- * Modifies this set to represent the set specified by the given
- * pattern, optionally ignoring white space. See the class
- * description for the syntax of the pattern language.
- * @param pattern a string specifying what characters are in the set
- * @param options bitmask for options to apply to the pattern.
- * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
- */
-UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern,
- uint32_t options,
- const SymbolTable* symbols,
- UErrorCode& status) {
- if (U_FAILURE(status) || isFrozen()) {
- return *this;
- }
-
+ // Equivalent to
+ // return applyPattern(pattern, USET_IGNORE_SPACE, NULL, status);
+ // but without dependency on closeOver(): applyPatternIgnoreSpace() parses with USET_IGNORE_SPACE and passes no case-closure callback.
ParsePosition pos(0);
- applyPattern(pattern, pos, options, symbols, status);
+ applyPatternIgnoreSpace(pattern, pos, NULL, status);
if (U_FAILURE(status)) return *this;
int32_t i = pos.getIndex();
-
- if (options & USET_IGNORE_SPACE) {
- // Skip over trailing whitespace
- ICU_Utility::skipWhitespace(pattern, i, TRUE);
- }
-
+ // Skip over trailing whitespace; any other text left unparsed is reported as U_ILLEGAL_ARGUMENT_ERROR
+ ICU_Utility::skipWhitespace(pattern, i, TRUE);
if (i != pattern.length()) {
status = U_ILLEGAL_ARGUMENT_ERROR;
}
return *this;
}
-UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern,
- ParsePosition& pos,
- uint32_t options,
- const SymbolTable* symbols,
- UErrorCode& status) {
- if (U_FAILURE(status) || isFrozen()) {
- return *this;
+void
+UnicodeSet::applyPatternIgnoreSpace(const UnicodeString& pattern,
+ ParsePosition& pos,
+ const SymbolTable* symbols,
+ UErrorCode& status) {
+ if (U_FAILURE(status)) {
+ return;
+ }
+ if (isFrozen()) {
+ status = U_NO_WRITE_PERMISSION; // a frozen set is left unmodified and the refused write is reported
+ return;
}
// Need to build the pattern in a temporary string because
// _applyPattern calls add() etc., which set pat to empty.
UnicodeString rebuiltPat;
RuleCharacterIterator chars(pattern, symbols, pos);
- applyPattern(chars, symbols, rebuiltPat, options, status);
- if (U_FAILURE(status)) return *this;
+ applyPattern(chars, symbols, rebuiltPat, USET_IGNORE_SPACE, NULL, status); // NULL: no caseClosure callback. NOTE(review): relies on the parser invoking the callback only for the case-closure option flags, none of which is passed here -- confirm USET_IGNORE_SPACE shares no bits with them
+ if (U_FAILURE(status)) return;
if (chars.inVariable()) {
// syntaxError(chars, "Extra chars in variable value");
status = U_MALFORMED_SET;
- return *this;
+ return;
}
setPattern(rebuiltPat);
- return *this;
}
/**
const SymbolTable* symbols,
UnicodeString& rebuiltPat,
uint32_t options,
+ UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
UErrorCode& ec) {
if (U_FAILURE(ec)) return;
}
switch (setMode) {
case 1:
- nested->applyPattern(chars, symbols, patLocal, options, ec);
+ nested->applyPattern(chars, symbols, patLocal, options, caseClosure, ec);
break;
case 2:
chars.skipIgnored(opts);
* patterns like /[^abc]/i work.
*/
if ((options & USET_CASE_INSENSITIVE) != 0) {
- closeOver(USET_CASE_INSENSITIVE);
+ (this->*caseClosure)(USET_CASE_INSENSITIVE);
}
else if ((options & USET_ADD_CASE_MAPPINGS) != 0) {
- closeOver(USET_ADD_CASE_MAPPINGS);
+ (this->*caseClosure)(USET_ADD_CASE_MAPPINGS);
}
if (invert) {
complement();
rebuiltPat.append(pattern, 0, pos.getIndex());
}
-//----------------------------------------------------------------
-// Case folding API
-//----------------------------------------------------------------
-
-// add the result of a full case mapping to the set
-// use str as a temporary string to avoid constructing one
-static inline void
-addCaseMapping(UnicodeSet &set, int32_t result, const UChar *full, UnicodeString &str) {
- if(result >= 0) {
- if(result > UCASE_MAX_STRING_LENGTH) {
- // add a single-code point case mapping
- set.add(result);
- } else {
- // add a string case mapping from full with length result
- str.setTo((UBool)FALSE, full, result);
- set.add(str);
- }
- }
- // result < 0: the code point mapped to itself, no need to add it
- // see ucase.h
-}
-
-UnicodeSet& UnicodeSet::closeOver(int32_t attribute) {
- if (isFrozen() || isBogus()) {
- return *this;
- }
- if (attribute & (USET_CASE_INSENSITIVE | USET_ADD_CASE_MAPPINGS)) {
- const UCaseProps *csp = ucase_getSingleton();
- {
- UnicodeSet foldSet(*this);
- UnicodeString str;
- USetAdder sa = {
- foldSet.toUSet(),
- _set_add,
- _set_addRange,
- _set_addString,
- NULL, // don't need remove()
- NULL // don't need removeRange()
- };
-
- // start with input set to guarantee inclusion
- // USET_CASE: remove strings because the strings will actually be reduced (folded);
- // therefore, start with no strings and add only those needed
- if (attribute & USET_CASE_INSENSITIVE) {
- foldSet.strings->removeAllElements();
- }
-
- int32_t n = getRangeCount();
- UChar32 result;
- const UChar *full;
- int32_t locCache = 0;
-
- for (int32_t i=0; i<n; ++i) {
- UChar32 start = getRangeStart(i);
- UChar32 end = getRangeEnd(i);
-
- if (attribute & USET_CASE_INSENSITIVE) {
- // full case closure
- for (UChar32 cp=start; cp<=end; ++cp) {
- ucase_addCaseClosure(csp, cp, &sa);
- }
- } else {
- // add case mappings
- // (does not add long s for regular s, or Kelvin for k, for example)
- for (UChar32 cp=start; cp<=end; ++cp) {
- result = ucase_toFullLower(csp, cp, NULL, NULL, &full, "", &locCache);
- addCaseMapping(foldSet, result, full, str);
-
- result = ucase_toFullTitle(csp, cp, NULL, NULL, &full, "", &locCache);
- addCaseMapping(foldSet, result, full, str);
-
- result = ucase_toFullUpper(csp, cp, NULL, NULL, &full, "", &locCache);
- addCaseMapping(foldSet, result, full, str);
-
- result = ucase_toFullFolding(csp, cp, &full, 0);
- addCaseMapping(foldSet, result, full, str);
- }
- }
- }
- if (strings != NULL && strings->size() > 0) {
- if (attribute & USET_CASE_INSENSITIVE) {
- for (int32_t j=0; j<strings->size(); ++j) {
- str = *(const UnicodeString *) strings->elementAt(j);
- str.foldCase();
- if(!ucase_addStringCaseClosure(csp, str.getBuffer(), str.length(), &sa)) {
- foldSet.add(str); // does not map to code points: add the folded string itself
- }
- }
- } else {
- Locale root("");
-#if !UCONFIG_NO_BREAK_ITERATION
- UErrorCode status = U_ZERO_ERROR;
- BreakIterator *bi = BreakIterator::createWordInstance(root, status);
- if (U_SUCCESS(status)) {
-#endif
- const UnicodeString *pStr;
-
- for (int32_t j=0; j<strings->size(); ++j) {
- pStr = (const UnicodeString *) strings->elementAt(j);
- (str = *pStr).toLower(root);
- foldSet.add(str);
-#if !UCONFIG_NO_BREAK_ITERATION
- (str = *pStr).toTitle(bi, root);
- foldSet.add(str);
-#endif
- (str = *pStr).toUpper(root);
- foldSet.add(str);
- (str = *pStr).foldCase();
- foldSet.add(str);
- }
-#if !UCONFIG_NO_BREAK_ITERATION
- }
- delete bi;
-#endif
- }
- }
- *this = foldSet;
- }
- }
- return *this;
-}
-
U_NAMESPACE_END
{
/* Delegate hash computation to uhash. This makes UnicodeString
* hashing consistent with UChar* hashing. */
- int32_t hashCode = uhash_hashUCharsN(getArrayStart(), length());
+ int32_t hashCode = ustr_hashUCharsN(getArrayStart(), length());
if (hashCode == kInvalidHashCode) {
hashCode = kEmptyHashCode;
}
U_NAMESPACE_END
+U_NAMESPACE_USE
+
+// Hash function for UnicodeString* keys: a NULL key hashes to 0,
+// any other key to the string's own hashCode().
+U_CAPI int32_t U_EXPORT2
+uhash_hashUnicodeString(const UHashTok key) {
+    const UnicodeString *str = (const UnicodeString*) key.pointer;
+    if (str == NULL) {
+        return 0;
+    }
+    return str->hashCode();
+}
+
+// Moved here from uhash_us.cpp so that using a UVector of UnicodeString*
+// does not depend on hashtable code.
+// Equality comparator for UnicodeString* keys.
+// Identical pointers (including two NULLs) compare equal, a single NULL
+// never equals a string, and two strings are compared by content.
+U_CAPI UBool U_EXPORT2
+uhash_compareUnicodeString(const UHashTok key1, const UHashTok key2) {
+    const UnicodeString *left = (const UnicodeString*) key1.pointer;
+    const UnicodeString *right = (const UnicodeString*) key2.pointer;
+    if (left == right) {
+        return TRUE;
+    }
+    if (left != NULL && right != NULL) {
+        return *left == *right;
+    }
+    return FALSE;
+}
+
#ifdef U_STATIC_IMPLEMENTATION
/*
This should never be called. It is defined here to make sure that the
This makes sure that static library dependencies are kept to a minimum.
*/
static void uprv_UnicodeStringDummy(void) {
- U_NAMESPACE_USE
delete [] (new UnicodeString[2]);
}
#endif
/*
*******************************************************************************
*
-* Copyright (C) 1999-2010, International Business Machines
+* Copyright (C) 1999-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
#include "unicode/utypes.h"
#include "unicode/putil.h"
-#include "unicode/locid.h"
#include "cstring.h"
#include "cmemory.h"
#include "unicode/ustring.h"
#include "unicode/unistr.h"
#include "unicode/uchar.h"
-#include "unicode/ubrk.h"
#include "ustr_imp.h"
#include "uhash.h"
// Write implementation
//========================================
-/*
- * Implement argument checking and buffer handling
- * for string case mapping as a common function.
- */
-
UnicodeString &
-UnicodeString::caseMap(BreakIterator *titleIter,
- const char *locale,
- uint32_t options,
- int32_t toWhichCase) {
+UnicodeString::caseMap(const UCaseMap *csm,
+ UStringCaseMapper *stringCaseMapper) {
if(isEmpty() || !isWritable()) {
// nothing to do
return *this;
}
- const UCaseProps *csp=ucase_getSingleton();
-
// We need to allocate a new buffer for the internal string case mapping function.
// This is very similar to how doReplace() keeps the old array pointer
// and deletes the old array itself after it is done.
int32_t newLength;
do {
errorCode = U_ZERO_ERROR;
- if(toWhichCase==TO_LOWER) {
- newLength = ustr_toLower(csp, getArrayStart(), getCapacity(),
- oldArray, oldLength,
- locale, &errorCode);
- } else if(toWhichCase==TO_UPPER) {
- newLength = ustr_toUpper(csp, getArrayStart(), getCapacity(),
- oldArray, oldLength,
- locale, &errorCode);
- } else if(toWhichCase==TO_TITLE) {
-#if UCONFIG_NO_BREAK_ITERATION
- errorCode=U_UNSUPPORTED_ERROR;
-#else
- newLength = ustr_toTitle(csp, getArrayStart(), getCapacity(),
- oldArray, oldLength,
- (UBreakIterator *)titleIter, locale, options, &errorCode);
-#endif
- } else {
- newLength = ustr_foldCase(csp, getArrayStart(), getCapacity(),
- oldArray, oldLength,
- options,
- &errorCode);
- }
+ newLength = stringCaseMapper(csm, getArrayStart(), getCapacity(),
+ oldArray, oldLength, &errorCode);
setLength(newLength);
} while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(newLength, newLength, FALSE));
return *this;
}
-UnicodeString &
-UnicodeString::toLower() {
- return caseMap(0, Locale::getDefault().getName(), 0, TO_LOWER);
-}
-
-UnicodeString &
-UnicodeString::toLower(const Locale &locale) {
- return caseMap(0, locale.getName(), 0, TO_LOWER);
-}
-
-UnicodeString &
-UnicodeString::toUpper() {
- return caseMap(0, Locale::getDefault().getName(), 0, TO_UPPER);
-}
-
-UnicodeString &
-UnicodeString::toUpper(const Locale &locale) {
- return caseMap(0, locale.getName(), 0, TO_UPPER);
-}
-
-#if !UCONFIG_NO_BREAK_ITERATION
-
-UnicodeString &
-UnicodeString::toTitle(BreakIterator *titleIter) {
- return caseMap(titleIter, Locale::getDefault().getName(), 0, TO_TITLE);
-}
-
-UnicodeString &
-UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale) {
- return caseMap(titleIter, locale.getName(), 0, TO_TITLE);
-}
-
-UnicodeString &
-UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options) {
- return caseMap(titleIter, locale.getName(), options, TO_TITLE);
-}
-
-#endif
-
UnicodeString &
UnicodeString::foldCase(uint32_t options) {
- /* The Locale parameter isn't used. Use "" instead. */
- return caseMap(0, "", options, FOLD_CASE);
+ UCaseMap csm=UCASEMAP_INITIALIZER;
+ csm.csp=ucase_getSingleton();
+ csm.options=options;
+ return caseMap(&csm, ustrcase_internalFold);
}
U_NAMESPACE_END
}
return str1->caseCompare(*str2, U_FOLD_CASE_DEFAULT) == 0;
}
-
--- /dev/null
+/*
+*******************************************************************************
+* Copyright (C) 2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: unistr_case_locale.cpp
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2011may31
+* created by: Markus W. Scherer
+*
+* Locale-sensitive case mapping functions (ones that call uloc_getDefault())
+* were moved here to break dependency cycles among parts of the common library.
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/locid.h"
+#include "unicode/unistr.h"
+#include "cmemory.h"
+#include "ustr_imp.h"
+
+U_NAMESPACE_BEGIN
+
+//========================================
+// Write implementation
+//========================================
+
+/*
+ * Set parameters on an empty UCaseMap, for UCaseMap-less API functions.
+ * Do this fast because it is called with every function call.
+ */
+static inline void
+setTempCaseMap(UCaseMap *csm, const char *locale) {
+ if(csm->csp==NULL) {  // fill in the case properties only if the caller left them unset
+ csm->csp=ucase_getSingleton();
+ }
+ if(locale!=NULL && locale[0]==0) {  // empty locale ID: store an empty string directly
+ csm->locale[0]=0;
+ } else {  // NULL or non-empty locale ID: let the out-of-line helper fill csm->locale
+ ustrcase_setTempCaseMapLocale(csm, locale);
+ }
+}
+
+UnicodeString &
+UnicodeString::toLower() {
+ return toLower(Locale::getDefault());  // forwards to the Locale overload with the default locale
+}
+
+UnicodeString &
+UnicodeString::toLower(const Locale &locale) {
+ UCaseMap csm=UCASEMAP_INITIALIZER;  // temporary case map on the stack, used for this call only
+ setTempCaseMap(&csm, locale.getName());  // sets csm.csp and csm.locale
+ return caseMap(&csm, ustrcase_internalToLower);  // caseMap() applies the mapper to this string and returns *this
+}
+
+UnicodeString &
+UnicodeString::toUpper() {
+ return toUpper(Locale::getDefault());  // forwards to the Locale overload with the default locale
+}
+
+UnicodeString &
+UnicodeString::toUpper(const Locale &locale) {
+ UCaseMap csm=UCASEMAP_INITIALIZER;  // temporary case map on the stack, used for this call only
+ setTempCaseMap(&csm, locale.getName());  // sets csm.csp and csm.locale
+ return caseMap(&csm, ustrcase_internalToUpper);  // caseMap() applies the mapper to this string and returns *this
+}
+
+U_NAMESPACE_END
--- /dev/null
+/*
+*******************************************************************************
+* Copyright (C) 2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: unistr_titlecase_brkiter.cpp
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:2
+*
+* created on: 2011may30
+* created by: Markus W. Scherer
+*
+* Titlecasing functions that are based on BreakIterator
+* were moved here to break dependency cycles among parts of the common library.
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+#include "unicode/brkiter.h"
+#include "unicode/ubrk.h"
+#include "unicode/unistr.h"
+#include "unicode/ustring.h"
+#include "cmemory.h"
+#include "ustr_imp.h"
+
+static int32_t U_CALLCONV  // Mapper passed to UnicodeString::caseMap(): sets the iterator text, then titlecases.
+unistr_case_internalToTitle(const UCaseMap *csm,
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ UErrorCode *pErrorCode) {
+ ubrk_setText(csm->iter, src, srcLength, pErrorCode);  // csm->iter must point at src before it is walked below
+ return ustrcase_internalToTitle(csm, dest, destCapacity, src, srcLength, pErrorCode);
+}
+
+/*
+ * Set parameters on an empty UCaseMap, for UCaseMap-less API functions.
+ * Do this fast because it is called with every function call.
+ */
+static inline void
+setTempCaseMap(UCaseMap *csm, const char *locale) {
+ if(csm->csp==NULL) {  // fill in the case properties only if the caller left them unset
+ csm->csp=ucase_getSingleton();
+ }
+ if(locale!=NULL && locale[0]==0) {  // empty locale ID: store an empty string directly
+ csm->locale[0]=0;
+ } else {  // NULL or non-empty locale ID: let the out-of-line helper fill csm->locale
+ ustrcase_setTempCaseMapLocale(csm, locale);
+ }
+}
+
+U_NAMESPACE_BEGIN
+
+UnicodeString &
+UnicodeString::toTitle(BreakIterator *titleIter) {
+ return toTitle(titleIter, Locale::getDefault(), 0);  // default locale, options 0
+}
+
+UnicodeString &
+UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale) {
+ return toTitle(titleIter, locale, 0);  // same as the three-argument overload with options 0
+}
+
+UnicodeString &
+UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options) {
+ UCaseMap csm=UCASEMAP_INITIALIZER;  // temporary case map on the stack, used for this call only
+ csm.options=options;
+ setTempCaseMap(&csm, locale.getName());  // sets csm.csp and csm.locale
+ BreakIterator *bi=titleIter;
+ if(bi==NULL) {  // no iterator supplied: create a word iterator for this locale
+ UErrorCode errorCode=U_ZERO_ERROR;
+ bi=BreakIterator::createWordInstance(locale, errorCode);
+ if(U_FAILURE(errorCode)) {  // could not create the iterator: leave this string bogus
+ setToBogus();
+ return *this;
+ }
+ }
+ csm.iter=reinterpret_cast<UBreakIterator *>(bi);  // csm.iter is typed as UBreakIterator*
+ caseMap(&csm, unistr_case_internalToTitle);
+ if(titleIter==NULL) {  // delete only the iterator created above, never the caller's
+ delete bi;
+ }
+ return *this;
+}
+
+U_NAMESPACE_END
+
+#endif // !UCONFIG_NO_BREAK_ITERATION
/*
******************************************************************************
*
-* Copyright (C) 2002-2008, International Business Machines
+* Copyright (C) 2002-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
U_NAMESPACE_END
+U_NAMESPACE_USE
+// Deletes the object that obj points to, treating it as a UObject.
+// obj must be NULL or a pointer that was originally a UObject*.
+U_CAPI void U_EXPORT2
+uprv_deleteUObject(void *obj) {
+    // static_cast is the defined conversion back from void*; no reinterpret_cast is needed.
+    delete static_cast<UObject *>(obj);
+}
/*
*******************************************************************************
*
-* Copyright (C) 2002-2010, International Business Machines
+* Copyright (C) 2002-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
}
}
-#if !UCONFIG_NO_NORMALIZATION
-
-U_CAPI uint8_t U_EXPORT2
-u_getCombiningClass(UChar32 c) {
- UErrorCode errorCode=U_ZERO_ERROR;
- const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
- if(U_SUCCESS(errorCode)) {
- return impl->getCC(impl->getNorm16(c));
- } else {
- return 0;
- }
-}
-
-static uint16_t
-getFCD16(UChar32 c) {
- UErrorCode errorCode=U_ZERO_ERROR;
- const UTrie2 *trie=Normalizer2Factory::getFCDTrie(errorCode);
- if(U_SUCCESS(errorCode)) {
- return UTRIE2_GET16(trie, c);
- } else {
- return 0;
- }
-}
-
-#endif
-
struct IntProperty;
typedef int32_t IntPropertyGetValue(const IntProperty &prop, UChar32 c, UProperty which);
}
#else
static int32_t getLeadCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
- return getFCD16(c)>>8;
+ return unorm_getFCD16Simple(c)>>8;
}
#endif
}
#else
static int32_t getTrailCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
- return getFCD16(c)&0xff;
+ return unorm_getFCD16Simple(c)&0xff;
}
#endif
/*
**********************************************************************
-* Copyright (C) 1997-2010, International Business Machines
+* Copyright (C) 1997-2011, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
}
return numFilled;
}
-
-U_CAPI const char* U_EXPORT2
-uscript_getName(UScriptCode scriptCode){
- return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode,
- U_LONG_PROPERTY_NAME);
-}
-
-U_CAPI const char* U_EXPORT2
-uscript_getShortName(UScriptCode scriptCode){
- return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode,
- U_SHORT_PROPERTY_NAME);
-}
-
((UnicodeSet*) set)->UnicodeSet::clear();
}
-U_CAPI void U_EXPORT2
-uset_closeOver(USet* set, int32_t attributes) {
- ((UnicodeSet*) set)->UnicodeSet::closeOver(attributes);
-}
-
U_CAPI void U_EXPORT2
uset_removeAllStrings(USet* set) {
((UnicodeSet*) set)->UnicodeSet::removeAllStrings();
/*
*******************************************************************************
*
-* Copyright (C) 2002-2006, International Business Machines
+* Copyright (C) 2002-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
*ec = U_MEMORY_ALLOCATION_ERROR;
return 0;
}
-
+
if (U_FAILURE(*ec)) {
delete set;
set = NULL;
*ec = U_MEMORY_ALLOCATION_ERROR;
return 0;
}
-
+
if (U_FAILURE(*ec)) {
delete set;
set = NULL;
((const UnicodeSet*) set)->toPattern(pat, escapeUnprintable);
return pat.extract(result, resultCapacity, *ec);
}
+
+U_CAPI void U_EXPORT2  // C wrapper: treats set as a UnicodeSet and forwards to closeOver().
+uset_closeOver(USet* set, int32_t attributes) {
+ ((UnicodeSet*) set)->UnicodeSet::closeOver(attributes);  // class-qualified call to UnicodeSet::closeOver
+}
#include "unicode/uiter.h"
#include "ucase.h"
-/** Simple declaration for u_strToTitle() to avoid including unicode/ubrk.h. */
+/** Simple declaration to avoid including unicode/ubrk.h. */
#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
# define UBRK_TYPEDEF_UBREAK_ITERATOR
typedef struct UBreakIterator UBreakIterator;
# define UCASEMAP_INITIALIZER { NULL, NULL, { 0 }, 0, 0 }
#endif
-enum {
- TO_LOWER,
- TO_UPPER,
- TO_TITLE,
- FOLD_CASE
-};
+U_CFUNC void
+ustrcase_setTempCaseMapLocale(UCaseMap *csm, const char *locale);
-U_CFUNC int32_t
-ustr_toLower(const UCaseProps *csp,
- UChar *dest, int32_t destCapacity,
- const UChar *src, int32_t srcLength,
- const char *locale,
- UErrorCode *pErrorCode);
+#ifndef U_STRING_CASE_MAPPER_DEFINED
+#define U_STRING_CASE_MAPPER_DEFINED
-U_CFUNC int32_t
-ustr_toUpper(const UCaseProps *csp,
- UChar *dest, int32_t destCapacity,
- const UChar *src, int32_t srcLength,
- const char *locale,
- UErrorCode *pErrorCode);
+/**
+ * String case mapping function type, used by ustrcase_map().
+ * All error checking must be done.
+ * The UCaseMap must be fully initialized, with locale and/or iter set as needed.
+ * src and dest must not overlap.
+ */
+typedef int32_t U_CALLCONV
+UStringCaseMapper(const UCaseMap *csm,
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+#endif
+
+/** Implements UStringCaseMapper. */
+U_CFUNC int32_t U_CALLCONV
+ustrcase_internalToLower(const UCaseMap *csm,
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+/** Implements UStringCaseMapper. */
+U_CFUNC int32_t U_CALLCONV
+ustrcase_internalToUpper(const UCaseMap *csm,
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
#if !UCONFIG_NO_BREAK_ITERATION
+/** Implements UStringCaseMapper. */
+U_CFUNC int32_t U_CALLCONV
+ustrcase_internalToTitle(const UCaseMap *csm,
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+#endif
+
+/** Implements UStringCaseMapper. */
+U_CFUNC int32_t U_CALLCONV
+ustrcase_internalFold(const UCaseMap *csm,
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Implements argument checking and buffer handling
+ * for string case mapping as a common function.
+ */
U_CFUNC int32_t
-ustr_toTitle(const UCaseProps *csp,
+ustrcase_map(const UCaseMap *csm,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
- UBreakIterator *titleIter,
- const char *locale, uint32_t options,
+ UStringCaseMapper *stringCaseMapper,
UErrorCode *pErrorCode);
-#endif
+/**
+ * UTF-8 string case mapping function type, used by ucasemap_mapUTF8().
+ * UTF-8 version of UStringCaseMapper.
+ * All error checking must be done.
+ * The UCaseMap must be fully initialized, with locale and/or iter set as needed.
+ * src and dest must not overlap.
+ */
+typedef int32_t U_CALLCONV
+UTF8CaseMapper(const UCaseMap *csm,
+ uint8_t *dest, int32_t destCapacity,
+ const uint8_t *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+/** Implements UTF8CaseMapper. */
+U_CFUNC int32_t U_CALLCONV
+ucasemap_internalUTF8ToTitle(const UCaseMap *csm,
+ uint8_t *dest, int32_t destCapacity,
+ const uint8_t *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
/**
- * Internal case folding function.
+ * Implements argument checking and buffer handling
+ * for UTF-8 string case mapping as a common function.
*/
U_CFUNC int32_t
-ustr_foldCase(const UCaseProps *csp,
- UChar *dest, int32_t destCapacity,
- const UChar *src, int32_t srcLength,
- uint32_t options,
- UErrorCode *pErrorCode);
+ucasemap_mapUTF8(const UCaseMap *csm,
+ uint8_t *dest, int32_t destCapacity,
+ const uint8_t *src, int32_t srcLength,
+ UTF8CaseMapper *stringCaseMapper,
+ UErrorCode *pErrorCode);
+
+U_CAPI int32_t U_EXPORT2
+ustr_hashUCharsN(const UChar *str, int32_t length);
+
+U_CAPI int32_t U_EXPORT2
+ustr_hashCharsN(const char *str, int32_t length);
+
+U_CAPI int32_t U_EXPORT2
+ustr_hashICharsN(const char *str, int32_t length);
/**
* NUL-terminate a UChar * string if possible.
--- /dev/null
+/*
+*******************************************************************************
+* Copyright (C) 2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: ustr_titlecase_brkiter.cpp
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2011may30
+* created by: Markus W. Scherer
+*
+* Titlecasing functions that are based on BreakIterator
+* were moved here to break dependency cycles among parts of the common library.
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+#include "unicode/brkiter.h"
+#include "unicode/ubrk.h"
+#include "unicode/ucasemap.h"
+#include "cmemory.h"
+#include "ucase.h"
+#include "ustr_imp.h"
+
+/* functions available in the common library (for unistr_case.cpp) */
+
+/*
+ * Set parameters on an empty UCaseMap, for UCaseMap-less API functions.
+ * Do this fast because it is called with every function call.
+ * Duplicate of the same function in ustrcase_locale.cpp, to keep it inline.
+ */
+static inline void
+setTempCaseMap(UCaseMap *csm, const char *locale) {
+ if(csm->csp==NULL) {  // fill in the case properties only if the caller left them unset
+ csm->csp=ucase_getSingleton();
+ }
+ if(locale!=NULL && locale[0]==0) {  // empty locale ID: store an empty string directly
+ csm->locale[0]=0;
+ } else {  // NULL or non-empty locale ID: let the out-of-line helper fill csm->locale
+ ustrcase_setTempCaseMapLocale(csm, locale);
+ }
+}
+
+/* public API functions */
+
+U_CAPI int32_t U_EXPORT2  // Titlecases src into dest using a temporary UCaseMap.
+u_strToTitle(UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ UBreakIterator *titleIter,
+ const char *locale,
+ UErrorCode *pErrorCode) {
+ UCaseMap csm=UCASEMAP_INITIALIZER;  // temporary case map on the stack, used for this call only
+ setTempCaseMap(&csm, locale);  // sets csm.csp and csm.locale
+ if(titleIter!=NULL) {  // caller's iterator: store it in csm and point it at src
+ ubrk_setText(csm.iter=titleIter, src, srcLength, pErrorCode);
+ } else {  // no iterator supplied: open a word iterator over src
+ csm.iter=ubrk_open(UBRK_WORD, csm.locale, src, srcLength, pErrorCode);
+ }
+ int32_t length=ustrcase_map(
+ &csm,
+ dest, destCapacity,
+ src, srcLength,
+ ustrcase_internalToTitle, pErrorCode);
+ if(titleIter==NULL && csm.iter!=NULL) {  // close only the iterator opened above, never the caller's
+ ubrk_close(csm.iter);
+ }
+ return length;
+}
+
+U_CAPI int32_t U_EXPORT2  // Titlecases src into dest using the caller's UCaseMap.
+ucasemap_toTitle(UCaseMap *csm,
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ UErrorCode *pErrorCode) {
+ if(csm->iter!=NULL) {  // an iterator is already stored in csm: point it at src
+ ubrk_setText(csm->iter, src, srcLength, pErrorCode);
+ } else {  // none yet: open a word iterator and store it in csm (it is not closed here)
+ csm->iter=ubrk_open(UBRK_WORD, csm->locale, src, srcLength, pErrorCode);
+ }
+ return ustrcase_map(
+ csm,
+ dest, destCapacity,
+ src, srcLength,
+ ustrcase_internalToTitle, pErrorCode);
+}
+
+#endif // !UCONFIG_NO_BREAK_ITERATION
*/
#include "unicode/utypes.h"
-#include "unicode/uloc.h"
+#include "unicode/brkiter.h"
#include "unicode/ustring.h"
#include "unicode/ucasemap.h"
#include "unicode/ubrk.h"
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
+U_NAMESPACE_USE
+
/* string casing ------------------------------------------------------------ */
-/* append a full case mapping result, see UCASE_MAX_STRING_LENGTH */
+/* Appends a full case mapping result, see UCASE_MAX_STRING_LENGTH. */
static inline int32_t
appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity,
int32_t result, const UChar *s) {
return destIndex;
}
-static void
-setTempCaseMapLocale(UCaseMap *csm, const char *locale, UErrorCode * /*pErrorCode*/) {
- /*
- * We could call ucasemap_setLocale(), but here we really only care about
- * the initial language subtag, we need not return the real string via
- * ucasemap_getLocale(), and we don't care about only getting "x" from
- * "x-some-thing" etc.
- *
- * We ignore locales with a longer-than-3 initial subtag.
- *
- * We also do not fill in the locCache because it is rarely used,
- * and not worth setting unless we reuse it for many case mapping operations.
- * (That's why UCaseMap was created.)
- */
- int i;
- char c;
-
- /* the internal functions require locale!=NULL */
- if(locale==NULL) {
- locale=uloc_getDefault();
- }
- for(i=0; i<4 && (c=locale[i])!=0 && c!='-' && c!='_'; ++i) {
- csm->locale[i]=c;
- }
- if(i<=3) {
- csm->locale[i]=0; /* Up to 3 non-separator characters. */
- } else {
- csm->locale[0]=0; /* Longer-than-3 initial subtag: Ignore. */
- }
-}
-
-/*
- * Set parameters on an empty UCaseMap, for UCaseMap-less API functions.
- * Do this fast because it is called with every function call.
- */
-static inline void
-setTempCaseMap(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) {
- if(csm->csp==NULL) {
- csm->csp=ucase_getSingleton();
- }
- if(locale!=NULL && locale[0]==0) {
- csm->locale[0]=0;
- } else {
- setTempCaseMapLocale(csm, locale, pErrorCode);
- }
-}
-
#if !UCONFIG_NO_BREAK_ITERATION
-/*
- * Internal titlecasing function.
- */
-static int32_t
-_toTitle(UCaseMap *csm,
- UChar *dest, int32_t destCapacity,
- const UChar *src, UCaseContext *csc,
- int32_t srcLength,
- UErrorCode *pErrorCode) {
+U_CFUNC int32_t U_CALLCONV
+ustrcase_internalToTitle(const UCaseMap *csm,
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ UErrorCode *pErrorCode) {
const UChar *s;
UChar32 c;
int32_t prev, titleStart, titleLimit, idx, destIndex, length;
UBool isFirstIndex;
- if(csm->iter!=NULL) {
- ubrk_setText(csm->iter, src, srcLength, pErrorCode);
- } else {
- csm->iter=ubrk_open(UBRK_WORD, csm->locale,
- src, srcLength,
- pErrorCode);
- }
if(U_FAILURE(*pErrorCode)) {
return 0;
}
+ // Use the C++ abstract base class to minimize dependencies.
+ // TODO: Change UCaseMap.iter to store a BreakIterator directly.
+ BreakIterator *bi=reinterpret_cast<BreakIterator *>(csm->iter);
+
/* set up local variables */
+ int32_t locCache=csm->locCache;
+ UCaseContext csc=UCASECONTEXT_INITIALIZER;
+ csc.p=(void *)src;
+ csc.limit=srcLength;
destIndex=0;
prev=0;
isFirstIndex=TRUE;
/* find next index where to titlecase */
if(isFirstIndex) {
isFirstIndex=FALSE;
- idx=ubrk_first(csm->iter);
+ idx=bi->first();
} else {
- idx=ubrk_next(csm->iter);
+ idx=bi->next();
}
if(idx==UBRK_DONE || idx>srcLength) {
idx=srcLength;
if(titleStart<titleLimit) {
/* titlecase c which is from [titleStart..titleLimit[ */
- csc->cpStart=titleStart;
- csc->cpLimit=titleLimit;
- c=ucase_toFullTitle(csm->csp, c, utf16_caseContextIterator, csc, &s, csm->locale, &csm->locCache);
+ csc.cpStart=titleStart;
+ csc.cpLimit=titleLimit;
+ c=ucase_toFullTitle(csm->csp, c, utf16_caseContextIterator, &csc, &s, csm->locale, &locCache);
destIndex=appendResult(dest, destIndex, destCapacity, c, s);
/* Special case Dutch IJ titlecasing */
if ( titleStart+1 < idx &&
- ucase_getCaseLocale(csm->locale,&csm->locCache) == UCASE_LOC_DUTCH &&
+ ucase_getCaseLocale(csm->locale,&locCache) == UCASE_LOC_DUTCH &&
( src[titleStart] == (UChar32) 0x0049 || src[titleStart] == (UChar32) 0x0069 ) &&
( src[titleStart+1] == (UChar32) 0x004A || src[titleStart+1] == (UChar32) 0x006A )) {
c=(UChar32) 0x004A;
_caseMap(
csm, ucase_toFullLower,
dest+destIndex, destCapacity-destIndex,
- src, csc,
+ src, &csc,
titleLimit, idx,
pErrorCode);
} else {
return destIndex;
}
-#endif
+#endif // !UCONFIG_NO_BREAK_ITERATION
/* functions available in the common library (for unistr_case.cpp) */
-U_CFUNC int32_t
-ustr_toLower(const UCaseProps *csp,
- UChar *dest, int32_t destCapacity,
- const UChar *src, int32_t srcLength,
- const char *locale,
- UErrorCode *pErrorCode) {
- UCaseMap csm=UCASEMAP_INITIALIZER;
+U_CFUNC int32_t U_CALLCONV
+ustrcase_internalToLower(const UCaseMap *csm,
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ UErrorCode *pErrorCode) {
UCaseContext csc=UCASECONTEXT_INITIALIZER;
-
- csm.csp=csp;
- setTempCaseMap(&csm, locale, pErrorCode);
csc.p=(void *)src;
csc.limit=srcLength;
-
- return _caseMap(&csm, ucase_toFullLower,
- dest, destCapacity,
- src, &csc, 0, srcLength,
- pErrorCode);
+ return _caseMap(
+ csm, ucase_toFullLower,
+ dest, destCapacity,
+ src, &csc, 0, srcLength,
+ pErrorCode);
}
-U_CFUNC int32_t
-ustr_toUpper(const UCaseProps *csp,
- UChar *dest, int32_t destCapacity,
- const UChar *src, int32_t srcLength,
- const char *locale,
- UErrorCode *pErrorCode) {
- UCaseMap csm=UCASEMAP_INITIALIZER;
+U_CFUNC int32_t U_CALLCONV
+ustrcase_internalToUpper(const UCaseMap *csm,
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ UErrorCode *pErrorCode) {
UCaseContext csc=UCASECONTEXT_INITIALIZER;
-
- csm.csp=csp;
- setTempCaseMap(&csm, locale, pErrorCode);
csc.p=(void *)src;
csc.limit=srcLength;
-
- return _caseMap(&csm, ucase_toFullUpper,
- dest, destCapacity,
- src, &csc, 0, srcLength,
- pErrorCode);
+ return _caseMap(
+ csm, ucase_toFullUpper,
+ dest, destCapacity,
+ src, &csc, 0, srcLength,
+ pErrorCode);
}
-#if !UCONFIG_NO_BREAK_ITERATION
-
-U_CFUNC int32_t
-ustr_toTitle(const UCaseProps *csp,
- UChar *dest, int32_t destCapacity,
- const UChar *src, int32_t srcLength,
- UBreakIterator *titleIter,
- const char *locale, uint32_t options,
- UErrorCode *pErrorCode) {
- UCaseMap csm=UCASEMAP_INITIALIZER;
- UCaseContext csc=UCASECONTEXT_INITIALIZER;
- int32_t length;
-
- csm.csp=csp;
- csm.iter=titleIter;
- csm.options=options;
- setTempCaseMap(&csm, locale, pErrorCode);
- csc.p=(void *)src;
- csc.limit=srcLength;
-
- length=_toTitle(&csm,
- dest, destCapacity,
- src, &csc, srcLength,
- pErrorCode);
- if(titleIter==NULL && csm.iter!=NULL) {
- ubrk_close(csm.iter);
- }
- return length;
-}
-
-#endif
-
-U_CFUNC int32_t
+static int32_t
ustr_foldCase(const UCaseProps *csp,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
return destIndex;
}
-/*
- * Implement argument checking and buffer handling
- * for string case mapping as a common function.
- */
-
-/* common internal function for public API functions */
+U_CFUNC int32_t U_CALLCONV  // Case-folding mapper with the UStringCaseMapper parameter list.
+ustrcase_internalFold(const UCaseMap *csm,
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ UErrorCode *pErrorCode) {
+ return ustr_foldCase(csm->csp, dest, destCapacity, src, srcLength, csm->options, pErrorCode);  // only csp and options are read from csm
+}
-static int32_t
-caseMap(const UCaseMap *csm,
- UChar *dest, int32_t destCapacity,
- const UChar *src, int32_t srcLength,
- int32_t toWhichCase,
- UErrorCode *pErrorCode) {
+U_CFUNC int32_t
+ustrcase_map(const UCaseMap *csm,
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ UStringCaseMapper *stringCaseMapper,
+ UErrorCode *pErrorCode) {
UChar buffer[300];
UChar *temp;
int32_t destLength;
/* check argument values */
- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+ if(U_FAILURE(*pErrorCode)) {
return 0;
}
if( destCapacity<0 ||
temp=dest;
}
- destLength=0;
-
- if(toWhichCase==FOLD_CASE) {
- destLength=ustr_foldCase(csm->csp, temp, destCapacity, src, srcLength,
- csm->options, pErrorCode);
- } else {
- UCaseContext csc=UCASECONTEXT_INITIALIZER;
-
- csc.p=(void *)src;
- csc.limit=srcLength;
-
- if(toWhichCase==TO_LOWER) {
- destLength=_caseMap(csm, ucase_toFullLower,
- temp, destCapacity,
- src, &csc,
- 0, srcLength,
- pErrorCode);
- } else if(toWhichCase==TO_UPPER) {
- destLength=_caseMap(csm, ucase_toFullUpper,
- temp, destCapacity,
- src, &csc,
- 0, srcLength,
- pErrorCode);
- } else /* if(toWhichCase==TO_TITLE) */ {
-#if UCONFIG_NO_BREAK_ITERATION
- *pErrorCode=U_UNSUPPORTED_ERROR;
-#else
- /* UCaseMap is actually non-const in toTitle() APIs. */
- destLength=_toTitle((UCaseMap *)csm, temp, destCapacity,
- src, &csc, srcLength,
- pErrorCode);
-#endif
- }
- }
+ destLength=stringCaseMapper(csm, temp, destCapacity, src, srcLength, pErrorCode);
if(temp!=dest) {
/* copy the result string to the destination buffer */
if(destLength>0) {
/* public API functions */
-U_CAPI int32_t U_EXPORT2
-u_strToLower(UChar *dest, int32_t destCapacity,
- const UChar *src, int32_t srcLength,
- const char *locale,
- UErrorCode *pErrorCode) {
- UCaseMap csm=UCASEMAP_INITIALIZER;
- setTempCaseMap(&csm, locale, pErrorCode);
- return caseMap(&csm,
- dest, destCapacity,
- src, srcLength,
- TO_LOWER, pErrorCode);
-}
-
-U_CAPI int32_t U_EXPORT2
-u_strToUpper(UChar *dest, int32_t destCapacity,
- const UChar *src, int32_t srcLength,
- const char *locale,
- UErrorCode *pErrorCode) {
- UCaseMap csm=UCASEMAP_INITIALIZER;
- setTempCaseMap(&csm, locale, pErrorCode);
- return caseMap(&csm,
- dest, destCapacity,
- src, srcLength,
- TO_UPPER, pErrorCode);
-}
-
-#if !UCONFIG_NO_BREAK_ITERATION
-
-U_CAPI int32_t U_EXPORT2
-u_strToTitle(UChar *dest, int32_t destCapacity,
- const UChar *src, int32_t srcLength,
- UBreakIterator *titleIter,
- const char *locale,
- UErrorCode *pErrorCode) {
- UCaseMap csm=UCASEMAP_INITIALIZER;
- int32_t length;
-
- csm.iter=titleIter;
- setTempCaseMap(&csm, locale, pErrorCode);
- length=caseMap(&csm,
- dest, destCapacity,
- src, srcLength,
- TO_TITLE, pErrorCode);
- if(titleIter==NULL && csm.iter!=NULL) {
- ubrk_close(csm.iter);
- }
- return length;
-}
-
-U_CAPI int32_t U_EXPORT2
-ucasemap_toTitle(UCaseMap *csm,
- UChar *dest, int32_t destCapacity,
- const UChar *src, int32_t srcLength,
- UErrorCode *pErrorCode) {
- return caseMap(csm,
- dest, destCapacity,
- src, srcLength,
- TO_TITLE, pErrorCode);
-}
-
-#endif
-
U_CAPI int32_t U_EXPORT2
u_strFoldCase(UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UCaseMap csm=UCASEMAP_INITIALIZER;
csm.csp=ucase_getSingleton();
csm.options=options;
- return caseMap(&csm,
- dest, destCapacity,
- src, srcLength,
- FOLD_CASE, pErrorCode);
+ return ustrcase_map(
+ &csm,
+ dest, destCapacity,
+ src, srcLength,
+ ustrcase_internalFold, pErrorCode);
}
/* case-insensitive string comparisons -------------------------------------- */
--- /dev/null
+/*
+*******************************************************************************
+* Copyright (C) 2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: ustrcase_locale.cpp
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2011may31
+* created by: Markus W. Scherer
+*
+* Locale-sensitive case mapping functions (ones that call uloc_getDefault())
+* were moved here to break dependency cycles among parts of the common library.
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/ucasemap.h"
+#include "unicode/uloc.h"
+#include "unicode/ustring.h"
+#include "ucase.h"
+#include "ustr_imp.h"
+
+U_CFUNC void  // Stores the leading subtag of locale (at most 3 characters) in csm->locale.
+ustrcase_setTempCaseMapLocale(UCaseMap *csm, const char *locale) {
+ /*
+ * We could call ucasemap_setLocale(), but here we really only care about
+ * the initial language subtag, we need not return the real string via
+ * ucasemap_getLocale(), and we don't care about only getting "x" from
+ * "x-some-thing" etc.
+ *
+ * We ignore locales with a longer-than-3 initial subtag.
+ *
+ * We also do not fill in the locCache because it is rarely used,
+ * and not worth setting unless we reuse it for many case mapping operations.
+ * (That's why UCaseMap was created.)
+ */
+ int i;
+ char c;
+
+ /* the internal functions require locale!=NULL */
+ if(locale==NULL) {
+ // Do not call uprv_getDefaultLocaleID() because that does not see
+ // changes to the default locale via uloc_setDefault().
+ // It would also be inefficient if used frequently because uprv_getDefaultLocaleID()
+ // does not cache the locale ID.
+ //
+ // Unfortunately, uloc_getDefault() has many dependencies.
+ // We only care about a small set of language subtags,
+ // and we do not need the locale ID to be canonicalized.
+ //
+ // Best is to not call case mapping functions with a NULL locale ID.
+ locale=uloc_getDefault();
+ }
+ for(i=0; i<4 && (c=locale[i])!=0 && c!='-' && c!='_'; ++i) {  // copy up to 4 characters, stopping at NUL, '-' or '_'
+ csm->locale[i]=c;
+ }
+ if(i<=3) {
+ csm->locale[i]=0; /* Up to 3 non-separator characters. */
+ } else {  // i==4: four characters were copied without reaching a separator
+ csm->locale[0]=0; /* Longer-than-3 initial subtag: Ignore. */
+ }
+}
+
+/*
+ * Set parameters on an empty UCaseMap, for UCaseMap-less API functions.
+ * Do this fast because it is called with every function call.
+ */
+static inline void
+setTempCaseMap(UCaseMap *csm, const char *locale) {
+ if(csm->csp==NULL) {  // fill in the case properties only if the caller left them unset
+ csm->csp=ucase_getSingleton();
+ }
+ if(locale!=NULL && locale[0]==0) {  // empty locale ID: store an empty string directly
+ csm->locale[0]=0;
+ } else {  // NULL or non-empty locale ID: let the out-of-line helper fill csm->locale
+ ustrcase_setTempCaseMapLocale(csm, locale);
+ }
+}
+
+/* public API functions */
+
+U_CAPI int32_t U_EXPORT2  // Lowercases src into dest using a temporary UCaseMap.
+u_strToLower(UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ const char *locale,
+ UErrorCode *pErrorCode) {
+ UCaseMap csm=UCASEMAP_INITIALIZER;  // temporary case map on the stack, used for this call only
+ setTempCaseMap(&csm, locale);  // sets csm.csp and csm.locale
+ return ustrcase_map(
+ &csm,
+ dest, destCapacity,
+ src, srcLength,
+ ustrcase_internalToLower, pErrorCode);
+}
+
+U_CAPI int32_t U_EXPORT2  // Uppercases src into dest using a temporary UCaseMap.
+u_strToUpper(UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ const char *locale,
+ UErrorCode *pErrorCode) {
+ UCaseMap csm=UCASEMAP_INITIALIZER;  // temporary case map on the stack, used for this call only
+ setTempCaseMap(&csm, locale);  // sets csm.csp and csm.locale
+ return ustrcase_map(
+ &csm,
+ dest, destCapacity,
+ src, srcLength,
+ ustrcase_internalToUpper, pErrorCode);
+}
__TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
return length;
}
+
+// Compute the hash code for a string -------------------------------------- ***
+
+// Moved here from uhash.c so that UnicodeString::hashCode() does not depend
+// on UHashtable code.
+
+/*
+ Compute the hash by iterating sparsely over about 32 (up to 63)
+ characters spaced evenly through the string. For each character,
+ multiply the previous hash value by a prime number and add the new
+ character in, like a linear congruential random number generator,
+ producing a pseudorandom deterministic value well distributed over
+ the output range. [LIU]
+*/
+
+/*
+ * Shared body of the ustr_hash*N() functions below.
+ *
+ * Walks the string with a stride of ((len - 32) / 32) + 1 elements, so strings
+ * shorter than 32 elements are visited in full and longer ones are sampled.
+ * Each visited element is folded in as hash = hash * 37 + element.
+ *
+ * The accumulator is unsigned on purpose: the running value outgrows 32 bits
+ * after only a few elements, and unsigned arithmetic wraps modulo 2^32 where
+ * signed overflow would be undefined behavior. The value is converted to
+ * int32_t only when it is returned.
+ *
+ * A NULL string, or a length <= 0, yields 0.
+ */
+#define STRING_HASH(TYPE, STR, STRLEN, DEREF) \
+    uint32_t hash = 0; \
+    const TYPE *p = (const TYPE*) STR; \
+    if (p != NULL) { \
+        int32_t len = (int32_t)(STRLEN); \
+        int32_t inc = ((len - 32) / 32) + 1; \
+        const TYPE *limit = p + len; \
+        while (p<limit) { \
+            hash = (hash * 37) + DEREF; \
+            p += inc; \
+        } \
+    } \
+    return (int32_t)hash
+
+/* Used by UnicodeString to compute its hashcode - Not public API. */
+U_CAPI int32_t U_EXPORT2
+ustr_hashUCharsN(const UChar *str, int32_t length) {
+    STRING_HASH(UChar, str, length, *p);
+}
+
+/* Hashes length chars of str, reading each one as an unsigned byte. */
+U_CAPI int32_t U_EXPORT2
+ustr_hashCharsN(const char *str, int32_t length) {
+    STRING_HASH(uint8_t, str, length, *p);
+}
+
+/* Like ustr_hashCharsN(), but passes each char through uprv_tolower() first. */
+U_CAPI int32_t U_EXPORT2
+ustr_hashICharsN(const char *str, int32_t length) {
+    STRING_HASH(char, str, length, (uint8_t)uprv_tolower(*p));
+}
*/
#include "unicode/unimatch.h"
-#include "unicode/uniset.h"
#include "patternprops.h"
#include "util.h"
}
U_NAMESPACE_END
-
-U_CAPI U_NAMESPACE_QUALIFIER UnicodeSet* U_EXPORT2
-uprv_openPatternWhiteSpaceSet(UErrorCode* ec) {
- if(U_FAILURE(*ec)) {
- return NULL;
- }
- // create a set with the Pattern_White_Space characters,
- // without a pattern string for fewer code dependencies
- U_NAMESPACE_QUALIFIER UnicodeSet *set=new U_NAMESPACE_QUALIFIER UnicodeSet(9, 0xd);
- // Check for new failure.
- if (set == NULL) {
- *ec = U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
- set->UnicodeSet::add(0x20).add(0x85).add(0x200e, 0x200f).add(0x2028, 0x2029);
- return set;
-}
-
-//eof
U_NAMESPACE_BEGIN
class UnicodeMatcher;
-class UnicodeSet;
class U_COMMON_API ICU_Utility /* not : public UObject because all methods are static */ {
public:
U_NAMESPACE_END
-/**
- * Returns a new set with the Pattern_White_Space characters.
- * The caller must close/delete the result.
- * Stable set of characters, won't change.
- * See UAX #31 Identifier and Pattern Syntax: http://www.unicode.org/reports/tr31/
- * @internal
- */
-U_CAPI U_NAMESPACE_QUALIFIER UnicodeSet* U_EXPORT2
-uprv_openPatternWhiteSpaceSet(UErrorCode* ec);
-
#endif
//eof
/*
*******************************************************************************
-* Copyright (C) 2010, International Business Machines
+* Copyright (C) 2010-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: uts46.cpp
#include "cmemory.h"
#include "cstring.h"
#include "punycode.h"
+#include "ubidi_props.h"
#include "ustr_imp.h"
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
UBool
UTS46::isLabelOkContextJ(const UChar *label, int32_t labelLength) const {
+ const UBiDiProps *bdp=ubidi_getSingleton();
// [IDNA2008-Tables]
// 200C..200D ; CONTEXTJ # ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
for(int32_t i=0; i<labelLength; ++i) {
}
// check precontext (Joining_Type:{L,D})(Joining_Type:T)*
for(;;) {
- UJoiningType type=(UJoiningType)u_getIntPropertyValue(c, UCHAR_JOINING_TYPE);
+ UJoiningType type=ubidi_getJoiningType(bdp, c);
if(type==U_JT_TRANSPARENT) {
if(j==0) {
return FALSE;
return FALSE;
}
U16_NEXT_UNSAFE(label, j, c);
- UJoiningType type=(UJoiningType)u_getIntPropertyValue(c, UCHAR_JOINING_TYPE);
+ UJoiningType type=ubidi_getJoiningType(bdp, c);
if(type==U_JT_TRANSPARENT) {
// just skip this character
} else if(type==U_JT_RIGHT_JOINING || type==U_JT_DUAL_JOINING) {
#include "uvector.h"
#include <string>
-#include <iostream>
+//#include <iostream>
U_NAMESPACE_BEGIN
UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(AlphabeticIndex)
static int32_t U_CALLCONV
recordCompareFn(const void *context, const void *left, const void *right);
-//
-// UHash support function, delete a UnicodeSet
-// TODO: move this function into uhash.
-//
-static void U_CALLCONV
-uhash_deleteUnicodeSet(void *obj) {
- delete static_cast<UnicodeSet *>(obj);
-}
-
// UVector<Bucket *> support function, delete a Bucket.
static void U_CALLCONV
alphaIndex_deleteBucket(void *obj) {
// that are the same according to the collator
UVector preferenceSorting(status); // Vector of UnicodeStrings; owned by the vector.
- preferenceSorting.setDeleter(uhash_deleteUnicodeString);
+ preferenceSorting.setDeleter(uprv_deleteUObject);
appendUnicodeSetToUVector(preferenceSorting, *initialLabels_, status);
preferenceSorting.sortWithUComparator(PreferenceComparator, &status, status);
const int32_t size = labelSet.size() - 1;
if (size > maxLabelCount_) {
UVector *newLabels = new UVector(status);
- newLabels->setDeleter(uhash_deleteUnicodeString);
+ newLabels->setDeleter(uprv_deleteUObject);
int32_t count = 0;
int32_t old = -1;
for (int32_t srcIndex=0; srcIndex<labels_->size(); srcIndex++) {
uhash_compareUnicodeString, // key Comparator,
NULL, // value Comparator
&status);
- uhash_setKeyDeleter(alreadyIn_, uhash_deleteUnicodeString);
- uhash_setValueDeleter(alreadyIn_, uhash_deleteUnicodeSet);
+ uhash_setKeyDeleter(alreadyIn_, uprv_deleteUObject);
+ uhash_setValueDeleter(alreadyIn_, uprv_deleteUObject);
bucketList_ = new UVector(status);
bucketList_->setDeleter(alphaIndex_deleteBucket);
labels_ = new UVector(status);
- labels_->setDeleter(uhash_deleteUnicodeString);
+ labels_->setDeleter(uprv_deleteUObject);
labels_->setComparer(uhash_compareUnicodeString);
inputRecords_ = new UVector(status);
inputRecords_->setDeleter(alphaIndex_deleteRecord);
}
UVector *dest = new UVector(status);
- dest->setDeleter(uhash_deleteUnicodeString);
+ dest->setDeleter(uprv_deleteUObject);
for (uint32_t i = 0; i < sizeof(results) / sizeof(results[0]); ++i) {
if (results[i].length() > 0) {
dest->addElement(results[i].clone(), status);
return NULL;
}
UVector *dest = new UVector(status);
- dest->setDeleter(uhash_deleteUnicodeString);
+ dest->setDeleter(uprv_deleteUObject);
if (dest == NULL && U_SUCCESS(status)) {
status = U_MEMORY_ALLOCATION_ERROR;
}
} else {
ret->append((UChar)0x40); // '@' is a variant character
ret->append(UNICODE_STRING("calendar=", 9));
- ret->append(UnicodeString(gCalTypes[getCalendarTypeForLocale(loc.getName())]));
+ ret->append(UnicodeString(gCalTypes[getCalendarTypeForLocale(loc.getName())], -1, US_INV));
}
return ret;
}
std::cout << "pluralCount: " << pluralCount << "; pattern: " << result_1 << "\n";
#endif
- fPluralCountToCurrencyUnitPattern->put(UnicodeString(pluralCount), pattern, status);
+ fPluralCountToCurrencyUnitPattern->put(UnicodeString(pluralCount, -1, US_INV), pattern, status);
}
}
}
/* ------------------------------------------------------------------ */
/* Decimal Context module */
/* ------------------------------------------------------------------ */
-/* Copyright (c) IBM Corporation, 2000-2010. All rights reserved. */
+/* Copyright (c) IBM Corporation, 2000-2011. All rights reserved. */
/* */
/* This software is made available under the terms of the */
/* ICU License -- ICU 1.8.1 and later. */
#include "decContext.h" /* context and base types */
#include "decNumberLocal.h" /* decNumber local types, etc. */
+#if 0 /* ICU: No need to test endianness at runtime. */
/* compile-time endian tester [assumes sizeof(Int)>1] */
static const Int mfcone=1; /* constant 1 */
static const Flag *mfctop=(Flag *)&mfcone; /* -> top byte */
#define LITEND *mfctop /* named flag; 1=little-endian */
+#endif
/* ------------------------------------------------------------------ */
/* round-for-reround digits */
/* ------------------------------------------------------------------ */
U_CAPI decContext * U_EXPORT2 uprv_decContextSetStatus(decContext *context, uInt status) {
 context->status|=status;
+#if 0 /* ICU: Do not raise signals. */
 if (status & context->traps) raise(SIGFPE);
+#endif /* 0 - end of disabled trap/raise(SIGFPE) check; status bits are still OR-ed in above */
 return context;} /* decContextSetStatus */
/* ------------------------------------------------------------------ */
/* */
/* No error is possible. */
/* ------------------------------------------------------------------ */
+#if 0 /* ICU: Unused function. Anyway, do not call printf(). */
U_CAPI Int U_EXPORT2 uprv_decContextTestEndian(Flag quiet) {
Int res=0; /* optimist */
uInt dle=(uInt)DECLITEND; /* unsign */
}
return res;
} /* decContextTestEndian */
+#endif
/* ------------------------------------------------------------------ */
/* decContextTestSavedStatus -- test bits in saved status */
// For most locale, the patterns are probably the same for all
// plural count. If not, the right pattern need to be re-applied
// during format.
- fCurrencyPluralInfo->getCurrencyPluralPattern("other", currencyPluralPatternForOther);
+ fCurrencyPluralInfo->getCurrencyPluralPattern(UNICODE_STRING("other", 5), currencyPluralPatternForOther);
patternUsed = ¤cyPluralPatternForOther;
// TODO: not needed?
setCurrencyForSymbols();
*fPosPrefixPattern,
*fPosSuffixPattern,
UCURR_SYMBOL_NAME);
- fAffixPatternsForCurrency->put("default", affixPtn, status);
+ fAffixPatternsForCurrency->put(UNICODE_STRING("default", 7), affixPtn, status);
}
// save the unique currency plural patterns of this locale.
const PluralRules* pluralRules = fCurrencyPluralInfo->getPluralRules();
StringEnumeration* keywords = pluralRules->getKeywords(status);
if (U_SUCCESS(status)) {
- const char* pluralCountCh;
- while ((pluralCountCh = keywords->next(NULL, status)) != NULL) {
+ const UnicodeString* pluralCount;
+ while ((pluralCount = keywords->snext(status)) != NULL) {
if ( U_SUCCESS(status) ) {
- UnicodeString pluralCount = UnicodeString(pluralCountCh);
- expandAffixAdjustWidth(&pluralCount);
+ expandAffixAdjustWidth(pluralCount);
AffixesForCurrency* affix = new AffixesForCurrency(
fNegativePrefix, fNegativeSuffix, fPositivePrefix, fPositiveSuffix);
- fAffixesForCurrency->put(pluralCount, affix, status);
+ fAffixesForCurrency->put(*pluralCount, affix, status);
}
}
}
const PluralRules* pluralRules = fCurrencyPluralInfo->getPluralRules();
StringEnumeration* keywords = pluralRules->getKeywords(status);
if (U_SUCCESS(status)) {
- const char* pluralCountCh;
- while ((pluralCountCh = keywords->next(NULL, status)) != NULL) {
+ const UnicodeString* pluralCount;
+ while ((pluralCount = keywords->snext(status)) != NULL) {
if ( U_SUCCESS(status) ) {
- UnicodeString pluralCount = UnicodeString(pluralCountCh);
UnicodeString ptn;
- fCurrencyPluralInfo->getCurrencyPluralPattern(pluralCount, ptn);
- applyPatternInternally(pluralCount, ptn, false, parseErr, status);
+ fCurrencyPluralInfo->getCurrencyPluralPattern(*pluralCount, ptn);
+ applyPatternInternally(*pluralCount, ptn, false, parseErr, status);
AffixesForCurrency* affix = new AffixesForCurrency(
fNegativePrefix, fNegativeSuffix, fPositivePrefix, fPositiveSuffix);
- fPluralAffixesForCurrency->put(pluralCount, affix, status);
+ fPluralAffixesForCurrency->put(*pluralCount, affix, status);
}
}
}
// For other cases, pluralCount == null,
// and plural names are not needed.
int32_t len;
- // TODO: num of char in plural count
- char pluralCountChar[10];
- if (pluralCount->length() >= 10) {
- break;
- }
- pluralCount->extract(0, pluralCount->length(), pluralCountChar);
+ CharString pluralCountChar;
+ pluralCountChar.appendInvariantChars(*pluralCount, ec);
UBool isChoiceFormat;
const UChar* s = ucurr_getPluralName(currencyUChars,
fSymbols != NULL ? fSymbols->getLocale().getName() :
Locale::getDefault().getName(), &isChoiceFormat,
- pluralCountChar, &len, &ec);
+ pluralCountChar.data(), &len, &ec);
affix += UnicodeString(s, len);
handler.addAttribute(kCurrencyField, beginIdx, affix.length());
} else if(intl) {
DateIntervalInfo::parseSkeleton(inputSkeleton, inputSkeletonFieldWidth);
DateIntervalInfo::parseSkeleton(bestMatchSkeleton, bestMatchSkeletonFieldWidth);
if ( differenceInfo == 2 ) {
- adjustedPtn.findAndReplace("v", "z");
+ adjustedPtn.findAndReplace(UnicodeString((UChar)0x76 /* v */),
+ UnicodeString((UChar)0x7a /* z */));
}
UBool inQuote = false;
int32_t size = ures_getSize(itvDtPtnResource);
int32_t index;
for ( index = 0; index < size; ++index ) {
- UResourceBundle* oneRes = ures_getByIndex(itvDtPtnResource, index,
- NULL, &status);
+ LocalUResourceBundlePointer oneRes(ures_getByIndex(itvDtPtnResource, index,
+ NULL, &status));
if ( U_SUCCESS(status) ) {
- const char* skeleton = ures_getKey(oneRes);
- if ( skeleton == NULL ||
- skeletonSet.geti(UnicodeString(skeleton)) == 1 ) {
- ures_close(oneRes);
+ const char* skeleton = ures_getKey(oneRes.getAlias());
+ if (skeleton == NULL) {
continue;
}
- skeletonSet.puti(UnicodeString(skeleton), 1, status);
+ UnicodeString skeletonUniStr(skeleton, -1, US_INV);
+ if ( skeletonSet.geti(skeletonUniStr) == 1 ) {
+ continue;
+ }
+ skeletonSet.puti(skeletonUniStr, 1, status);
if ( uprv_strcmp(skeleton, gFallbackPatternTag) == 0 ) {
- ures_close(oneRes);
continue; // fallback
}
-
- UResourceBundle* intervalPatterns = ures_getByKey(
- itvDtPtnResource, skeleton, NULL, &status);
-
+
+ LocalUResourceBundlePointer intervalPatterns(ures_getByKey(
+ itvDtPtnResource, skeleton, NULL, &status));
+
if ( U_FAILURE(status) ) {
- ures_close(intervalPatterns);
- ures_close(oneRes);
break;
}
if ( intervalPatterns == NULL ) {
- ures_close(intervalPatterns);
- ures_close(oneRes);
continue;
}
-
+
const UChar* pattern;
const char* key;
int32_t ptLength;
- int32_t ptnNum = ures_getSize(intervalPatterns);
+ int32_t ptnNum = ures_getSize(intervalPatterns.getAlias());
int32_t ptnIndex;
for ( ptnIndex = 0; ptnIndex < ptnNum; ++ptnIndex ) {
- pattern = ures_getNextString(intervalPatterns, &ptLength, &key,
+ pattern = ures_getNextString(intervalPatterns.getAlias(), &ptLength, &key,
&status);
if ( U_FAILURE(status) ) {
break;
calendarField = UCAL_MINUTE;
}
if ( calendarField != UCAL_FIELD_COUNT ) {
- setIntervalPatternInternally(skeleton, calendarField, pattern,status);
+ setIntervalPatternInternally(skeletonUniStr, calendarField, pattern,status);
}
}
- ures_close(intervalPatterns);
}
- ures_close(oneRes);
}
}
ures_close(itvDtPtnResource);
/*
*******************************************************************************
-* Copyright (C) 2010, International Business Machines Corporation and *
-* others. All Rights Reserved. *
+* Copyright (C) 2010-2011, International Business Machines Corporation and
+* others. All Rights Reserved.
*******************************************************************************
*/
while ((key = e->next((int32_t *)0, status)) != NULL) {
locale.getKeywordValue(key, value, ULOC_KEYWORD_AND_VALUES_CAPACITY, status);
appendWithSep(resultRemainder, keyDisplayName(key, temp))
- .append("=")
+ .append((UChar)0x3d /* = */)
.append(keyValueDisplayName(key, value, temp2));
}
delete e;
delete formatter;
return;
}
- uhash_setValueDeleter(cachedFormatters, uhash_deleteUObject);
+ uhash_setValueDeleter(cachedFormatters, uprv_deleteUObject);
}
if (formatter == NULL) {
formatter = new DummyFormat();
status = U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
- fFormatNames->setDeleter(uhash_deleteUObject);
+ fFormatNames->setDeleter(uprv_deleteUObject);
for (int32_t partIndex = 0; (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) {
fFormatNames->addElement(new UnicodeString(getArgName(partIndex)), status);
if (U_FAILURE(ec)) {
return;
}
- uhash_setValueDeleter(cachedFormatters, uhash_deleteUObject);
+ uhash_setValueDeleter(cachedFormatters, uprv_deleteUObject);
}
const int32_t count = uhash_count(that.cachedFormatters);
#include "plurrule_impl.h"
#include "putilimp.h"
#include "ucln_in.h"
-#include "uhash.h"
#include "ustrfmt.h"
#include "locutil.h"
if (U_FAILURE(status)) {
return;
}
- fKeywordNames.setDeleter(uhash_deleteUObject);
+ fKeywordNames.setDeleter(uprv_deleteUObject);
UBool addKeywordOther=TRUE;
RuleChain *node=header;
while(node!=NULL) {
/*
**********************************************************************
-* Copyright (C) 1999-2008, International Business Machines
+* Copyright (C) 1999-2011, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Date Name Description
if (U_FAILURE(status)) {
return;
}
- variableNames.setValueDeleter(uhash_deleteUnicodeString);
+ variableNames.setValueDeleter(uprv_deleteUObject);
variables = 0;
variablesLength = 0;
}
{
UErrorCode status = U_ZERO_ERROR;
int32_t i = 0;
- variableNames.setValueDeleter(uhash_deleteUnicodeString);
+ variableNames.setValueDeleter(uprv_deleteUObject);
int32_t pos = -1;
const UHashElement *e;
while ((e = other.variableNames.nextElement(pos)) != 0) {
variablesVector(statusReturn),
segmentObjects(statusReturn)
{
- idBlockVector.setDeleter(uhash_deleteUnicodeString);
+ idBlockVector.setDeleter(uprv_deleteUObject);
curData = NULL;
compoundFilter = NULL;
parseData = NULL;
- variableNames.setValueDeleter(uhash_deleteUnicodeString);
+ variableNames.setValueDeleter(uprv_deleteUObject);
}
/**
// next step. Otherwise, all time zone names starting with GMT/UT/UTC
// (for example, "UTT") will fail.
if (gmtLen > 0 && ((text.length() - start) == gmtLen)) {
- TimeZone *tz = TimeZone::createTimeZone(UnicodeString("Etc/GMT"));
+ TimeZone *tz = TimeZone::createTimeZone(UNICODE_STRING("Etc/GMT", 7));
cal.adoptTimeZone(tz);
return start + gmtLen;
}
// Step 5
// If we saw standalone GMT zero pattern, then use GMT.
if (gmtLen > 0) {
- TimeZone *tz = TimeZone::createTimeZone(UnicodeString("Etc/GMT"));
+ TimeZone *tz = TimeZone::createTimeZone(UNICODE_STRING("Etc/GMT", 7));
cal.adoptTimeZone(tz);
return start + gmtLen;
}
fTimeIgnorables(NULL),
fOtherIgnorables(NULL)
{
- fDateIgnorables = new UnicodeSet("[-,./[:whitespace:]]", *status);
- fTimeIgnorables = new UnicodeSet("[-.:[:whitespace:]]", *status);
- fOtherIgnorables = new UnicodeSet("[:whitespace:]", *status);
-
+ fDateIgnorables = new UnicodeSet(UNICODE_STRING("[-,./[:whitespace:]]", 20), *status);
+ fTimeIgnorables = new UnicodeSet(UNICODE_STRING("[-.:[:whitespace:]]", 19), *status);
+ fOtherIgnorables = new UnicodeSet(UNICODE_STRING("[:whitespace:]", 14), *status);
+
// Check for null pointers
if (fDateIgnorables == NULL || fTimeIgnorables == NULL || fOtherIgnorables == NULL) {
goto ExitConstrDeleteAll;
}
-
+
// Freeze all the sets
fDateIgnorables->freeze();
fTimeIgnorables->freeze();
fOtherIgnorables->freeze();
-
+
return; // If we reached this point, everything is fine so just exit
-
+
ExitConstrDeleteAll: // Remove all sets and return error
delete fDateIgnorables; fDateIgnorables = NULL;
delete fTimeIgnorables; fTimeIgnorables = NULL;
delete fOtherIgnorables; fOtherIgnorables = NULL;
-
+
*status = U_MEMORY_ALLOCATION_ERROR;
}
#include "cmemory.h"
#include "cstring.h"
#include "putilimp.h"
+#include "ustr_imp.h"
/* public RuleBasedCollator constructor ---------------------------------- */
{
int32_t length;
const UChar *rules = ucol_getRules(ucollator, &length);
- return uhash_hashUCharsN(rules, length);
+ return ustr_hashUCharsN(rules, length);
}
/**
#if !UCONFIG_NO_FORMATTING
+#include "charstr.h"
#include "cmemory.h"
#include "cstring.h"
#include "hash.h"
if (fNumberFormat != NULL) {
messageFormat->setFormat(0, *fNumberFormat);
}
- MessageFormat** formatters = (MessageFormat**)countToPatterns->get(pluralCount);
+ UnicodeString pluralCountUniStr(pluralCount, -1, US_INV);
+ MessageFormat** formatters = (MessageFormat**)countToPatterns->get(pluralCountUniStr);
if (formatters == NULL) {
formatters = (MessageFormat**)uprv_malloc(UTMUTFMT_FORMAT_STYLE_COUNT*sizeof(MessageFormat*));
formatters[UTMUTFMT_FULL_STYLE] = NULL;
formatters[UTMUTFMT_ABBREVIATED_STYLE] = NULL;
- countToPatterns->put(pluralCount, formatters, err);
+ countToPatterns->put(pluralCountUniStr, formatters, err);
if (U_FAILURE(err)) {
uprv_free(formatters);
}
//
StringEnumeration* keywords = fPluralRules->getKeywords(err);
if (U_SUCCESS(err)) {
- const char* pluralCount;
- while ((pluralCount = keywords->next(NULL, err)) != NULL) {
+ const UnicodeString* pluralCount;
+ while ((pluralCount = keywords->snext(err)) != NULL) {
if ( U_SUCCESS(err) ) {
for (int32_t i = 0; i < TimeUnit::UTIMEUNIT_FIELD_COUNT; ++i) {
// for each time unit,
}
fTimeUnitToCountToPatterns[i] = countToPatterns;
}
- MessageFormat** formatters = (MessageFormat**)countToPatterns->get(pluralCount);
+ MessageFormat** formatters = (MessageFormat**)countToPatterns->get(*pluralCount);
if( formatters == NULL || formatters[style] == NULL ) {
// look through parents
const char* localeName = fLocale.getName();
+ CharString pluralCountChars;
+ pluralCountChars.appendInvariantChars(*pluralCount, err);
searchInLocaleChain(style, key, localeName,
(TimeUnit::UTimeUnitFields)i,
- pluralCount, pluralCount,
+ *pluralCount, pluralCountChars.data(),
countToPatterns, err);
}
}
void
TimeUnitFormat::searchInLocaleChain(UTimeUnitFormatStyle style, const char* key, const char* localeName,
TimeUnit::UTimeUnitFields srcTimeUnitField,
- const char* srcPluralCount,
+ const UnicodeString& srcPluralCount,
const char* searchPluralCount,
Hashtable* countToPatterns,
UErrorCode& err) {
/*
**********************************************************************
-* Copyright (c) 2001-2010, International Business Machines
+* Copyright (c) 2001-2011, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Date Name Description
availableIDs(status)
{
registry.setValueDeleter(deleteEntry);
- availableIDs.setDeleter(uhash_deleteUnicodeString);
+ availableIDs.setDeleter(uprv_deleteUObject);
availableIDs.setComparer(uhash_compareCaselessUnicodeString);
specDAG.setValueDeleter(uhash_deleteHashtable);
}
if (U_FAILURE(status) || targets == 0) {
return;
}
- targets->setValueDeleter(uhash_deleteUObject);
+ targets->setValueDeleter(uprv_deleteUObject);
specDAG.put(source, targets, status);
}
UVector *variants = (UVector*) targets->get(target);
if (variants == 0) {
- variants = new UVector(uhash_deleteUnicodeString,
+ variants = new UVector(uprv_deleteUObject,
uhash_compareCaselessUnicodeString, status);
if (variants == 0) {
return;
/*
**********************************************************************
-* Copyright (c) 2002-2009, International Business Machines Corporation
+* Copyright (c) 2002-2011, International Business Machines Corporation
* and others. All Rights Reserved.
**********************************************************************
* Date Name Description
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
- special_inverses->setValueDeleter(uhash_deleteUnicodeString);
+ special_inverses->setValueDeleter(uprv_deleteUObject);
umtx_lock(&LOCK);
if (SPECIAL_INVERSES == NULL) {
if (!gTimeZoneFormatCacheInitialized) {
gTimeZoneFormatCache = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &status);
if (U_SUCCESS(status)) {
- uhash_setKeyDeleter(gTimeZoneFormatCache, uhash_freeBlock);
+ uhash_setKeyDeleter(gTimeZoneFormatCache, uprv_free);
uhash_setValueDeleter(gTimeZoneFormatCache, deleteTimeZoneFormatCacheEntry);
gTimeZoneFormatCacheInitialized = TRUE;
ucln_i18n_registerCleanup(UCLN_I18N_TIMEZONEFORMAT, timeZoneFormat_cleanup);
.append(p->mzID)
.append((UChar)0x23)
.append((UChar)(p->isLong ? 0x4C : 0x53));
- return uhash_hashUCharsN(str.getBuffer(), str.length());
+ return str.hashCode();
}
/**
if ((nameinfo->type & fTypes) != 0) {
// matches a requested type
if (fResults == NULL) {
- fResults = new UVector(uhash_freeBlock, NULL, status);
+ fResults = new UVector(uprv_free, NULL, status);
if (fResults == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
}
cleanup();
return;
}
- uhash_setKeyDeleter(fPartialLocationNamesMap, uhash_freeBlock);
+ uhash_setKeyDeleter(fPartialLocationNamesMap, uprv_free);
// no value deleter
// target region
if (!gTimeZoneNamesCacheInitialized) {
gTimeZoneNamesCache = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &status);
if (U_SUCCESS(status)) {
- uhash_setKeyDeleter(gTimeZoneNamesCache, uhash_freeBlock);
+ uhash_setKeyDeleter(gTimeZoneNamesCache, uprv_free);
uhash_setValueDeleter(gTimeZoneNamesCache, deleteTimeZoneNamesCacheEntry);
gTimeZoneNamesCacheInitialized = TRUE;
ucln_i18n_registerCleanup(UCLN_I18N_TIMEZONENAMES, timeZoneNames_cleanup);
int32_t sep = tzID.lastIndexOf((UChar)0x2F /* '/' */);
if (sep > 0 && sep + 1 < tzID.length()) {
name.setTo(tzID, sep + 1);
- name.findAndReplace("_", " ");
+ name.findAndReplace(UnicodeString((UChar)0x5f /* _ */),
+ UnicodeString((UChar)0x20 /* space */));
} else {
name.setToBogus();
}
if ((nameinfo->type & fTypes) != 0) {
// matches a requested type
if (fResults == NULL) {
- fResults = new UVector(uhash_freeBlock, NULL, status);
+ fResults = new UVector(uprv_free, NULL, status);
if (fResults == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
}
UnicodeString&
TimeZoneNamesImpl::getReferenceZoneID(const UnicodeString& mzID, const char* region, UnicodeString& tzID) const {
- ZoneMeta::getZoneIdByMetazone(mzID, UnicodeString(region), tzID);
+ ZoneMeta::getZoneIdByMetazone(mzID, UnicodeString(region, -1, US_INV), tzID);  // region now goes through the (chars, -1, US_INV) constructor instead of UnicodeString(const char*); the lookup receives tzID, which is then returned
 return tzID;
}
static void toUpper(const char* src, char* dst, uint32_t length) {
 for (uint32_t i = 0; *src != '\0' && i < length - 1; ++src, ++dst, ++i) {
- *dst = toupper(*src);
+ *dst = uprv_toupper(*src);  // writes the upper-cased char, then dst is NUL-terminated after the loop; NOTE(review): the loop bound `length - 1` is unsigned, so length == 0 wraps instead of copying nothing - confirm callers never pass 0
 }
 *dst = '\0';
}
/*
*******************************************************************************
*
-* Copyright (C) 2001-2010, International Business Machines
+* Copyright (C) 2001-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
if (U_FAILURE(*status)) {
goto allocation_failure;
}
- uhash_setValueDeleter(t->prefixLookup, uhash_freeBlock);
+ uhash_setValueDeleter(t->prefixLookup, uprv_free);
t->contractions = uprv_cnttab_open(t->mapping, status);
if (U_FAILURE(*status)) {
#endif
-/*static inline void U_CALLCONV
-uhash_freeBlockWrapper(void *obj) {
- uhash_freeBlock(obj);
-}*/
-
-
typedef struct {
uint32_t startCE;
uint32_t startContCE;
if(U_FAILURE(*status)) {
return;
}
- uhash_setValueDeleter(src->tailored, uhash_freeBlock);
+ uhash_setValueDeleter(src->tailored, uprv_free);
src->opts = (UColOptionSet *)uprv_malloc(sizeof(UColOptionSet));
/* test for NULL */
// fill in fTimeUnitToCountToPatterns from locale fall-back chain
void searchInLocaleChain(UTimeUnitFormatStyle style, const char* key, const char* localeName,
- TimeUnit::UTimeUnitFields field, const char*,
+ TimeUnit::UTimeUnitFields field, const UnicodeString&,
const char*, Hashtable*, UErrorCode&);
// initialize hash table
// Capture Group 8: A syntactically invalid line. Anything that didn't match before.
// Example Line from the confusables.txt source file:
// "1D702 ; 006E 0329 ; SL # MATHEMATICAL ITALIC SMALL ETA ... "
- fParseLine = uregex_openC(
+ UnicodeString pattern(
"(?m)^[ \\t]*([0-9A-Fa-f]+)[ \\t]+;" // Match the source char
"[ \\t]*([0-9A-Fa-f]+" // Match the replacement char(s)
"(?:[ \\t]+[0-9A-Fa-f]+)*)[ \\t]*;" // (continued)
"\\s*(?:(SL)|(SA)|(ML)|(MA))" // Match the table type
"[ \\t]*(?:#.*?)?$" // Match any trailing #comment
"|^([ \\t]*(?:#.*?)?)$" // OR match empty lines or lines with only a #comment
- "|^(.*?)$", // OR match any line, which catches illegal lines.
- 0, NULL, &status);
+ "|^(.*?)$", -1, US_INV); // OR match any line, which catches illegal lines.
+ // TODO: Why are we using the regex C API here? C++ would just take UnicodeString...
+ fParseLine = uregex_open(pattern.getBuffer(), pattern.length(), 0, NULL, &status);
// Regular expression for parsing a hex number out of a space-separated list of them.
// Capture group 1 gets the number, with spaces removed.
- fParseHexNum = uregex_openC("\\s*([0-9A-F]+)", 0, NULL, &status);
+ pattern = UNICODE_STRING_SIMPLE("\\s*([0-9A-F]+)");
+ fParseHexNum = uregex_open(pattern.getBuffer(), pattern.length(), 0, NULL, &status);
// Zap any Byte Order Mark at the start of input. Changing it to a space is benign
// given the syntax of the input.
// The expression will match _all_ lines, including erroneous lines.
// The result of the parse is returned via the contents of the (match) groups.
static const char *parseExp =
-
"(?m)" // Multi-line mode
"^([ \\t]*(?:#.*?)?)$" // A blank or comment line. Matches Group 1.
"|^(?:" // OR
anyCaseTrie = utrie2_open(0, 0, &status);
lowerCaseTrie = utrie2_open(0, 0, &status);
-
+
+ UnicodeString pattern(parseExp, -1, US_INV);
// The scriptSets vector provides a mapping from TRIE values to the set of scripts.
//
}
u_strFromUTF8(input, inputLen+1, NULL, confusablesWS, confusablesWSLen, &status);
+ parseRegexp = uregex_open(pattern.getBuffer(), pattern.length(), 0, NULL, &status);
-
- parseRegexp = uregex_openC(parseExp, 0, NULL, &status);
-
// Zap any Byte Order Mark at the start of input. Changing it to a space is benign
// given the syntax of the input.
if (*input == 0xfeff) {
/*
*******************************************************************************
-* Copyright (C) 2007-2010, International Business Machines Corporation and
+* Copyright (C) 2007-2011, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*/
#include "cmemory.h"
#include "uvector.h"
#include "gregoimp.h"
-#include "uhash.h"
U_NAMESPACE_BEGIN
if (source.vtzlines != NULL) {
UErrorCode status = U_ZERO_ERROR;
int32_t size = source.vtzlines->size();
- vtzlines = new UVector(uhash_deleteUnicodeString, uhash_compareUnicodeString, size, status);
+ vtzlines = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, size, status);
if (U_SUCCESS(status)) {
for (int32_t i = 0; i < size; i++) {
UnicodeString *line = (UnicodeString*)source.vtzlines->elementAt(i);
if (right.vtzlines != NULL) {
UErrorCode status = U_ZERO_ERROR;
int32_t size = right.vtzlines->size();
- vtzlines = new UVector(uhash_deleteUnicodeString, uhash_compareUnicodeString, size, status);
+ vtzlines = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, size, status);
if (U_SUCCESS(status)) {
for (int32_t i = 0; i < size; i++) {
UnicodeString *line = (UnicodeString*)right.vtzlines->elementAt(i);
void
VTimeZone::load(VTZReader& reader, UErrorCode& status) {
- vtzlines = new UVector(uhash_deleteUnicodeString, uhash_compareUnicodeString, DEFAULT_VTIMEZONE_LINES, status);
+ vtzlines = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, DEFAULT_VTIMEZONE_LINES, status);
if (U_FAILURE(status)) {
return;
}
// Set the deleter to remove TimeZoneRule vectors to avoid memory leaks due to unowned TimeZoneRules.
rules->setDeleter(deleteTimeZoneRule);
- dates = new UVector(uhash_deleteUnicodeString, uhash_compareUnicodeString, status);
+ dates = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status);
if (U_FAILURE(status)) {
goto cleanupParse;
}
} else {
UVector *customProps = NULL;
if (olsonzid.length() > 0 && icutzver.length() > 0) {
- customProps = new UVector(uhash_deleteUnicodeString, uhash_compareUnicodeString, status);
+ customProps = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status);
if (U_FAILURE(status)) {
return;
}
}
InitialTimeZoneRule *initial = NULL;
UVector *transitionRules = NULL;
- UVector customProps(uhash_deleteUnicodeString, uhash_compareUnicodeString, status);
+ UVector customProps(uprv_deleteUObject, uhash_compareUnicodeString, status);
UnicodeString tzid;
// Extract rules applicable to dates after the start time
return;
}
- UVector customProps(uhash_deleteUnicodeString, uhash_compareUnicodeString, status);
+ UVector customProps(uprv_deleteUObject, uhash_compareUnicodeString, status);
UnicodeString tzid;
// Extract simple rules
if (!gMetaZoneIDsInitialized) {
UErrorCode status = U_ZERO_ERROR;
UHashtable *metaZoneIDTable = uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, &status);
- uhash_setKeyDeleter(metaZoneIDTable, uhash_deleteUnicodeString);
+ uhash_setKeyDeleter(metaZoneIDTable, uprv_deleteUObject);
// No valueDeleter, because the vector maintain the value objects
UVector *metaZoneIDs = NULL;
if (U_SUCCESS(status)) {
uhash_close(metaZoneIDTable);
}
if (U_SUCCESS(status)) {
- metaZoneIDs->setDeleter(uhash_freeBlock);
+ metaZoneIDs->setDeleter(uprv_free);
UResourceBundle *rb = ures_openDirect(NULL, gMetaZones, &status);
UResourceBundle *bundle = ures_getByKey(rb, gMapTimezonesTag, NULL, &status);
--- /dev/null
+#! /usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2011, International Business Machines
+# Corporation and others. All Rights Reserved.
+#
+# file name: dependencies.py
+#
+# created on: 2011may26
+
+"""Reader module for dependency data for the ICU dependency tester.
+
+Reads dependencies.txt and makes the data available.
+
+Attributes:
+ files: Set of "library/filename.o" files mentioned in the dependencies file.
+ items: Map from library or group names to item maps.
+ Each item has a "type" ("library" or "group" or "system_symbols").
+ A library or group item can have an optional set of "files" (as in the files attribute).
+ Each item can have an optional set of "deps" (libraries & groups).
+ A group item also has a "library" name unless it is a group of system symbols.
+ The one "system_symbols" item and its groups have sets of "system_symbols"
+ with standard-library system symbol names.
+ libraries: Set of library names mentioned in the dependencies file.
+"""
+__author__ = "Markus W. Scherer"
+
+# TODO: Support binary items.
+# .txt syntax: binary: tools/genrb
+# item contents: {"type": "binary"} with optional files & deps
+# A binary must not be used as a dependency for anything else.
+
+import sys
+
+files = set()
+items = {}
+libraries = set()
+
+_line_number = 0
+_groups_to_be_defined = set()
+
+def _CheckLibraryName(name):
+  """Exits with an error message unless name is acceptable as a library name.
+
+  A library name must be non-empty and must not end with ".o".
+  The error message includes the current input line number.
+  """
+  if name:
+    if name.endswith(".o"):
+      sys.exit("Error:%d: invalid library name %s" % (_line_number, name))
+  else:
+    sys.exit("Error:%d: \"library: \" without name" % _line_number)
+
+def _CheckGroupName(name):
+  """Exits with an error message unless name is acceptable as a group name.
+
+  A group name must be non-empty, must not end with ".o"
+  and must not contain a "/".
+  The error message includes the current input line number.
+  """
+  if not name:
+    sys.exit("Error:%d: \"group: \" without name" % _line_number)
+  looks_like_file = name.endswith(".o")
+  if looks_like_file or "/" in name:
+    sys.exit("Error:%d: invalid group name %s" % (_line_number, name))
+
+def _CheckFileName(name):
+  """Exits with an error message unless name is a plain "something.o" file name.
+
+  The name must end with ".o" and must not contain a "/"
+  (the library prefix is added by the caller, not written in the file).
+  """
+  is_plain_object_file = name.endswith(".o") and "/" not in name
+  if not is_plain_object_file:
+    sys.exit("Error:%d: invalid file name %s" % (_line_number, name))
+
+def _RemoveComment(line):
+  """Counts one more input line and returns it without comment and trailing space.
+
+  Everything from the first "#" onwards is dropped, then trailing whitespace
+  (including the newline) is stripped. Leading whitespace is kept because
+  the callers use it to recognize indented continuation lines.
+  """
+  global _line_number
+  _line_number += 1
+  # The text before the first "#", or the whole line if there is none.
+  without_comment = line.split("#", 1)[0]
+  return without_comment.rstrip()
+
+def _ReadLine(f):
+  """Returns the next line from f that is non-empty after comment removal.
+
+  Lets the StopIteration from f.next() propagate at the end of the input.
+  """
+  line = _RemoveComment(f.next())
+  while not line:
+    line = _RemoveComment(f.next())
+  return line
+
+def _ReadFiles(deps_file, item, library_name):
+  """Reads the indented lines of .o file names that belong to item.
+
+  Each file name is checked, prefixed with "library_name/" and added both to
+  the module-level files set and to item["files"] (created on first use).
+  Exits with an error if a file has already been listed anywhere.
+
+  Returns:
+    The first subsequent line that does not start with a space,
+    for the caller to interpret.
+  """
+  global files
+  file_set = item.get("files")
+  line = _ReadLine(deps_file)
+  while not line or line.startswith(" "):
+    if line:
+      if file_set is None:
+        file_set = set()
+        item["files"] = file_set
+      for base_name in line.split():
+        _CheckFileName(base_name)
+        path = library_name + "/" + base_name
+        if path in files:
+          sys.exit("Error:%d: file %s listed in multiple groups" % (_line_number, path))
+        files.add(path)
+        file_set.add(path)
+    line = _ReadLine(deps_file)
+  return line
+
+def _IsLibrary(item):
+  """Returns a true value if item is a non-empty item map of type "library".
+
+  A false item (None or an empty map) is returned unchanged.
+  """
+  return item["type"] == "library" if item else item
+
+def _IsLibraryGroup(item):
+  """Returns a true value if item is a non-empty item map with a "library" entry.
+
+  A false item (None or an empty map) is returned unchanged.
+  """
+  return ("library" in item) if item else item
+
+def _ReadDeps(deps_file, item, library_name):
+  """Reads the indented lines of dependency names that belong to item.
+
+  Each name is checked and added to item["deps"] (created on first use).
+  A name that is not yet in the module-level items map is registered there
+  as a new group, in library_name if one is given, and remembered in
+  _groups_to_be_defined. Exits with an error if a "system_symbols" item
+  names an existing library or library group.
+
+  Returns:
+    The first subsequent line that does not start with a space,
+    for the caller to interpret.
+  """
+  global items, _groups_to_be_defined
+  dep_set = item.get("deps")
+  line = _ReadLine(deps_file)
+  while not line or line.startswith(" "):
+    if line:
+      if dep_set is None:
+        dep_set = set()
+        item["deps"] = dep_set
+      for dep in line.split():
+        _CheckGroupName(dep)
+        known = items.get(dep)
+        if item["type"] == "system_symbols":
+          if _IsLibraryGroup(known) or _IsLibrary(known):
+            sys.exit(("Error:%d: system_symbols depend on previously defined " +
+                      "library or library group %s") % (_line_number, dep))
+        if known is None:
+          # First mention of this name: it must be defined as a group later.
+          new_group = {"type": "group"}
+          if library_name: new_group["library"] = library_name
+          items[dep] = new_group
+          _groups_to_be_defined.add(dep)
+        dep_set.add(dep)
+    line = _ReadLine(deps_file)
+  return line
+
+def _AddSystemSymbol(item, symbol):
+  """Adds symbol to item["system_symbols"], creating that set if necessary."""
+  symbols = item.get("system_symbols")
+  if symbols is None:
+    symbols = set()
+    item["system_symbols"] = symbols
+  symbols.add(symbol)
+
+def _ReadSystemSymbols(deps_file, item):
+  """Reads the indented lines of system symbol names that belong to item.
+
+  A line containing a double quote must consist of exactly one symbol
+  enclosed in double quotes, which allows spaces inside the symbol name.
+  Any other line is split on whitespace into one or more symbols.
+
+  Returns:
+    The first subsequent line that does not start with a space,
+    for the caller to interpret.
+  """
+  while True:
+    line = _ReadLine(deps_file)
+    if not line: continue
+    if not line.startswith(" "): return line
+    text = line.lstrip()
+    if '"' not in text:
+      # One or more space-separated symbols.
+      for symbol in text.split():
+        _AddSystemSymbol(item, symbol)
+      continue
+    # One quoted symbol: quotes only at both ends, none in between.
+    inner = text[1:-1]
+    properly_quoted = text.startswith('"') and text.endswith('"') and '"' not in inner
+    if not properly_quoted:
+      sys.exit("Error:%d: invalid quoted symbol name %s" % (_line_number, text))
+    _AddSystemSymbol(item, inner)
+
+def Load():
+  """Reads "dependencies.txt" and populates the module attributes.
+
+  The file is opened by relative name, so it is looked up in the current
+  working directory. Recognized top-level lines are "library: name",
+  "group: name", "system_symbols:" and the indented "deps" marker; each of
+  them is followed by indented lines that the _Read...() helpers consume.
+  Those helpers return the first line they could not handle, which becomes
+  the next line to dispatch on here.
+
+  Exits via sys.exit() with a message on any syntax or consistency error,
+  including groups that were mentioned as dependencies but never defined.
+  The input file is closed in all cases.
+  """
+  global items, libraries, _line_number, _groups_to_be_defined
+  deps_file = open("dependencies.txt")
+  try:
+    line = None
+    current_type = None
+    while True:
+      # Skip blank and comment-only lines; StopIteration ends the parse.
+      while not line: line = _RemoveComment(deps_file.next())
+
+      if line.startswith("library: "):
+        current_type = "library"
+        name = line[9:].lstrip()
+        _CheckLibraryName(name)
+        if name in items:
+          sys.exit("Error:%d: library definition using duplicate name %s" % (_line_number, name))
+        libraries.add(name)
+        item = items[name] = {"type": "library"}
+        line = _ReadFiles(deps_file, item, name)
+      elif line.startswith("group: "):
+        current_type = "group"
+        name = line[7:].lstrip()
+        _CheckGroupName(name)
+        # A group must have been mentioned as a dependency first (which
+        # created its item), and may be defined only once.
+        if name not in items:
+          sys.exit("Error:%d: group %s defined before mentioned as a dependency" %
+                   (_line_number, name))
+        if name not in _groups_to_be_defined:
+          sys.exit("Error:%d: group definition using duplicate name %s" % (_line_number, name))
+        _groups_to_be_defined.remove(name)
+        item = items[name]
+        library_name = item.get("library")
+        if library_name:
+          line = _ReadFiles(deps_file, item, library_name)
+        else:
+          # A group without a library is a group of system symbols.
+          line = _ReadSystemSymbols(deps_file, item)
+      elif line == " deps":
+        if current_type == "library":
+          line = _ReadDeps(deps_file, items[name], name)
+        elif current_type == "group":
+          item = items[name]
+          line = _ReadDeps(deps_file, item, item.get("library"))
+        elif current_type == "system_symbols":
+          item = items[current_type]
+          line = _ReadDeps(deps_file, item, None)
+        else:
+          sys.exit("Error:%d: deps before any library or group" % _line_number)
+      elif line == "system_symbols:":
+        current_type = "system_symbols"
+        if current_type in items:
+          sys.exit("Error:%d: duplicate entry for system_symbols" % _line_number)
+        item = items[current_type] = {"type": current_type}
+        line = _ReadSystemSymbols(deps_file, item)
+      else:
+        sys.exit("Syntax error:%d: %s" % (_line_number, line))
+  except StopIteration:
+    # End of file: raised by deps_file.next() here or inside a helper.
+    pass
+  finally:
+    # Release the file handle on normal completion and on sys.exit() alike.
+    deps_file.close()
+  if _groups_to_be_defined:
+    sys.exit("Error: some groups mentioned in dependencies are undefined: %s" % _groups_to_be_defined)
--- /dev/null
+# Copyright (C) 2011, International Business Machines
+# Corporation and others. All Rights Reserved.
+#
+# file name: dependencies.txt
+#
+# created on: 2011may26
+# created by: Markus W. Scherer
+
+# Standard library symbols used by ICU --------------------------------------- #
+
+system_symbols:
+ deps
+ # C
+ PIC system_debug errno_perror malloc_functions c_strings c_string_formatting
+ floating_point trigonometry
+ stdlib_qsort
+ pthread system_locale
+ stdio_input stdio_output file_io readlink_function dir_io mmap_functions dlfcn
+ # C++
+ cplusplus iostream
+
+group: PIC
+ # Position-Independent Code (-fPIC) requires a Global Offset Table.
+ _GLOBAL_OFFSET_TABLE_
+
+group: system_debug
+ __assert_fail __stack_chk_fail
+
+group: errno_perror
+ perror # putil.cpp uprv_dl_open() calls perror("dlopen")
+
+group: malloc_functions
+ free malloc realloc
+
+group: c_strings
+ isspace
+ __ctype_b_loc # for <ctype.h>
+ # We must not use tolower and toupper because they are system-locale-sensitive (Turkish i).
+ strlen strchr strrchr strstr strcmp strncmp strcpy strncpy strcat strncat
+ memcmp memcpy memmove memset
+ # Additional symbols in an optimized build.
+ __strcpy_chk __strncpy_chk __strcat_chk __strncat_chk
+ __rawmemchr __memcpy_chk __memmove_chk
+
+group: c_string_formatting
+ atoi atol strtod strtol strtoul
+ sprintf
+ # Additional symbols in an optimized build.
+ __sprintf_chk
+
+group: floating_point
+ floor ceil modf fmod log pow sqrt
+
+group: trigonometry
+ acos asin atan atan2 cos sin tan
+ # Additional symbols in an optimized build.
+ sincos
+
+group: stdlib_qsort
+ qsort
+
+group: pthread
+ pthread_mutex_init pthread_mutex_destroy pthread_mutex_lock pthread_mutex_unlock
+
+group: system_locale
+ getenv
+ nl_langinfo setlocale
+ gettimeofday localtime_r tzname tzset __timezone
+
+group: stdio_input
+ fopen fclose fgets fread fseek ftell rewind feof fileno
+ # Additional symbols in an optimized build.
+ __fgets_chk __fread_chk
+
+group: stdio_output
+ fflush fwrite
+
+group: file_io
+ open close stat
+ # Additional symbols in an optimized build.
+ __xstat
+
+group: readlink_function
+ readlink # putil.cpp uprv_tzname() calls this in a hack to get the time zone name
+
+group: dir_io
+ opendir closedir readdir # for a hack to get the time zone name
+
+group: mmap_functions # for memory-mapped data loading
+ mmap munmap
+
+group: dlfcn
+ dlopen dlclose dlsym # called by putil.o only for icuplug.o
+
+group: cplusplus
+ __dynamic_cast
+ # The compiler generates references to the global operator delete
+ # even when no code actually uses it.
+ # ICU must not _use_ the global operator delete.
+ "operator delete(void*)"
+ # ICU also must not use the global operator new.
+ # "operator new[](unsigned long)"
+ # _Unwind_Resume is related to exceptions:
+ # "A call to this routine is inserted as the end of a landing pad that performs cleanup,
+ # but does not resume normal execution. It causes unwinding to proceed further."
+ # (Linux Standard Base Specification 1.3)
+ # Even though ICU does not actually use (nor handle) exceptions.
+ _Unwind_Resume
+
+group: iostream
+ "std::basic_ios<char, std::char_traits<char> >::clear(std::_Ios_Iostate)"
+ "std::basic_ios<char, std::char_traits<char> >::eof() const"
+ "std::basic_ios<char, std::char_traits<char> >::fail() const"
+ "std::basic_ostream<char, std::char_traits<char> >& std::operator<< <std::char_traits<char> >(std::basic_ostream<char, std::char_traits<char> >&, char const*)"
+ std::istream::get()
+ std::istream::putback(char)
+ # Additional symbols in an optimized build.
+ "std::basic_ostream<char, std::char_traits<char> >& std::__ostream_insert<char, std::char_traits<char> >(std::basic_ostream<char, std::char_traits<char> >&, char const*, long)"
+
+# ICU common library --------------------------------------------------------- #
+
+library: stubdata
+ stubdata.o # Exports icudt48_dat.
+
+library: common
+ # All files in the common library are listed in its dependencies.
+ deps
+ # Libraries and groups that the common library depends on.
+ date_interval
+ breakiterator
+ uts46 filterednormalizer2 normalizer2 canonical_iterator
+ normlzr unormcmp unorm_it unorm
+ idna2003 stringprep
+ stringenumeration
+ unistr_core unistr_props unistr_case unistr_case_locale unistr_titlecase_brkiter unistr_cnv
+ uniset_core uniset_props uniset_closure usetiter uset uset_props
+ uiter
+ ucasemap ucasemap_titlecase_brkiter script_runs
+ uprops ubidi_props ucase uscript
+ ubidi ushape
+ resourcebundle service_registration resbund_cnv ures_cnv icudataver ucat
+ loclikely
+ conversion converter_selector ucnv_set ucnvdisp
+ messagepattern
+ icu_utility icu_utility_with_props
+ ustr_wcs
+ ucharstriebuilder ucharstrieiterator
+ bytestriebuilder bytestrieiterator
+ hashtable uhash uvector uvector32 uvector64 ulist
+ propsvec utrie2 utrie2_builder
+ sort
+ uinit utypes errorcode
+ icuplug
+ platform
+
+group: date_interval # class DateInterval
+ dtintrv.o
+ deps
+ platform
+
+group: breakiterator
+ # We could try to split off a breakiterator_builder group,
+ # but we still need uniset_props for code like in the ThaiBreakEngine constructor
+ # which does
+ # fThaiWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Thai:]&[:LineBreak=SA:]]"), status)
+ brkiter.o brkeng.o ubrk.o
+ rbbi.o rbbinode.o rbbiscan.o rbbisetb.o rbbistbl.o rbbitblb.o
+ rbbidata.o rbbirb.o
+ triedict.o dictbe.o
+ deps
+ resourcebundle service_registration
+ schriter utext uniset_core uniset_props
+ uhash ustack utrie
+ uvector32 # for triedict.o
+
+group: unormcmp # unorm_compare()
+ unormcmp.o
+ deps
+ filterednormalizer2
+ uniset_props # for uniset_getUnicode32Instance()
+ ucase
+
+group: unorm_it # UNormIterator
+ unorm_it.o
+ deps
+ unorm uiter
+
+group: unorm # old normalization C API
+ unorm.o
+ deps
+ filterednormalizer2
+ uniset_props # for uniset_getUnicode32Instance()
+ uiter
+
+group: normlzr # old Normalizer C++ class
+ normlzr.o
+ deps
+ filterednormalizer2
+ uniset_props # for uniset_getUnicode32Instance()
+ schriter
+
+group: uts46
+ uts46.o
+ deps
+ normalizer2 punycode
+ uchar # for u_charType() (via U_GET_GC_MASK(c))
+ ubidi_props # for u_charDirection() & ubidi_getJoiningType()
+ unistr_core
+ stringpiece bytestream
+
+group: filterednormalizer2
+ filterednormalizer2.o
+ deps
+ normalizer2
+
+group: idna2003
+ uidna.o
+ deps
+ stringprep punycode
+
+group: stringprep
+ usprep.o
+ deps
+ unorm # could change to use filterednormalizer2 directly for Unicode 3.2 normalization
+ normalizer2
+ ubidi_props
+
+group: canonical_iterator
+ caniter.o
+ deps
+ normalizer2 usetiter
+
+group: normalizer2
+ normalizer2.o
+ normalizer2impl.o
+ deps
+ uniset_core
+ unistr_core
+ utrie2_builder # for building CanonIterData & FCD
+ uvector # for building CanonIterData
+ uhash # for the instance cache
+ udata
+
+group: punycode
+ punycode.o
+ deps
+ platform
+
+group: uset_props
+ uset_props.o
+ deps
+ uniset_closure uniset_props uniset_core
+
+group: uset
+ uset.o
+ deps
+ uniset_core
+
+group: uniset_closure
+ uniset_closure.o
+ deps
+ uniset_core unistr_case_locale unistr_titlecase_brkiter
+
+group: uniset_props
+ uniset_props.o ruleiter.o
+ deps
+ uniset_core uprops unistr_case
+ parsepos
+ resourcebundle
+ propname unames
+
+group: parsepos
+ parsepos.o
+ deps
+ platform
+
+group: usetiter # UnicodeSetIterator
+ usetiter.o
+ deps
+ uniset_core
+
+group: uniset_core
+ unifilt.o unifunct.o
+ uniset.o bmpset.o unisetspan.o
+ deps
+ patternprops
+ unistr_core icu_utility
+ uvector
+
+group: icu_utility_with_props
+ util_props.o
+ deps
+ icu_utility uchar ucase
+
+group: icu_utility
+ util.o
+ deps
+ unistr_core patternprops
+
+group: utext
+ utext.o
+ deps
+ unistr_core ucase
+
+group: stringenumeration
+ ustrenum.o uenum.o
+ deps
+ unistr_core
+
+group: schriter
+ schriter.o
+ # The UCharCharacterIterator implements virtual void getText(UnicodeString& result)
+ # so it depends on UnicodeString, therefore it makes little sense to split
+ # schriter and uchriter into separate groups.
+ uchriter.o
+ deps
+ chariter unistr_core
+
+group: chariter
+ chariter.o
+ deps
+ platform
+
+group: uiter
+ uiter.o
+ deps
+ platform
+
+group: unistr_cnv
+ unistr_cnv.o
+ deps
+ conversion unistr_core
+
+group: unistr_core
+ unistr.o
+ deps
+ ustrtrns appendable
+
+group: uscript
+ uscript.o # uscript_getCode() accepts a locale ID and loads its script code data
+ deps
+ propname resourcebundle
+
+group: uprops
+ uprops.o
+ deps
+ normalizer2
+ uchar
+ ubidi_props
+ unistr_case ustring_case # only for case folding
+ ucase
+
+group: propname
+ propname.o
+ deps
+ bytestrie
+
+group: unames
+ unames.o
+ deps
+ uchar udata
+
+group: script_runs
+ usc_impl.o
+ deps
+ uchar
+
+group: uchar
+ uchar.o
+ deps
+ utrie2
+
+group: messagepattern # for MessageFormat and tools
+ messagepattern.o
+ deps
+ patternprops unistr_core
+
+group: patternprops
+ patternprops.o
+ deps
+ PIC
+
+group: ushape
+ ushape.o
+ deps
+ ubidi_props
+
+group: ubidi
+ ubidi.o ubidiln.o ubidiwrt.o
+ deps
+ ubidi_props
+ uchar # for doWriteReverse() which uses IS_COMBINING(u_charType(c))
+
+group: ubidi_props
+ ubidi_props.o
+ deps
+ utrie2
+
+group: unistr_props
+ unistr_props.o
+ deps
+ unistr_core uchar
+
+group: unistr_case_locale
+ unistr_case_locale.o
+ deps
+ unistr_case ustring_case_locale
+
+group: unistr_case
+ unistr_case.o
+ deps
+ unistr_core
+ ustring_case
+
+group: unistr_titlecase_brkiter
+ unistr_titlecase_brkiter.o
+ deps
+ ustr_titlecase_brkiter
+
+group: ustr_titlecase_brkiter
+ ustr_titlecase_brkiter.o
+ deps
+ breakiterator
+ ustring_case_locale ucase
+
+group: ucasemap_titlecase_brkiter
+ ucasemap_titlecase_brkiter.o
+ deps
+ ucasemap breakiterator utext
+
+group: ucasemap
+ ucasemap.o
+ deps
+ ustring_case
+ resourcebundle # uloc_getName() etc.
+
+group: ustring_case_locale
+ ustrcase_locale.o
+ deps
+ ustring_case
+ resourcebundle # for uloc_getDefault()
+
+group: ustring_case
+ ustrcase.o
+ deps
+ ucase
+
+group: ucase
+ ucase.o
+ deps
+ utrie2
+
+group: uinit
+ uinit.o
+ deps
+ ucnv_io icuplug
+
+group: converter_selector
+ ucnvsel.o
+ deps
+ conversion propsvec utrie2_builder uset ucnv_set
+
+group: ucnvdisp # ucnv_getDisplayName()
+ ucnvdisp.o
+ deps
+ conversion resourcebundle
+
+group: ucnv_set # ucnv_getUnicodeSet
+ ucnv_set.o
+ deps
+ uset
+
+group: conversion
+ ustr_cnv.o
+ ucnv.o ucnv_cnv.o ucnv_bld.o ucnv_cb.o ucnv_err.o
+ ucnv_ct.o
+ ucnvmbcs.o ucnv_ext.o
+ ucnvhz.o ucnvisci.o ucnv_lmb.o ucnv2022.o
+ ucnvlat1.o ucnv_u7.o ucnv_u8.o ucnv_u16.o ucnv_u32.o
+ ucnvbocu.o ucnvscsu.o
+ deps
+ ucnv_io
+
+group: ucnv_io
+ ucnv_io.o
+ deps
+ sort stringenumeration udata
+
+group: service_registration
+ serv.o servnotf.o servlkf.o servlk.o servls.o servrbf.o servslkf.o
+ locutil.o
+ deps
+ locale_display_names resourcebundle
+ hashtable uvector
+
+group: ucat # message-catalog-like API
+ ucat.o
+ deps
+ resourcebundle
+
+group: locale_display_names
+ locdispnames.o
+ deps
+ locresdata
+
+group: icudataver # u_getDataVersion()
+ icudataver.o
+ deps
+ resourcebundle
+
+group: loclikely
+ loclikely.o
+ deps
+ resourcebundle
+
+group: locresdata
+ # This was intended to collect locale functions that load resource bundle data.
+ # See the resourcebundle group about what else loads data.
+ locresdata.o
+ deps
+ resourcebundle
+
+group: resbund_cnv # paths are Unicode strings
+ resbund_cnv.o
+ deps
+ conversion resourcebundle ures_cnv
+
+group: ures_cnv # ures_openU, path is a Unicode string
+ ures_cnv.o
+ deps
+ conversion resourcebundle
+
+group: resourcebundle
+ resbund.o uresbund.o uresdata.o
+ locavailable.o
+ # uloc_tag.c converts between old ICU/LDML/CLDR locale IDs and newer BCP 47 IDs.
+ # It uses data from resource bundles for some of the mappings.
+ # We might want to generate .c files for that data, to #include rather than load,
+ # to minimize dependencies from this code.
+ # Then we could separate this higher-level locale ID code from the resource bundle code.
+ uloc.o uloc_tag.o
+ # Even basic locid.cpp via Locale constructors and Locale::getDefault()
+ # depend on canonicalization and data loading.
+ # We can probably only disentangle basic locale ID handling from resource bundle code
+ # by hardcoding all of the locale ID data.
+ locid.o locmap.o wintz.o
+ # Do we need class LocaleBased? http://bugs.icu-project.org/trac/ticket/8608
+ locbased.o
+ deps
+ udata ucol_swp
+ sort stringenumeration uhash
+
+group: udata
+ udata.o ucmndata.o udatamem.o
+ umapfile.o
+ deps
+ uhash charstr stringpiece platform stubdata
+ file_io mmap_functions
+
+group: ucharstriebuilder
+ ucharstriebuilder.o
+ deps
+ ucharstrie stringtriebuilder sort
+ unistr_core
+
+group: ucharstrieiterator
+ ucharstrieiterator.o
+ deps
+ ucharstrie unistr_core uvector32
+
+group: ucharstrie
+ ucharstrie.o
+ deps
+ platform
+
+group: bytestriebuilder
+ bytestriebuilder.o
+ deps
+ bytestrie stringtriebuilder sort
+ charstr stringpiece
+
+group: bytestrieiterator
+ bytestrieiterator.o
+ deps
+ bytestrie charstr uvector32
+
+group: bytestrie
+ bytestrie.o
+ deps
+ platform
+
+group: stringtriebuilder
+ stringtriebuilder.o
+ deps
+ uhash
+
+group: propsvec
+ propsvec.o
+ deps
+ sort utrie2_builder
+
+group: utrie2_builder
+ utrie2_builder.o
+ deps
+ platform
+ utrie2
+ utrie # for utrie2_fromUTrie()
+ ucol_swp # for utrie_swap()
+
+group: utrie2
+ utrie2.o
+ deps
+ platform
+
+group: utrie # Callers should use utrie2 instead.
+ utrie.o
+ deps
+ platform
+
+group: hashtable # Maps UnicodeString to value.
+ uhash_us.o
+ deps
+ unistr_core
+ uhash
+
+group: uhash
+ uhash.o
+ deps
+ platform
+
+group: ustack
+ ustack.o
+ deps
+ uvector
+
+group: uvector
+ uvector.o
+ deps
+ platform
+ sort # for UVector::sort()
+
+group: uvector32
+ uvectr32.o
+ deps
+ platform
+
+group: uvector64
+ uvectr64.o
+ deps
+ platform
+
+group: ulist
+ ulist.o
+ deps
+ platform
+
+group: sort
+ uarrsort.o
+ deps
+ platform
+
+group: ustr_wcs
+ ustr_wcs.o
+ deps
+ ustrtrns # on platforms where wchar_t is UTF-32
+ # platform -- on other platforms
+
+group: ustrtrns
+ ustrtrns.o
+ deps
+ platform
+
+group: charstr
+ charstr.o
+ deps
+ unistr_core # for CharString::appendInvariantChars(const UnicodeString &s, UErrorCode &errorCode)
+ platform
+
+group: stringpiece
+ stringpiece.o
+ deps
+ PIC c_strings
+
+group: bytestream
+ bytestream.o
+ deps
+ platform
+
+group: appendable
+ appendable.o
+ deps
+ platform
+
+group: icuplug
+ icuplug.o
+ deps
+ platform
+
+group: ucol_swp
+ ucol_swp.o
+ deps
+ platform
+
+group: errorcode # ErrorCode base class
+ errorcode.o
+ deps
+ utypes
+ PIC
+
+group: utypes # u_errorName()
+ utypes.o
+
+group: platform
+ # Files in the "platform" group.
+ cmemory.o uobject.o
+ cstring.o cwchar.o uinvchar.o
+ ustring.o # Other platform files really just need u_strlen
+ ustrfmt.o # uprv_itou
+ utf_impl.o
+ putil.o
+ ucln_cmn.o # for putil.o which calls ucln_common_registerCleanup
+ udataswp.o # for uinvchar.o; TODO: move uinvchar.o swapper functions to udataswp.o?
+ umath.o
+ mutex.o umutex.o
+ utrace.o
+ deps
+ # The "platform" group has no ICU dependencies.
+ PIC system_debug malloc_functions c_strings c_string_formatting
+ floating_point pthread system_locale
+ stdio_input readlink_function dir_io
+ errno_perror dlfcn # Move related code into icuplug.c?
+ cplusplus
+
+# ICU i18n library ----------------------------------------------------------- #
+
+library: i18n
+ deps
+ localedata charset_detector spoof_detection
+ alphabetic_index collation formatting formattable_cnv regex regex_cnv translit
+ universal_time_scale
+ uclean_i18n
+
+group: localedata
+ ulocdata.o
+ deps
+ uniset_props resourcebundle
+ uset_props # TODO: change to using C++ UnicodeSet, remove this dependency
+
+group: charset_detector
+ csdetect.o csmatch.o csr2022.o csrecog.o csrmbcs.o csrsbcs.o csrucode.o csrutf8.o inputext.o ucsdet.o
+ deps
+ conversion
+ uclean_i18n
+
+group: spoof_detection
+ uspoof.o uspoof_build.o uspoof_conf.o uspoof_impl.o uspoof_wsconf.o
+ deps
+ uniset_props regex unorm uscript
+
+group: alphabetic_index
+ alphaindex.o
+ deps
+ collation localedata
+ uclean_i18n
+
+group: collation
+ bocsu.o coleitr.o coll.o colldata.o sortkey.o tblcoll.o ucol.o
+ ucol_bld.o ucol_cnt.o ucol_elm.o ucol_res.o ucol_sit.o ucol_tok.o ucol_wgt.o ucoleitr.o
+ bms.o bmsearch.o search.o stsearch.o usearch.o
+ deps
+ common # TODO: Could be narrower.
+ uclean_i18n
+
+group: formatting
+ # TODO: Try to subdivide this ball of wax.
+ # locale_display_names2
+ locdspnm.o
+ # currency
+ ucurr.o
+ # currencyformat
+ curramt.o currfmt.o currpinf.o currunit.o
+ # decimalformat
+ dcfmtsym.o decfmtst.o decimfmt.o
+ numfmt.o numsys.o unum.o winnmfmt.o
+ # rbnf
+ nfrs.o nfrule.o nfsubs.o rbnf.o
+ # measureformat
+ measfmt.o
+ # dateformat
+ astro.o buddhcal.o calendar.o cecal.o chnsecal.o coptccal.o ethpccal.o
+ gregocal.o gregoimp.o hebrwcal.o indiancal.o islamcal.o japancal.o persncal.o taiwncal.o
+ ucal.o
+ basictz.o olsontz.o rbtz.o simpletz.o timezone.o tzrule.o tztrans.o
+ vtzone.o vzone.o wintzimpl.o zonemeta.o zrule.o ztrans.o
+ tzfmt.o tzgnames.o tznames.o tznames_impl.o
+ datefmt.o dtfmtsym.o dtitvfmt.o dtitvinf.o dtptngen.o dtrule.o reldtfmt.o
+ smpdtfmt.o smpdtfst.o udateintervalformat.o udatpg.o windtfmt.o
+ udat.o
+ tmunit.o tmutamt.o tmutfmt.o
+ # messageformat
+ choicfmt.o msgfmt.o plurfmt.o selfmt.o umsg.o
+ deps
+ digitlist formattable format
+ pluralrules
+ collation # for rbnf
+ common
+ floating_point # sqrt() for astro.o
+ trigonometry # for astro.o
+ stdlib_qsort # for ucurr.o (which does not use ICU's uarrsort.o)
+ uclean_i18n
+
+group: digitlist
+ digitlst.o decContext.o decNumber.o
+ deps
+ charstr stringpiece unistr_core
+
+group: formattable
+ fmtable.o
+ measure.o
+ deps
+ unistr_core digitlist stringpiece charstr
+
+group: formattable_cnv
+ fmtable_cnv.o
+ deps
+ formattable unistr_cnv conversion
+
+group: format
+ format.o fphdlimp.o fpositer.o
+ deps
+ resourcebundle parsepos unistr_core uvector32
+
+group: pluralrules
+ plurrule.o upluralrules.o
+ deps
+ patternprops resourcebundle uvector
+ unistr_case_locale
+
+group: regex_cnv
+ uregexc.o
+ deps
+ regex unistr_cnv
+
+group: regex
+ regexcmp.o regexst.o regextxt.o rematch.o repattrn.o uregex.o
+ deps
+ uniset_closure utext uvector32 uvector64 ustack
+ breakiterator
+ unistr_core
+ uinit # TODO: Really needed?
+ uclean_i18n
+
+group: translit
+ anytrans.o brktrans.o casetrn.o cpdtrans.o name2uni.o uni2name.o nortrans.o remtrans.o titletrn.o tolowtrn.o toupptrn.o
+ esctrn.o unesctrn.o nultrans.o
+ funcrepl.o quant.o rbt.o rbt_data.o rbt_pars.o rbt_rule.o rbt_set.o strmatch.o strrepl.o translit.o transreg.o tridpars.o utrans.o
+ deps
+ common
+ formatting # for Transliterator::getDisplayName()
+ uclean_i18n
+
+group: universal_time_scale
+ utmscale.o
+
+group: uclean_i18n
+ ucln_in.o
+ deps
+ platform
+
+# ICU io library ------------------------------------------------------------- #
+
+library: io
+ deps
+ ustdio ustream uclean_io
+
+group: ustdio
+ locbund.o sprintf.o sscanf.o ufile.o ufmt_cmn.o uprintf.o uprntf_p.o uscanf.o uscanf_p.o ustdio.o
+ deps
+ formatting conversion translit
+ uclean_io
+ stdio_output
+
+group: ustream
+ ustream.o
+ deps
+ unistr_cnv
+ uchar # for u_isWhitespace()
+ iostream
+
+group: uclean_io
+ ucln_io.o
+ deps
+ platform
--- /dev/null
+#! /usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2011, International Business Machines
+# Corporation and others. All Rights Reserved.
+#
+# file name: depstest.py
+#
+# created on: 2011may24
+
+"""ICU dependency tester.
+
+This probably works only on Linux.
+
+The exit code is 0 if everything is fine, 1 for errors, 2 for only warnings.
+
+Sample invocation:
+ ~/svn.icu/trunk/src/source/test/depstest$ ./depstest.py ~/svn.icu/trunk/dbg
+"""
+
+__author__ = "Markus W. Scherer"
+
+import glob
+import os.path
+import subprocess
+import sys
+
+import dependencies
+
+_ignored_symbols = set()
+_obj_files = {}
+_symbols_to_files = {}
+_return_value = 0
+
+def _ReadObjFile(root_path, library_name, obj_name):
+  """Runs nm on one .o file and records its imported and exported symbols.
+
+  The result is stored in _obj_files under the key "library_name/obj_name"
+  as a map with "imports" and "exports" sets. Each exported symbol is also
+  entered into _symbols_to_files. Symbols that look like C++ runtime
+  internals are collected in _ignored_symbols instead.
+
+  If the same "library_name/obj_name" has been read before, a warning is
+  printed, the module exit status becomes 2 (unless an error was already
+  recorded) and the file is not read again.
+
+  Args:
+    root_path: Root directory of the built ICU libraries.
+    library_name: Name of the library subdirectory below root_path.
+    obj_name: Base name of the .o file inside that subdirectory.
+  """
+  # _return_value must be declared global: it is assigned below, and without
+  # the declaration the assignment would only create a function local.
+  global _ignored_symbols, _obj_files, _symbols_to_files, _return_value
+  lib_obj_name = library_name + "/" + obj_name
+  if lib_obj_name in _obj_files:
+    print "Warning: duplicate .o file " + lib_obj_name
+    # Report the warning via the exit status, but never hide an error (1).
+    if not _return_value: _return_value = 2
+    return
+
+  path = os.path.join(root_path, library_name, obj_name)
+  nm_result = subprocess.Popen(["nm", "--demangle", "--format=sysv",
+                                "--extern-only", "--no-sort", path],
+                               stdout=subprocess.PIPE).communicate()[0]
+  obj_imports = set()
+  obj_exports = set()
+  for line in nm_result.splitlines():
+    # Symbol lines have "|"-separated fields; skip everything else.
+    fields = line.split("|")
+    if len(fields) == 1: continue
+    name = fields[0].strip()
+    # Ignore symbols like '__cxa_pure_virtual',
+    # 'vtable for __cxxabiv1::__si_class_type_info' or
+    # 'DW.ref.__gxx_personality_v0'.
+    if name.startswith("__cxa") or "__cxxabi" in name or "__gxx" in name:
+      _ignored_symbols.add(name)
+      continue
+    type = fields[2].strip()
+    if type == "U":
+      # nm marks undefined symbols with "U": this file needs them from elsewhere.
+      obj_imports.add(name)
+    else:
+      # TODO: Investigate weak symbols (V, W) with or without values.
+      obj_exports.add(name)
+      _symbols_to_files[name] = lib_obj_name
+  _obj_files[lib_obj_name] = {"imports": obj_imports, "exports": obj_exports}
+
+def _ReadLibrary(root_path, library_name):
+  """Reads the symbols of every *.o file in root_path/library_name."""
+  pattern = os.path.join(root_path, library_name, "*.o")
+  for obj_path in glob.glob(pattern):
+    obj_name = os.path.basename(obj_path)
+    _ReadObjFile(root_path, library_name, obj_name)
+
+def _Resolve(name, parents):
+  """Computes the exports and system symbols of one item and checks its imports.
+
+  The item's "exports" become the union of the exports of its own files and
+  of all of its dependencies, resolved recursively; its "system_symbols" are
+  extended in the same way. An import of one of the item's files that is
+  covered by neither is reported as an error on stderr and sets the module
+  exit status to 1. A dependency that satisfies none of the item's imports
+  is reported as an "Info" line, unless the item has no files of its own.
+
+  Args:
+    name: Key of the item in dependencies.items.
+    parents: List of the item names currently being resolved, outermost
+      first; used to detect circular dependencies. It is extended while this
+      item is being resolved and restored before returning.
+
+  Returns:
+    The item map, with "exports" and "system_symbols" filled in.
+  """
+  global _return_value
+  item = dependencies.items[name]
+  item_type = item["type"]
+  if name in parents:
+    sys.exit("Error: %s %s has a circular dependency on itself: %s" %
+             (item_type, name, parents))
+  # An item that already has "exports" was resolved earlier; reuse it.
+  if item.get("exports") is not None:
+    return item
+  # Otherwise calculate recursively.
+  parents.append(name)
+  needed = set()
+  provided = set()
+  system_symbols = item.get("system_symbols")
+  if system_symbols is None:
+    system_symbols = set()
+    item["system_symbols"] = system_symbols
+  files = item.get("files")
+  if files:
+    for file_name in files:
+      obj_file = _obj_files[file_name]
+      needed.update(obj_file["imports"])
+      provided.update(obj_file["exports"])
+    # Imports satisfied within this item, or deliberately ignored, do not count.
+    needed.difference_update(provided | _ignored_symbols)
+  for dep in item.get("deps") or ():
+    dep_item = _Resolve(dep, parents)
+    dep_exports = dep_item["exports"]
+    dep_system_symbols = dep_item["system_symbols"]
+    # Detect whether this item needs to depend on dep at all. An item
+    # without files is a deliberate umbrella group or library, so it is exempt.
+    if files:
+      if needed.isdisjoint(dep_exports) and needed.isdisjoint(dep_system_symbols):
+        print "Info: %s %s does not need to depend on %s\n" % (item_type, name, dep)
+    # The dependency's exports are always passed on, even if none of them
+    # is needed to satisfy local imports.
+    provided.update(dep_exports)
+    system_symbols.update(dep_system_symbols)
+  item["exports"] = provided
+  item["system_symbols"] = system_symbols
+  # Whatever is still needed now is missing from the declared dependencies.
+  needed.difference_update(provided | system_symbols)
+  for symbol in needed:
+    for file_name in files:
+      if symbol in _obj_files[file_name]["imports"]:
+        sys.stderr.write("Error: %s %s file %s imports %s but %s does not depend on %s\n" %
+                         (item_type, name, file_name, symbol, name, _symbols_to_files.get(symbol)))
+        _return_value = 1
+  parents.pop()
+  return item
+
+def Process(root_path):
+  """Loads dependencies.txt, reads the libraries' .o files, and processes them.
+
+  Modifies dependencies.items: Recursively builds each item's system_symbols and exports.
+
+  The set of .o files found below root_path is compared with the set listed
+  in dependencies.txt. Any difference is reported on stderr and sets the
+  module exit status to 1. The libraries are resolved only if the exit
+  status is still zero at that point.
+
+  Args:
+    root_path: Root directory of the built ICU libraries.
+  """
+  global _return_value
+  dependencies.Load()
+  # Map each system symbol to the name of the item that lists it,
+  # for use in error messages.
+  for name, item in dependencies.items.iteritems():
+    for symbol in item.get("system_symbols") or ():
+      _symbols_to_files[symbol] = name
+  for library_name in dependencies.libraries:
+    _ReadLibrary(root_path, library_name)
+  built_files = set(_obj_files)
+  files_missing_from_deps = built_files - dependencies.files
+  files_missing_from_build = dependencies.files - built_files
+  if files_missing_from_deps:
+    sys.stderr.write("Error: files missing from dependencies.txt:\n%s\n" %
+                     sorted(files_missing_from_deps))
+    _return_value = 1
+  if files_missing_from_build:
+    sys.stderr.write("Error: files in dependencies.txt but not built:\n%s\n" %
+                     sorted(files_missing_from_build))
+    _return_value = 1
+  if _return_value:
+    return
+  for library_name in dependencies.libraries:
+    _Resolve(library_name, [])
+
+def main():
+  """Checks the libraries below the root path given as the first argument.
+
+  Returns:
+    The module exit status (_return_value) after processing.
+  """
+  global _return_value
+  args = sys.argv
+  if len(args) < 2:
+    sys.exit(("Command line error: " +
+              "need one argument with the root path to the built ICU libraries/*.o files."))
+  root_path = args[1]
+  Process(root_path)
+  if _ignored_symbols:
+    print "Info: ignored symbols:\n%s" % sorted(_ignored_symbols)
+  if not _return_value:
+    print "OK: Specified and actual dependencies match."
+  return _return_value
+
+if __name__ == "__main__":
+ sys.exit(main())
/********************************************************************
* COPYRIGHT:
- * Copyright (c) 2002-2010, International Business Machines Corporation and
+ * Copyright (c) 2002-2011, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************
*
// NOTE: we use a TreeSet below to sort the output, which is not guaranteed to be sorted!
Hashtable *permutations = new Hashtable(FALSE, status);
- permutations->setValueDeleter(uhash_deleteUnicodeString);
+ permutations->setValueDeleter(uprv_deleteUObject);
UnicodeString toPermute("ABC");
CanonicalIterator::permute(toPermute, FALSE, permutations, status);
// try samples
logln("testing samples");
Hashtable *set = new Hashtable(FALSE, status);
- set->setValueDeleter(uhash_deleteUnicodeString);
+ set->setValueDeleter(uprv_deleteUObject);
int32_t i = 0;
CanonicalIterator it("", status);
if(U_SUCCESS(status)) {
/**
*******************************************************************************
- * Copyright (C) 2001-2010, International Business Machines Corporation and
+ * Copyright (C) 2001-2011, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*/
// should not be able to locate invisible services
{
UErrorCode status = U_ZERO_ERROR;
- UVector ids(uhash_deleteUnicodeString, uhash_compareUnicodeString, status);
+ UVector ids(uprv_deleteUObject, uhash_compareUnicodeString, status);
service.getVisibleIDs(ids, status);
UnicodeString target = "en_US_BAR";
confirmBoolean("18) find invisible", !ids.contains(&target));
public:
TestMultipleKeyStringFactory(const UnicodeString ids[], int32_t count, const UnicodeString& factoryID)
: _status(U_ZERO_ERROR)
- , _ids(uhash_deleteUnicodeString, uhash_compareUnicodeString, count, _status)
+ , _ids(uprv_deleteUObject, uhash_compareUnicodeString, count, _status)
, _factoryID(factoryID + ": ")
{
for (int i = 0; i < count; ++i) {
// iterate over the visual ids returned by the multiple factory
{
UErrorCode status = U_ZERO_ERROR;
- UVector ids(uhash_deleteUnicodeString, uhash_compareUnicodeString, 0, status);
+ UVector ids(uprv_deleteUObject, uhash_compareUnicodeString, 0, status);
service.getVisibleIDs(ids, status);
for (int i = 0; i < ids.size(); ++i) {
const UnicodeString* id = (const UnicodeString*)ids[i];
{
UErrorCode status = U_ZERO_ERROR;
- UVector ids(uhash_deleteUnicodeString, uhash_compareUnicodeString, 0, status);
+ UVector ids(uprv_deleteUObject, uhash_compareUnicodeString, 0, status);
service.getVisibleIDs(ids, status);
for (int i = 0; i < ids.size(); ++i) {
const UnicodeString* id = (const UnicodeString*)ids[i];
// list all of the resources
{
UErrorCode status = U_ZERO_ERROR;
- UVector ids(uhash_deleteUnicodeString, uhash_compareUnicodeString, 0, status);
+ UVector ids(uprv_deleteUObject, uhash_compareUnicodeString, 0, status);
service.getVisibleIDs(ids, status);
logln("all visible ids:");
for (int i = 0; i < ids.size(); ++i) {
{
UErrorCode status = U_ZERO_ERROR;
- UVector ids(uhash_deleteUnicodeString, uhash_compareUnicodeString, 0, status);
+ UVector ids(uprv_deleteUObject, uhash_compareUnicodeString, 0, status);
service.getVisibleIDs(ids, status);
logln("all visible ids:");
for (int i = 0; i < ids.size(); ++i) {
Locale::setDefault(loc, status);
{
UErrorCode status = U_ZERO_ERROR;
- UVector ids(uhash_deleteUnicodeString, uhash_compareUnicodeString, 0, status);
+ UVector ids(uprv_deleteUObject, uhash_compareUnicodeString, 0, status);
service.getVisibleIDs(ids, status);
logln("all visible ids:");
for (int i = 0; i < ids.size(); ++i) {
}
}
- UVector ids(uhash_deleteUnicodeString, uhash_compareUnicodeString, status);
+ UVector ids(uprv_deleteUObject, uhash_compareUnicodeString, status);
// yuck, this is awkward to use. All because we pass null in an overload.
// TODO: change this.
UnicodeString str("Greet");
/*
*******************************************************************************
- * Copyright (C) 2003-2010, International Business Machines Corporation and *
+ * Copyright (C) 2003-2011, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
errln("memory allocation error");
return;
}
- fuFUNames->setValueDeleter(uhash_deleteUnicodeString);
+ fuFUNames->setValueDeleter(uprv_deleteUObject);
fuFUNames->put(fu_FU.getName(), new UnicodeString("ze leetle bunny Fu-Fu"), status);
fuFUNames->put(fu_FU_FOO.getName(), new UnicodeString("zee leetel bunny Foo-Foo"), status);
Hashtable contents;
TokenSymbolTable(UErrorCode& ec) : contents(FALSE, ec) {
- contents.setValueDeleter(uhash_deleteUnicodeString);
+ contents.setValueDeleter(uprv_deleteUObject);
}
~TokenSymbolTable() {}
static int32_t U_CALLCONV
string_hash(const UHashTok key) {
const struct SResource *res = (struct SResource *)key.pointer;
- return uhash_hashUCharsN(res->u.fString.fChars, res->u.fString.fLength);
+ return ustr_hashUCharsN(res->u.fString.fChars, res->u.fString.fLength);
}
static UBool U_CALLCONV