From fbd1e089fd74bff419f2a82336133a1349eb17d3 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Thu, 12 Jan 2017 19:05:01 +0000 Subject: [PATCH] ICU-12410 add C++ CaseMap, make UCaseMap an alias for that, move new functions with Edits into CaseMap, make case properties functions ignore UCaseProps pointer X-SVN-Rev: 39554 --- icu4c/source/common/ucase.cpp | 150 +++--- icu4c/source/common/ucasemap.cpp | 154 +++--- .../common/ucasemap_titlecase_brkiter.cpp | 36 +- icu4c/source/common/unicode/ucasemap.h | 463 +++++++++--------- icu4c/source/common/unicode/unistr.h | 27 +- icu4c/source/common/unistr_case.cpp | 9 +- icu4c/source/common/unistr_case_locale.cpp | 30 +- .../common/unistr_titlecase_brkiter.cpp | 28 +- icu4c/source/common/ustr_imp.h | 76 ++- .../source/common/ustr_titlecase_brkiter.cpp | 94 ++-- icu4c/source/common/ustrcase.cpp | 95 ++-- icu4c/source/common/ustrcase_locale.cpp | 108 ++-- icu4c/source/test/cintltst/cstrcase.c | 7 +- 13 files changed, 635 insertions(+), 642 deletions(-) diff --git a/icu4c/source/common/ucase.cpp b/icu4c/source/common/ucase.cpp index 97ded9ee2d1..051c914da31 100644 --- a/icu4c/source/common/ucase.cpp +++ b/icu4c/source/common/ucase.cpp @@ -64,13 +64,13 @@ _enumPropertyStartsRange(const void *context, UChar32 start, UChar32 /*end*/, ui } U_CFUNC void U_EXPORT2 -ucase_addPropertyStarts(const UCaseProps *csp, const USetAdder *sa, UErrorCode *pErrorCode) { +ucase_addPropertyStarts(const UCaseProps * /* unused csp */, const USetAdder *sa, UErrorCode *pErrorCode) { if(U_FAILURE(*pErrorCode)) { return; } /* add the start code point of each same-value range of the trie */ - utrie2_enum(&csp->trie, NULL, _enumPropertyStartsRange, sa); + utrie2_enum(&ucase_props_singleton.trie, NULL, _enumPropertyStartsRange, sa); /* add code points with hardcoded properties, plus the ones following them */ @@ -133,14 +133,14 @@ static const uint8_t flagsOffset[256]={ /* simple case mappings ----------------------------------------------------- */ U_CAPI 
UChar32 U_EXPORT2 -ucase_tolower(const UCaseProps *csp, UChar32 c) { - uint16_t props=UTRIE2_GET16(&csp->trie, c); +ucase_tolower(const UCaseProps * /* unused csp */, UChar32 c) { + uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); if(!PROPS_HAS_EXCEPTION(props)) { if(UCASE_GET_TYPE(props)>=UCASE_UPPER) { c+=UCASE_GET_DELTA(props); } } else { - const uint16_t *pe=GET_EXCEPTIONS(csp, props); + const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props); uint16_t excWord=*pe++; if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) { GET_SLOT_VALUE(excWord, UCASE_EXC_LOWER, pe, c); @@ -150,14 +150,14 @@ ucase_tolower(const UCaseProps *csp, UChar32 c) { } U_CAPI UChar32 U_EXPORT2 -ucase_toupper(const UCaseProps *csp, UChar32 c) { - uint16_t props=UTRIE2_GET16(&csp->trie, c); +ucase_toupper(const UCaseProps * /* unused csp */, UChar32 c) { + uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); if(!PROPS_HAS_EXCEPTION(props)) { if(UCASE_GET_TYPE(props)==UCASE_LOWER) { c+=UCASE_GET_DELTA(props); } } else { - const uint16_t *pe=GET_EXCEPTIONS(csp, props); + const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props); uint16_t excWord=*pe++; if(HAS_SLOT(excWord, UCASE_EXC_UPPER)) { GET_SLOT_VALUE(excWord, UCASE_EXC_UPPER, pe, c); @@ -167,14 +167,14 @@ ucase_toupper(const UCaseProps *csp, UChar32 c) { } U_CAPI UChar32 U_EXPORT2 -ucase_totitle(const UCaseProps *csp, UChar32 c) { - uint16_t props=UTRIE2_GET16(&csp->trie, c); +ucase_totitle(const UCaseProps * /* unused csp */, UChar32 c) { + uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); if(!PROPS_HAS_EXCEPTION(props)) { if(UCASE_GET_TYPE(props)==UCASE_LOWER) { c+=UCASE_GET_DELTA(props); } } else { - const uint16_t *pe=GET_EXCEPTIONS(csp, props); + const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props); uint16_t excWord=*pe++; int32_t idx; if(HAS_SLOT(excWord, UCASE_EXC_TITLE)) { @@ -198,7 +198,7 @@ static const UChar iDotTilde[3] = { 0x69, 0x307, 0x303 }; U_CFUNC void U_EXPORT2 
-ucase_addCaseClosure(const UCaseProps *csp, UChar32 c, const USetAdder *sa) { +ucase_addCaseClosure(const UCaseProps * /* unused csp */, UChar32 c, const USetAdder *sa) { uint16_t props; /* @@ -229,7 +229,7 @@ ucase_addCaseClosure(const UCaseProps *csp, UChar32 c, const USetAdder *sa) { break; } - props=UTRIE2_GET16(&csp->trie, c); + props=UTRIE2_GET16(&ucase_props_singleton.trie, c); if(!PROPS_HAS_EXCEPTION(props)) { if(UCASE_GET_TYPE(props)!=UCASE_NONE) { /* add the one simple case mapping, no matter what type it is */ @@ -243,7 +243,7 @@ ucase_addCaseClosure(const UCaseProps *csp, UChar32 c, const USetAdder *sa) { * c has exceptions, so there may be multiple simple and/or * full case mappings. Add them all. */ - const uint16_t *pe0, *pe=GET_EXCEPTIONS(csp, props); + const uint16_t *pe0, *pe=GET_EXCEPTIONS(&ucase_props_singleton, props); const UChar *closure; uint16_t excWord=*pe++; int32_t idx, closureLength, fullLength, length; @@ -338,10 +338,10 @@ strcmpMax(const UChar *s, int32_t length, const UChar *t, int32_t max) { } U_CFUNC UBool U_EXPORT2 -ucase_addStringCaseClosure(const UCaseProps *csp, const UChar *s, int32_t length, const USetAdder *sa) { +ucase_addStringCaseClosure(const UCaseProps * /* unused csp */, const UChar *s, int32_t length, const USetAdder *sa) { int32_t i, start, limit, result, unfoldRows, unfoldRowWidth, unfoldStringWidth; - if(csp->unfold==NULL || s==NULL) { + if(ucase_props_singleton.unfold==NULL || s==NULL) { return FALSE; /* no reverse case folding data, or no string */ } if(length<=1) { @@ -355,7 +355,7 @@ ucase_addStringCaseClosure(const UCaseProps *csp, const UChar *s, int32_t length return FALSE; } - const uint16_t *unfold=csp->unfold; + const uint16_t *unfold=ucase_props_singleton.unfold; unfoldRows=unfold[UCASE_UNFOLD_ROWS]; unfoldRowWidth=unfold[UCASE_UNFOLD_ROW_WIDTH]; unfoldStringWidth=unfold[UCASE_UNFOLD_STRING_WIDTH]; @@ -381,7 +381,7 @@ ucase_addStringCaseClosure(const UCaseProps *csp, const UChar *s, int32_t length 
for(i=unfoldStringWidth; iadd(sa->set, c); - ucase_addCaseClosure(csp, c, sa); + ucase_addCaseClosure(&ucase_props_singleton, c, sa); } return TRUE; } else if(result<0) { @@ -430,38 +430,38 @@ U_NAMESPACE_END /** @return UCASE_NONE, UCASE_LOWER, UCASE_UPPER, UCASE_TITLE */ U_CAPI int32_t U_EXPORT2 -ucase_getType(const UCaseProps *csp, UChar32 c) { - uint16_t props=UTRIE2_GET16(&csp->trie, c); +ucase_getType(const UCaseProps * /* unused csp */, UChar32 c) { + uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); return UCASE_GET_TYPE(props); } /** @return same as ucase_getType() and set bit 2 if c is case-ignorable */ U_CAPI int32_t U_EXPORT2 -ucase_getTypeOrIgnorable(const UCaseProps *csp, UChar32 c) { - uint16_t props=UTRIE2_GET16(&csp->trie, c); +ucase_getTypeOrIgnorable(const UCaseProps * /* unused csp */, UChar32 c) { + uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); return UCASE_GET_TYPE_AND_IGNORABLE(props); } /** @return UCASE_NO_DOT, UCASE_SOFT_DOTTED, UCASE_ABOVE, UCASE_OTHER_ACCENT */ static inline int32_t -getDotType(const UCaseProps *csp, UChar32 c) { - uint16_t props=UTRIE2_GET16(&csp->trie, c); +getDotType(const UCaseProps * /* unused csp */, UChar32 c) { + uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); if(!PROPS_HAS_EXCEPTION(props)) { return props&UCASE_DOT_MASK; } else { - const uint16_t *pe=GET_EXCEPTIONS(csp, props); + const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props); return (*pe>>UCASE_EXC_DOT_SHIFT)&UCASE_DOT_MASK; } } U_CAPI UBool U_EXPORT2 -ucase_isSoftDotted(const UCaseProps *csp, UChar32 c) { - return (UBool)(getDotType(csp, c)==UCASE_SOFT_DOTTED); +ucase_isSoftDotted(const UCaseProps * /* unused csp */, UChar32 c) { + return (UBool)(getDotType(&ucase_props_singleton, c)==UCASE_SOFT_DOTTED); } U_CAPI UBool U_EXPORT2 -ucase_isCaseSensitive(const UCaseProps *csp, UChar32 c) { - uint16_t props=UTRIE2_GET16(&csp->trie, c); +ucase_isCaseSensitive(const UCaseProps * /* unused csp */, UChar32 c) { 
+ uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); return (UBool)((props&UCASE_SENSITIVE)!=0); } @@ -662,7 +662,7 @@ ucase_getCaseLocale(const char *locale, int32_t *locCache) { * it is also cased or not. */ static UBool -isFollowedByCasedLetter(const UCaseProps *csp, UCaseContextIterator *iter, void *context, int8_t dir) { +isFollowedByCasedLetter(const UCaseProps * /* unused csp */, UCaseContextIterator *iter, void *context, int8_t dir) { UChar32 c; if(iter==NULL) { @@ -670,7 +670,7 @@ isFollowedByCasedLetter(const UCaseProps *csp, UCaseContextIterator *iter, void } for(/* dir!=0 sets direction */; (c=iter(context, dir))>=0; dir=0) { - int32_t type=ucase_getTypeOrIgnorable(csp, c); + int32_t type=ucase_getTypeOrIgnorable(&ucase_props_singleton, c); if(type&4) { /* case-ignorable, continue with the loop */ } else if(type!=UCASE_NONE) { @@ -685,7 +685,7 @@ isFollowedByCasedLetter(const UCaseProps *csp, UCaseContextIterator *iter, void /* Is preceded by Soft_Dotted character with no intervening cc=230 ? */ static UBool -isPrecededBySoftDotted(const UCaseProps *csp, UCaseContextIterator *iter, void *context) { +isPrecededBySoftDotted(const UCaseProps * /* unused csp */, UCaseContextIterator *iter, void *context) { UChar32 c; int32_t dotType; int8_t dir; @@ -695,7 +695,7 @@ isPrecededBySoftDotted(const UCaseProps *csp, UCaseContextIterator *iter, void * } for(dir=-1; (c=iter(context, dir))>=0; dir=0) { - dotType=getDotType(csp, c); + dotType=getDotType(&ucase_props_singleton, c); if(dotType==UCASE_SOFT_DOTTED) { return TRUE; /* preceded by TYPE_i */ } else if(dotType!=UCASE_OTHER_ACCENT) { @@ -742,7 +742,7 @@ isPrecededBySoftDotted(const UCaseProps *csp, UCaseContextIterator *iter, void * /* Is preceded by base character 'I' with no intervening cc=230 ? 
*/ static UBool -isPrecededBy_I(const UCaseProps *csp, UCaseContextIterator *iter, void *context) { +isPrecededBy_I(const UCaseProps * /* unused csp */, UCaseContextIterator *iter, void *context) { UChar32 c; int32_t dotType; int8_t dir; @@ -755,7 +755,7 @@ isPrecededBy_I(const UCaseProps *csp, UCaseContextIterator *iter, void *context) if(c==0x49) { return TRUE; /* preceded by I */ } - dotType=getDotType(csp, c); + dotType=getDotType(&ucase_props_singleton, c); if(dotType!=UCASE_OTHER_ACCENT) { return FALSE; /* preceded by different base character (not I), or intervening cc==230 */ } @@ -766,7 +766,7 @@ isPrecededBy_I(const UCaseProps *csp, UCaseContextIterator *iter, void *context) /* Is followed by one or more cc==230 ? */ static UBool -isFollowedByMoreAbove(const UCaseProps *csp, UCaseContextIterator *iter, void *context) { +isFollowedByMoreAbove(const UCaseProps * /* unused csp */, UCaseContextIterator *iter, void *context) { UChar32 c; int32_t dotType; int8_t dir; @@ -776,7 +776,7 @@ isFollowedByMoreAbove(const UCaseProps *csp, UCaseContextIterator *iter, void *c } for(dir=1; (c=iter(context, dir))>=0; dir=0) { - dotType=getDotType(csp, c); + dotType=getDotType(&ucase_props_singleton, c); if(dotType==UCASE_ABOVE) { return TRUE; /* at least one cc==230 following */ } else if(dotType!=UCASE_OTHER_ACCENT) { @@ -789,7 +789,7 @@ isFollowedByMoreAbove(const UCaseProps *csp, UCaseContextIterator *iter, void *c /* Is followed by a dot above (without cc==230 in between) ? 
*/ static UBool -isFollowedByDotAbove(const UCaseProps *csp, UCaseContextIterator *iter, void *context) { +isFollowedByDotAbove(const UCaseProps * /* unused csp */, UCaseContextIterator *iter, void *context) { UChar32 c; int32_t dotType; int8_t dir; @@ -802,7 +802,7 @@ isFollowedByDotAbove(const UCaseProps *csp, UCaseContextIterator *iter, void *co if(c==0x307) { return TRUE; } - dotType=getDotType(csp, c); + dotType=getDotType(&ucase_props_singleton, c); if(dotType!=UCASE_OTHER_ACCENT) { return FALSE; /* next base character or cc==230 in between */ } @@ -812,20 +812,20 @@ isFollowedByDotAbove(const UCaseProps *csp, UCaseContextIterator *iter, void *co } U_CAPI int32_t U_EXPORT2 -ucase_toFullLower(const UCaseProps *csp, UChar32 c, +ucase_toFullLower(const UCaseProps * /* unused csp */, UChar32 c, UCaseContextIterator *iter, void *context, const UChar **pString, const char *locale, int32_t *locCache) { // The sign of the result has meaning, input must be non-negative so that it can be returned as is. 
U_ASSERT(c >= 0); UChar32 result=c; - uint16_t props=UTRIE2_GET16(&csp->trie, c); + uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); if(!PROPS_HAS_EXCEPTION(props)) { if(UCASE_GET_TYPE(props)>=UCASE_UPPER) { result=c+UCASE_GET_DELTA(props); } } else { - const uint16_t *pe=GET_EXCEPTIONS(csp, props), *pe2; + const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props), *pe2; uint16_t excWord=*pe++; int32_t full; @@ -844,7 +844,7 @@ ucase_toFullLower(const UCaseProps *csp, UChar32 c, if( loc==UCASE_LOC_LITHUANIAN && /* base characters, find accents above */ (((c==0x49 || c==0x4a || c==0x12e) && - isFollowedByMoreAbove(csp, iter, context)) || + isFollowedByMoreAbove(&ucase_props_singleton, iter, context)) || /* precomposed with accent above, no need to find one */ (c==0xcc || c==0xcd || c==0x128)) ) { @@ -896,7 +896,7 @@ ucase_toFullLower(const UCaseProps *csp, UChar32 c, 0130; 0069; 0130; 0130; az # LATIN CAPITAL LETTER I WITH DOT ABOVE */ return 0x69; - } else if(loc==UCASE_LOC_TURKISH && c==0x307 && isPrecededBy_I(csp, iter, context)) { + } else if(loc==UCASE_LOC_TURKISH && c==0x307 && isPrecededBy_I(&ucase_props_singleton, iter, context)) { /* # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i. # This matches the behavior of the canonically equivalent I-dot_above @@ -905,7 +905,7 @@ ucase_toFullLower(const UCaseProps *csp, UChar32 c, 0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE */ return 0; /* remove the dot (continue without output) */ - } else if(loc==UCASE_LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(csp, iter, context)) { + } else if(loc==UCASE_LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(&ucase_props_singleton, iter, context)) { /* # When lowercasing, unless an I is before a dot_above, it turns into a dotless i. 
@@ -922,8 +922,8 @@ ucase_toFullLower(const UCaseProps *csp, UChar32 c, *pString=iDot; return 2; } else if( c==0x3a3 && - !isFollowedByCasedLetter(csp, iter, context, 1) && - isFollowedByCasedLetter(csp, iter, context, -1) /* -1=preceded */ + !isFollowedByCasedLetter(&ucase_props_singleton, iter, context, 1) && + isFollowedByCasedLetter(&ucase_props_singleton, iter, context, -1) /* -1=preceded */ ) { /* greek capital sigma maps depending on surrounding cased letters (see SpecialCasing.txt) */ /* @@ -957,7 +957,7 @@ ucase_toFullLower(const UCaseProps *csp, UChar32 c, /* internal */ static int32_t -toUpperOrTitle(const UCaseProps *csp, UChar32 c, +toUpperOrTitle(const UCaseProps * /* unused csp */, UChar32 c, UCaseContextIterator *iter, void *context, const UChar **pString, const char *locale, int32_t *locCache, @@ -965,13 +965,13 @@ toUpperOrTitle(const UCaseProps *csp, UChar32 c, // The sign of the result has meaning, input must be non-negative so that it can be returned as is. U_ASSERT(c >= 0); UChar32 result=c; - uint16_t props=UTRIE2_GET16(&csp->trie, c); + uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); if(!PROPS_HAS_EXCEPTION(props)) { if(UCASE_GET_TYPE(props)==UCASE_LOWER) { result=c+UCASE_GET_DELTA(props); } } else { - const uint16_t *pe=GET_EXCEPTIONS(csp, props), *pe2; + const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props), *pe2; uint16_t excWord=*pe++; int32_t full, idx; @@ -994,7 +994,7 @@ toUpperOrTitle(const UCaseProps *csp, UChar32 c, 0069; 0069; 0130; 0130; az; # LATIN SMALL LETTER I */ return 0x130; - } else if(loc==UCASE_LOC_LITHUANIAN && c==0x307 && isPrecededBySoftDotted(csp, iter, context)) { + } else if(loc==UCASE_LOC_LITHUANIAN && c==0x307 && isPrecededBySoftDotted(&ucase_props_singleton, iter, context)) { /* # Lithuanian @@ -1052,19 +1052,19 @@ toUpperOrTitle(const UCaseProps *csp, UChar32 c, } U_CAPI int32_t U_EXPORT2 -ucase_toFullUpper(const UCaseProps *csp, UChar32 c, +ucase_toFullUpper(const UCaseProps * /* unused 
csp */, UChar32 c, UCaseContextIterator *iter, void *context, const UChar **pString, const char *locale, int32_t *locCache) { - return toUpperOrTitle(csp, c, iter, context, pString, locale, locCache, TRUE); + return toUpperOrTitle(&ucase_props_singleton, c, iter, context, pString, locale, locCache, TRUE); } U_CAPI int32_t U_EXPORT2 -ucase_toFullTitle(const UCaseProps *csp, UChar32 c, +ucase_toFullTitle(const UCaseProps * /* unused csp */, UChar32 c, UCaseContextIterator *iter, void *context, const UChar **pString, const char *locale, int32_t *locCache) { - return toUpperOrTitle(csp, c, iter, context, pString, locale, locCache, FALSE); + return toUpperOrTitle(&ucase_props_singleton, c, iter, context, pString, locale, locCache, FALSE); } /* case folding ------------------------------------------------------------- */ @@ -1110,14 +1110,14 @@ ucase_toFullTitle(const UCaseProps *csp, UChar32 c, /* return the simple case folding mapping for c */ U_CAPI UChar32 U_EXPORT2 -ucase_fold(const UCaseProps *csp, UChar32 c, uint32_t options) { - uint16_t props=UTRIE2_GET16(&csp->trie, c); +ucase_fold(const UCaseProps * /* unused csp */, UChar32 c, uint32_t options) { + uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); if(!PROPS_HAS_EXCEPTION(props)) { if(UCASE_GET_TYPE(props)>=UCASE_UPPER) { c+=UCASE_GET_DELTA(props); } } else { - const uint16_t *pe=GET_EXCEPTIONS(csp, props); + const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props); uint16_t excWord=*pe++; int32_t idx; if(excWord&UCASE_EXC_CONDITIONAL_FOLD) { @@ -1170,19 +1170,19 @@ ucase_fold(const UCaseProps *csp, UChar32 c, uint32_t options) { */ U_CAPI int32_t U_EXPORT2 -ucase_toFullFolding(const UCaseProps *csp, UChar32 c, +ucase_toFullFolding(const UCaseProps * /* unused csp */, UChar32 c, const UChar **pString, uint32_t options) { // The sign of the result has meaning, input must be non-negative so that it can be returned as is. 
U_ASSERT(c >= 0); UChar32 result=c; - uint16_t props=UTRIE2_GET16(&csp->trie, c); + uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); if(!PROPS_HAS_EXCEPTION(props)) { if(UCASE_GET_TYPE(props)>=UCASE_UPPER) { result=c+UCASE_GET_DELTA(props); } } else { - const uint16_t *pe=GET_EXCEPTIONS(csp, props), *pe2; + const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props), *pe2; uint16_t excWord=*pe++; int32_t full, idx; @@ -1287,23 +1287,19 @@ ucase_hasBinaryProperty(UChar32 c, UProperty which) { /* case mapping properties */ const UChar *resultString; int32_t locCache; - const UCaseProps *csp=GET_CASE_PROPS(); - if(csp==NULL) { - return FALSE; - } switch(which) { case UCHAR_LOWERCASE: - return (UBool)(UCASE_LOWER==ucase_getType(csp, c)); + return (UBool)(UCASE_LOWER==ucase_getType(&ucase_props_singleton, c)); case UCHAR_UPPERCASE: - return (UBool)(UCASE_UPPER==ucase_getType(csp, c)); + return (UBool)(UCASE_UPPER==ucase_getType(&ucase_props_singleton, c)); case UCHAR_SOFT_DOTTED: - return ucase_isSoftDotted(csp, c); + return ucase_isSoftDotted(&ucase_props_singleton, c); case UCHAR_CASE_SENSITIVE: - return ucase_isCaseSensitive(csp, c); + return ucase_isCaseSensitive(&ucase_props_singleton, c); case UCHAR_CASED: - return (UBool)(UCASE_NONE!=ucase_getType(csp, c)); + return (UBool)(UCASE_NONE!=ucase_getType(&ucase_props_singleton, c)); case UCHAR_CASE_IGNORABLE: - return (UBool)(ucase_getTypeOrIgnorable(csp, c)>>2); + return (UBool)(ucase_getTypeOrIgnorable(&ucase_props_singleton, c)>>2); /* * Note: The following Changes_When_Xyz are defined as testing whether * the NFD form of the input changes when Xyz-case-mapped. 
@@ -1318,20 +1314,20 @@ ucase_hasBinaryProperty(UChar32 c, UProperty which) { */ case UCHAR_CHANGES_WHEN_LOWERCASED: locCache=UCASE_LOC_ROOT; - return (UBool)(ucase_toFullLower(csp, c, NULL, NULL, &resultString, "", &locCache)>=0); + return (UBool)(ucase_toFullLower(&ucase_props_singleton, c, NULL, NULL, &resultString, "", &locCache)>=0); case UCHAR_CHANGES_WHEN_UPPERCASED: locCache=UCASE_LOC_ROOT; - return (UBool)(ucase_toFullUpper(csp, c, NULL, NULL, &resultString, "", &locCache)>=0); + return (UBool)(ucase_toFullUpper(&ucase_props_singleton, c, NULL, NULL, &resultString, "", &locCache)>=0); case UCHAR_CHANGES_WHEN_TITLECASED: locCache=UCASE_LOC_ROOT; - return (UBool)(ucase_toFullTitle(csp, c, NULL, NULL, &resultString, "", &locCache)>=0); + return (UBool)(ucase_toFullTitle(&ucase_props_singleton, c, NULL, NULL, &resultString, "", &locCache)>=0); /* case UCHAR_CHANGES_WHEN_CASEFOLDED: -- in uprops.c */ case UCHAR_CHANGES_WHEN_CASEMAPPED: locCache=UCASE_LOC_ROOT; return (UBool)( - ucase_toFullLower(csp, c, NULL, NULL, &resultString, "", &locCache)>=0 || - ucase_toFullUpper(csp, c, NULL, NULL, &resultString, "", &locCache)>=0 || - ucase_toFullTitle(csp, c, NULL, NULL, &resultString, "", &locCache)>=0); + ucase_toFullLower(&ucase_props_singleton, c, NULL, NULL, &resultString, "", &locCache)>=0 || + ucase_toFullUpper(&ucase_props_singleton, c, NULL, NULL, &resultString, "", &locCache)>=0 || + ucase_toFullTitle(&ucase_props_singleton, c, NULL, NULL, &resultString, "", &locCache)>=0); default: return FALSE; } diff --git a/icu4c/source/common/ucasemap.cpp b/icu4c/source/common/ucasemap.cpp index 0576a26ddd1..45ac1757e82 100644 --- a/icu4c/source/common/ucasemap.cpp +++ b/icu4c/source/common/ucasemap.cpp @@ -37,82 +37,57 @@ U_NAMESPACE_USE +using icu::internal::CaseMapFriend; + /* UCaseMap service object -------------------------------------------------- */ U_CAPI UCaseMap * U_EXPORT2 ucasemap_open(const char *locale, uint32_t options, UErrorCode *pErrorCode) { - 
UCaseMap *csm; - if(U_FAILURE(*pErrorCode)) { return NULL; } - - csm=(UCaseMap *)uprv_malloc(sizeof(UCaseMap)); + CaseMap *csm = new CaseMap(locale, options, *pErrorCode); if(csm==NULL) { + *pErrorCode = U_MEMORY_ALLOCATION_ERROR; return NULL; - } - uprv_memset(csm, 0, sizeof(UCaseMap)); - - csm->csp=ucase_getSingleton(); - ucasemap_setLocale(csm, locale, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - uprv_free(csm); + } else if (U_FAILURE(*pErrorCode)) { + delete csm; return NULL; } - - csm->options=options; - return csm; + return CaseMapFriend::toUCaseMap(*csm); } U_CAPI void U_EXPORT2 ucasemap_close(UCaseMap *csm) { if(csm!=NULL) { -#if !UCONFIG_NO_BREAK_ITERATION - // Do not call ubrk_close() so that we do not depend on all of the BreakIterator code. - delete reinterpret_cast(csm->iter); -#endif - uprv_free(csm); + delete CaseMapFriend::fromUCaseMap(csm); } } U_CAPI const char * U_EXPORT2 ucasemap_getLocale(const UCaseMap *csm) { - return csm->locale; + return CaseMapFriend::localeID(*CaseMapFriend::fromUCaseMap(csm)); } U_CAPI uint32_t U_EXPORT2 ucasemap_getOptions(const UCaseMap *csm) { - return csm->options; + return CaseMapFriend::options(*CaseMapFriend::fromUCaseMap(csm)); } U_CAPI void U_EXPORT2 ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) { - int32_t length; - if(U_FAILURE(*pErrorCode)) { return; } - - length=uloc_getName(locale, csm->locale, (int32_t)sizeof(csm->locale), pErrorCode); - if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || length==sizeof(csm->locale)) { - *pErrorCode=U_ZERO_ERROR; - /* we only really need the language code for case mappings */ - length=uloc_getLanguage(locale, csm->locale, (int32_t)sizeof(csm->locale), pErrorCode); - } - if(length==sizeof(csm->locale)) { - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - csm->locCache=0; - if(U_SUCCESS(*pErrorCode)) { - ucase_getCaseLocale(csm->locale, &csm->locCache); - } else { - csm->locale[0]=0; - } + CaseMapFriend::setLocale(*CaseMapFriend::fromUCaseMap(csm), locale, 
*pErrorCode); } U_CAPI void U_EXPORT2 -ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode * /*pErrorCode*/) { - csm->options=options; +ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode) { + if(U_FAILURE(*pErrorCode)) { + return; + } + CaseMapFriend::setOptions(*CaseMapFriend::fromUCaseMap(csm), options); } /* UTF-8 string case mappings ----------------------------------------------- */ @@ -258,7 +233,7 @@ utf8_caseContextIterator(void *context, int8_t dir) { * context [0..srcLength[ into account. */ static int32_t -_caseMap(const UCaseMap *csm, UCaseMapFull *map, +_caseMap(const CaseMap &csm, UCaseMapFull *map, uint8_t *dest, int32_t destCapacity, const uint8_t *src, UCaseContext *csc, int32_t srcStart, int32_t srcLimit, @@ -268,7 +243,7 @@ _caseMap(const UCaseMap *csm, UCaseMapFull *map, int32_t srcIndex, destIndex; int32_t locCache; - locCache=csm->locCache; + locCache = CaseMapFriend::caseLocale(csm); /* case mapping loop */ srcIndex=srcStart; @@ -286,7 +261,7 @@ _caseMap(const UCaseMap *csm, UCaseMapFull *map, } continue; } - c=map(csm->csp, c, utf8_caseContextIterator, csc, &s, csm->locale, &locCache); + c=map(NULL, c, utf8_caseContextIterator, csc, &s, NULL, &locCache); if((destIndex(csm->iter); - /* set up local variables */ - int32_t locCache=csm->locCache; + int32_t locCache=CaseMapFriend::caseLocale(csm); UCaseContext csc=UCASECONTEXT_INITIALIZER; csc.p=(void *)src; csc.limit=srcLength; @@ -339,9 +310,9 @@ ucasemap_internalUTF8ToTitle(const UCaseMap *csm, /* find next index where to titlecase */ if(isFirstIndex) { isFirstIndex=FALSE; - idx=bi->first(); + idx=iter->first(); } else { - idx=bi->next(); + idx=iter->next(); } if(idx==UBRK_DONE || idx>srcLength) { idx=srcLength; @@ -364,7 +335,7 @@ ucasemap_internalUTF8ToTitle(const UCaseMap *csm, /* find and copy uncased characters [prev..titleStart[ */ titleStart=titleLimit=prev; U8_NEXT(src, titleLimit, idx, c); - if((csm->options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && 
UCASE_NONE==ucase_getType(csm->csp, c)) { + if((CaseMapFriend::options(csm)&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(NULL, c)) { /* Adjust the titlecasing index (titleStart) to the next cased character. */ for(;;) { titleStart=titleLimit; @@ -376,7 +347,7 @@ ucasemap_internalUTF8ToTitle(const UCaseMap *csm, break; } U8_NEXT(src, titleLimit, idx, c); - if(UCASE_NONE!=ucase_getType(csm->csp, c)) { + if(UCASE_NONE!=ucase_getType(NULL, c)) { break; /* cased letter at [titleStart..titleLimit[ */ } } @@ -392,7 +363,7 @@ ucasemap_internalUTF8ToTitle(const UCaseMap *csm, if(c>=0) { csc.cpStart=titleStart; csc.cpLimit=titleLimit; - c=ucase_toFullTitle(csm->csp, c, utf8_caseContextIterator, &csc, &s, csm->locale, &locCache); + c=ucase_toFullTitle(NULL, c, utf8_caseContextIterator, &csc, &s, NULL, &locCache); destIndex=appendResult(dest, destIndex, destCapacity, c, s); } else { // Malformed UTF-8. @@ -405,7 +376,7 @@ ucasemap_internalUTF8ToTitle(const UCaseMap *csm, /* Special case Dutch IJ titlecasing */ if (titleStart+1 < idx && - ucase_getCaseLocale(csm->locale, &locCache) == UCASE_LOC_DUTCH && + locCache == UCASE_LOC_DUTCH && (src[titleStart] == 0x0049 || src[titleStart] == 0x0069) && (src[titleStart+1] == 0x004A || src[titleStart+1] == 0x006A)) { destIndex=appendUChar(dest, destIndex, destCapacity, 0x004A); @@ -413,7 +384,7 @@ ucasemap_internalUTF8ToTitle(const UCaseMap *csm, } /* lowercase [titleLimit..index[ */ if(titleLimitoptions&U_TITLECASE_NO_LOWERCASE)==0) { + if((CaseMapFriend::options(csm)&U_TITLECASE_NO_LOWERCASE)==0) { /* Normal operation: Lowercase the rest of the word. */ destIndex+= _caseMap( @@ -471,7 +442,7 @@ UBool isFollowedByCasedLetter(const UCaseProps *csp, const uint8_t *s, int32_t i } // Keep this consistent with the UTF-16 version in ustrcase.cpp and the Java version in CaseMap.java. 
-int32_t toUpper(const UCaseMap *csm, +int32_t toUpper(const CaseMap & /* unused csm */, uint8_t *dest, int32_t destCapacity, const uint8_t *src, int32_t srcLength, UErrorCode *pErrorCode) { @@ -483,7 +454,7 @@ int32_t toUpper(const UCaseMap *csm, UChar32 c; U8_NEXT(src, nextIndex, srcLength, c); uint32_t nextState = 0; - int32_t type = ucase_getTypeOrIgnorable(csm->csp, c); + int32_t type = ucase_getTypeOrIgnorable(NULL, c); if ((type & UCASE_IGNORABLE) != 0) { // c is case-ignorable nextState |= (state & AFTER_CASED); @@ -533,7 +504,7 @@ int32_t toUpper(const UCaseMap *csm, (data & HAS_ACCENT) != 0 && numYpogegrammeni == 0 && (state & AFTER_CASED) == 0 && - !isFollowedByCasedLetter(csm->csp, src, nextIndex, srcLength)) { + !isFollowedByCasedLetter(NULL, src, nextIndex, srcLength)) { // Keep disjunctive "or" with (only) a tonos. // We use the same "word boundary" conditions as for the Final_Sigma test. if (i == nextIndex) { @@ -569,7 +540,7 @@ int32_t toUpper(const UCaseMap *csm, } else if(c>=0) { const UChar *s; UChar32 c2 = 0; - c=ucase_toFullUpper(csm->csp, c, NULL, NULL, &s, csm->locale, &locCache); + c=ucase_toFullUpper(NULL, c, NULL, NULL, &s, NULL, &locCache); if((destIndexlocCache; - if (ucase_getCaseLocale(csm->locale, &locCache) == UCASE_LOC_GREEK) { + int32_t locCache = CaseMapFriend::caseLocale(csm); + if (locCache == UCASE_LOC_GREEK) { return GreekUpper::toUpper(csm, dest, destCapacity, src, srcLength, pErrorCode); } UCaseContext csc=UCASECONTEXT_INITIALIZER; @@ -635,12 +606,11 @@ ucasemap_internalUTF8ToUpper(const UCaseMap *csm, pErrorCode); } -static int32_t -utf8_foldCase(const UCaseProps *csp, - uint8_t *dest, int32_t destCapacity, - const uint8_t *src, int32_t srcLength, - uint32_t options, - UErrorCode *pErrorCode) { +static int32_t U_CALLCONV +ucasemap_internalUTF8Fold(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_UNUSED + uint8_t *dest, int32_t destCapacity, + const uint8_t *src, int32_t srcLength, + UErrorCode *pErrorCode) { int32_t srcIndex, 
destIndex; const UChar *s; @@ -661,7 +631,7 @@ utf8_foldCase(const UCaseProps *csp, } continue; } - c=ucase_toFullFolding(csp, c, &s, options); + c=ucase_toFullFolding(NULL, c, &s, CaseMapFriend::options(csm)); if((destIndexcsp, dest, destCapacity, src, srcLength, csm->options, pErrorCode); -} - U_CFUNC int32_t -ucasemap_mapUTF8(const UCaseMap *csm, +ucasemap_mapUTF8(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM uint8_t *dest, int32_t destCapacity, const uint8_t *src, int32_t srcLength, UTF8CaseMapper *stringCaseMapper, @@ -723,7 +685,8 @@ ucasemap_mapUTF8(const UCaseMap *csm, return 0; } - destLength=stringCaseMapper(csm, dest, destCapacity, src, srcLength, pErrorCode); + destLength=stringCaseMapper(csm, UCASEMAP_BREAK_ITERATOR + dest, destCapacity, src, srcLength, pErrorCode); return u_terminateChars((char *)dest, destCapacity, destLength, pErrorCode); } @@ -734,10 +697,11 @@ ucasemap_utf8ToLower(const UCaseMap *csm, char *dest, int32_t destCapacity, const char *src, int32_t srcLength, UErrorCode *pErrorCode) { - return ucasemap_mapUTF8(csm, - (uint8_t *)dest, destCapacity, - (const uint8_t *)src, srcLength, - ucasemap_internalUTF8ToLower, pErrorCode); + return ucasemap_mapUTF8( + *CaseMapFriend::fromUCaseMap(csm), UCASEMAP_BREAK_ITERATOR_NULL + (uint8_t *)dest, destCapacity, + (const uint8_t *)src, srcLength, + ucasemap_internalUTF8ToLower, pErrorCode); } U_CAPI int32_t U_EXPORT2 @@ -745,10 +709,11 @@ ucasemap_utf8ToUpper(const UCaseMap *csm, char *dest, int32_t destCapacity, const char *src, int32_t srcLength, UErrorCode *pErrorCode) { - return ucasemap_mapUTF8(csm, - (uint8_t *)dest, destCapacity, - (const uint8_t *)src, srcLength, - ucasemap_internalUTF8ToUpper, pErrorCode); + return ucasemap_mapUTF8( + *CaseMapFriend::fromUCaseMap(csm), UCASEMAP_BREAK_ITERATOR_NULL + (uint8_t *)dest, destCapacity, + (const uint8_t *)src, srcLength, + ucasemap_internalUTF8ToUpper, pErrorCode); } U_CAPI int32_t U_EXPORT2 @@ -756,8 +721,9 @@ ucasemap_utf8FoldCase(const 
UCaseMap *csm, char *dest, int32_t destCapacity, const char *src, int32_t srcLength, UErrorCode *pErrorCode) { - return ucasemap_mapUTF8(csm, - (uint8_t *)dest, destCapacity, - (const uint8_t *)src, srcLength, - ucasemap_internalUTF8Fold, pErrorCode); + return ucasemap_mapUTF8( + *CaseMapFriend::fromUCaseMap(csm), UCASEMAP_BREAK_ITERATOR_NULL + (uint8_t *)dest, destCapacity, + (const uint8_t *)src, srcLength, + ucasemap_internalUTF8Fold, pErrorCode); } diff --git a/icu4c/source/common/ucasemap_titlecase_brkiter.cpp b/icu4c/source/common/ucasemap_titlecase_brkiter.cpp index 4b607114171..592227681cd 100644 --- a/icu4c/source/common/ucasemap_titlecase_brkiter.cpp +++ b/icu4c/source/common/ucasemap_titlecase_brkiter.cpp @@ -30,35 +30,47 @@ U_NAMESPACE_USE +using icu::internal::CaseMapFriend; + U_CAPI const UBreakIterator * U_EXPORT2 ucasemap_getBreakIterator(const UCaseMap *csm) { - return reinterpret_cast(csm->iter); + return reinterpret_cast( + CaseMapFriend::iter(*CaseMapFriend::fromUCaseMap(csm))); } U_CAPI void U_EXPORT2 -ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode * /*pErrorCode*/) { - delete csm->iter; - csm->iter=reinterpret_cast(iterToAdopt); +ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode *pErrorCode) { + if(U_FAILURE(*pErrorCode)) { + return; + } + CaseMapFriend::adoptIter(*CaseMapFriend::fromUCaseMap(csm), + reinterpret_cast(iterToAdopt)); } U_CAPI int32_t U_EXPORT2 -ucasemap_utf8ToTitle(UCaseMap *csm, +ucasemap_utf8ToTitle(UCaseMap *ucsm, char *dest, int32_t destCapacity, const char *src, int32_t srcLength, UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { + return 0; + } + CaseMap &csm = *CaseMapFriend::fromUCaseMap(ucsm); UText utext=UTEXT_INITIALIZER; utext_openUTF8(&utext, (const char *)src, srcLength, pErrorCode); - if(csm->iter==NULL) { - csm->iter=BreakIterator::createWordInstance(Locale(csm->locale), *pErrorCode); + if (CaseMapFriend::iter(csm) == NULL) { + 
CaseMapFriend::adoptIter( + csm, BreakIterator::createWordInstance(CaseMapFriend::locale(csm), *pErrorCode)); } - if(U_FAILURE(*pErrorCode)) { + if (U_FAILURE(*pErrorCode)) { return 0; } - csm->iter->setText(&utext, *pErrorCode); + CaseMapFriend::mutableIter(csm)->setText(&utext, *pErrorCode); int32_t length=ucasemap_mapUTF8(csm, - (uint8_t *)dest, destCapacity, - (const uint8_t *)src, srcLength, - ucasemap_internalUTF8ToTitle, pErrorCode); + CaseMapFriend::mutableIter(csm), + (uint8_t *)dest, destCapacity, + (const uint8_t *)src, srcLength, + ucasemap_internalUTF8ToTitle, pErrorCode); utext_close(&utext); return length; } diff --git a/icu4c/source/common/unicode/ucasemap.h b/icu4c/source/common/unicode/ucasemap.h index 6f00b072e3a..b38c8ce9a93 100644 --- a/icu4c/source/common/unicode/ucasemap.h +++ b/icu4c/source/common/unicode/ucasemap.h @@ -25,6 +25,7 @@ #include "unicode/localpointer.h" #if U_SHOW_CPLUSPLUS_API +#include "unicode/locid.h" #include "unicode/uobject.h" #endif // U_SHOW_CPLUSPLUS_API @@ -101,7 +102,9 @@ class BreakIterator; */ U_DEFINE_LOCAL_OPEN_POINTER(LocalUCaseMapPointer, UCaseMap, ucasemap_close); -#ifndef U_HIDE_INTERNAL_API +// TODO: move to new C++ unicode/casemap.h + +#ifndef U_HIDE_DRAFT_API /** * Records lengths of string edits but not replacement text. @@ -111,13 +114,13 @@ U_DEFINE_LOCAL_OPEN_POINTER(LocalUCaseMapPointer, UCaseMap, ucasemap_close); * An Edits object tracks a separate UErrorCode, but ICU case mapping functions * merge any such errors into their API's UErrorCode. * - * @internal ICU 59 technology preview + * @draft ICU 59 */ -class Edits final : public UMemory { +class U_COMMON_API Edits final : public UMemory { public: /** * Constructs an empty object. - * @internal ICU 59 technology preview + * @draft ICU 59 */ Edits() : array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0), @@ -126,7 +129,7 @@ public: /** * Resets the data but may not release memory. 
- * @internal ICU 59 technology preview + * @draft ICU 59 */ void reset(); @@ -137,7 +140,7 @@ public: * to the original string. * @see omitUnchanged * @see writeUnchanged - * @internal ICU 59 technology preview + * @draft ICU 59 */ Edits &setWriteUnchanged(UBool write) { omit = !write; @@ -146,26 +149,26 @@ public: /** * @return TRUE if the case mapping function is to omit characters that do not change. * @see setWriteUnchanged - * @internal ICU 59 technology preview + * @draft ICU 59 */ UBool omitUnchanged() const { return omit; } /** * @return TRUE if the case mapping function is to write characters that do not change. * @see setWriteUnchanged - * @internal ICU 59 technology preview + * @draft ICU 59 */ UBool writeUnchanged() const { return !omit; } /** * Adds a record for an unchanged segment of text. * Normally called from inside ICU case mapping functions, not user code. - * @internal ICU 59 technology preview + * @draft ICU 59 */ void addUnchanged(int32_t unchangedLength); /** * Adds a record for a text replacement/insertion/deletion. * Normally called from inside ICU case mapping functions, not user code. - * @internal ICU 59 technology preview + * @draft ICU 59 */ void addReplace(int32_t oldLength, int32_t newLength); /** @@ -173,19 +176,19 @@ public: * Preserves older error codes in the outErrorCode. * Normally called from inside ICU case mapping functions, not user code. * @return TRUE if U_FAILURE(outErrorCode) - * @internal ICU 59 technology preview + * @draft ICU 59 */ UBool copyErrorTo(UErrorCode &outErrorCode); /** * How much longer is the new text compared with the old text? * @return new length minus old length - * @internal ICU 59 technology preview + * @draft ICU 59 */ int32_t lengthDelta() const { return delta; } /** * @return TRUE if there are any change edits - * @internal ICU 59 technology preview + * @draft ICU 59 */ UBool hasChanges() const; @@ -193,13 +196,13 @@ public: * Access to the list of edits. 
* @see getCoarseIterator * @see getFineIterator - * @internal ICU 59 technology preview + * @draft ICU 59 */ struct Iterator final : public UMemory { /** * Advances to the next edit. * @return TRUE if there is another edit - * @internal ICU 59 technology preview + * @draft ICU 59 */ UBool next(UErrorCode &errorCode); @@ -217,42 +220,42 @@ public: * * @param i source index * @return TRUE if the edit for the source index was found - * @internal ICU 59 technology preview + * @draft ICU 59 */ UBool findSourceIndex(int32_t i, UErrorCode &errorCode); /** * @return TRUE if this edit replaces oldLength() units with newLength() different ones. * FALSE if oldLength units remain unchanged. - * @internal ICU 59 technology preview + * @draft ICU 59 */ UBool hasChange() const { return changed; } /** * @return the number of units in the original string which are replaced or remain unchanged. - * @internal ICU 59 technology preview + * @draft ICU 59 */ int32_t oldLength() const { return oldLength_; } /** * @return the number of units in the modified string, if hasChange() is TRUE. * Same as oldLength if hasChange() is FALSE. - * @internal ICU 59 technology preview + * @draft ICU 59 */ int32_t newLength() const { return newLength_; } /** * @return the current index into the source string - * @internal ICU 59 technology preview + * @draft ICU 59 */ int32_t sourceIndex() const { return srcIndex; } /** * @return the current index into the replacement-characters-only string, * not counting unchanged spans - * @internal ICU 59 technology preview + * @draft ICU 59 */ int32_t replacementIndex() const { return replIndex; } /** * @return the current index into the full destination string - * @internal ICU 59 technology preview + * @draft ICU 59 */ int32_t destinationIndex() const { return destIndex; } @@ -279,7 +282,7 @@ public: * Returns an Iterator for coarse-grained changes for simple string updates. * Skips non-changes. * @return an Iterator that merges adjacent changes. 
- * @internal ICU 59 technology preview + * @draft ICU 59 */ Iterator getCoarseChangesIterator() const { return Iterator(array, length, TRUE, TRUE); @@ -288,7 +291,7 @@ public: /** * Returns an Iterator for coarse-grained changes and non-changes for simple string updates. * @return an Iterator that merges adjacent changes. - * @internal ICU 59 technology preview + * @draft ICU 59 */ Iterator getCoarseIterator() const { return Iterator(array, length, FALSE, TRUE); @@ -298,7 +301,7 @@ public: * Returns an Iterator for fine-grained changes for modifying styled text. * Skips non-changes. * @return an Iterator that separates adjacent changes. - * @internal ICU 59 technology preview + * @draft ICU 59 */ Iterator getFineChangesIterator() const { return Iterator(array, length, TRUE, FALSE); @@ -307,7 +310,7 @@ public: /** * Returns an Iterator for fine-grained changes and non-changes for modifying styled text. * @return an Iterator that separates adjacent changes. - * @internal ICU 59 technology preview + * @draft ICU 59 */ Iterator getFineIterator() const { return Iterator(array, length, FALSE, FALSE); @@ -334,30 +337,209 @@ private: uint16_t stackArray[STACK_CAPACITY]; }; -/** - * Omit unchanged text when case-mapping with Edits. - * - * TODO: revisit which bit to use; currently: - * - 31..20: old normalization options (only deprecated Unicode 3.2) - * shifted up for unorm_compare() - * - 19..16: more options specific to unorm_compare() (currently bits 19, 17, 16) - * - 15..12: more string compare options (currently bits 15 & 12) - * - 11.. 8: titlecase mapping options (currently bits 9..8) - * - 7.. 
0: case folding options, but only bit 0 currently used - * - * could overlay any normalization and string *comparison* option bits - * with case *mapping* option bits - * *unless* we start using UCaseMap for string comparison functions - * - * future: German sharp s may need locale variant or option bit - * - * @internal ICU 59 technology preview - */ -// TODO: does not work well as an option because we would need to set/reset it on UCaseMaps -// that are often const, replaced for now by Edits.setWriteUnchanged(UBool) -// #define UCASEMAP_OMIT_UNCHANGED 0x4000 +namespace internal { +/** @internal ICU implementation detail */ +class CaseMapFriend; +} // namespace internal + +class U_COMMON_API CaseMap final : public UMemory { +public: + /** + * Constructor for the root locale and options. + * Explicitly construct with Locale::getDefault() for the default locale. + * @draft ICU 59 + */ + inline CaseMap(uint32_t options, UErrorCode &errorCode); + /** + * Constructor for locale and options. + * @draft ICU 59 + */ + CaseMap(const Locale &locale, uint32_t options, UErrorCode &errorCode); + /** + * Constructor for locale ID and options. + * @draft ICU 59 + */ + CaseMap(const char *localeID, uint32_t options, UErrorCode &errorCode); + + /** + * Destructor. + * @draft ICU 59 + */ + ~CaseMap(); + +// TODO: reverse src & dest? C vs. C++ conventions + + /** + * Lowercases the characters in a UTF-16 string and optionally records edits. + * Casing is locale-dependent and context-sensitive. + * The result may be longer or shorter than the original. + * The source string and the destination buffer must not overlap. + * + * @param dest A buffer for the result string. The result will be NUL-terminated if + * the buffer is large enough. + * The contents is undefined in case of failure. + * @param destCapacity The size of the buffer (number of UChars).
If it is 0, then + * dest may be NULL and the function will only return the length of the result + * without writing any of the result string. + * @param src The original string. + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. + * @param edits Records edits for index mapping, working with styled text, + * and getting only changes (if any). Can be NULL. + * @param errorCode Reference to an in/out error code value + * which must not indicate a failure before the function call. + * @return The length of the result string, if successful - or in case of a buffer overflow, + * in which case it will be greater than destCapacity. + * + * @see u_strToLower + * @draft ICU 59 + */ + int32_t toLower(UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + Edits *edits, + UErrorCode &errorCode) const; + + /** + * Uppercases the characters in a UTF-16 string and optionally records edits. + * Casing is locale-dependent and context-sensitive. + * The result may be longer or shorter than the original. + * The source string and the destination buffer must not overlap. + * + * @param dest A buffer for the result string. The result will be NUL-terminated if + * the buffer is large enough. + * The contents is undefined in case of failure. + * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * dest may be NULL and the function will only return the length of the result + * without writing any of the result string. + * @param src The original string. + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. + * @param edits Records edits for index mapping, working with styled text, + * and getting only changes (if any). Can be NULL. + * @param errorCode Reference to an in/out error code value + * which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful - or in case of a buffer overflow, + * in which case it will be greater than destCapacity. + * + * @see u_strToUpper + * @draft ICU 59 + */ + int32_t toUpper(UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + Edits *edits, + UErrorCode &errorCode) const; + +#if !UCONFIG_NO_BREAK_ITERATION + + /** + * Titlecases a UTF-16 string and optionally records edits. + * Casing is locale-dependent and context-sensitive. + * The result may be longer or shorter than the original. + * The source string and the destination buffer must not overlap. + * + * Titlecasing uses a break iterator to find the first characters of words + * that are to be titlecased. It titlecases those characters and lowercases + * all others. (This can be modified with ucasemap_setOptions().) + * + * The titlecase break iterator can be provided to customize for arbitrary + * styles, using rules and dictionaries beyond the standard iterators. + * The standard titlecase iterator for the root locale implements the + * algorithm of Unicode TR 21. + * + * This function uses only the setText(), first() and next() methods of the + * provided break iterator. + * + * @param iter A break iterator to find the first characters of words that are to be titlecased. + * It is set to the source string and used one or more times for iteration. + * If NULL, then a word break iterator for the locale is used + * (or something equivalent). + * @param dest A buffer for the result string. The result will be NUL-terminated if + * the buffer is large enough. + * The contents is undefined in case of failure. + * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * dest may be NULL and the function will only return the length of the result + * without writing any of the result string. + * @param src The original string. + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param edits Records edits for index mapping, working with styled text, + * and getting only changes (if any). Can be NULL. + * @param errorCode Reference to an in/out error code value + * which must not indicate a failure before the function call. + * @return The length of the result string, if successful - or in case of a buffer overflow, + * in which case it will be greater than destCapacity. + * + * @see u_strToTitle + * @draft ICU 59 + */ + int32_t toTitle(BreakIterator *iter, + UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + Edits *edits, + UErrorCode &errorCode) const; + +#endif // UCONFIG_NO_BREAK_ITERATION + + /** + * Case-folds the characters in a UTF-16 string and optionally records edits. + * + * Case-folding is locale-independent and not context-sensitive, + * but there is an option for whether to include or exclude mappings for dotted I + * and dotless i that are marked with 'T' in CaseFolding.txt. + * + * The result may be longer or shorter than the original. + * The source string and the destination buffer must not overlap. + * + * @param dest A buffer for the result string. The result will be NUL-terminated if + * the buffer is large enough. + * The contents is undefined in case of failure. + * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * dest may be NULL and the function will only return the length of the result + * without writing any of the result string. + * @param src The original string. + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. + * @param edits Records edits for index mapping, working with styled text, + * and getting only changes (if any). Can be NULL. + * @param errorCode Reference to an in/out error code value + * which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful - or in case of a buffer overflow, + * in which case it will be greater than destCapacity. + * + * @see u_strFoldCase + * @see ucasemap_setOptions + * @see U_FOLD_CASE_DEFAULT + * @see U_FOLD_CASE_EXCLUDE_SPECIAL_I + * @draft ICU 59 + */ + int32_t foldCase(UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + Edits *edits, + UErrorCode &errorCode) const; + +private: + friend class internal::CaseMapFriend; + + CaseMap(const CaseMap &other) = delete; + CaseMap &operator=(const CaseMap &other) = delete; + + CaseMap(const Locale &loc, int32_t caseLoc, uint32_t opts, UErrorCode &errorCode); + + void setCaseLocale(const char *localeID); + void setLocale(const char *localeID, UErrorCode &errorCode); + + int32_t caseLocale; + uint32_t options; + Locale locale; +#if !UCONFIG_NO_BREAK_ITERATION + BreakIterator *iter; // owned; only set by old C-style API +#endif +}; + +CaseMap::CaseMap(uint32_t opts, UErrorCode & /*errorCode*/) : + caseLocale(/* UCASE_LOC_ROOT = */ 1), options(opts), locale(Locale::getRoot()) +#if !UCONFIG_NO_BREAK_ITERATION + , iter(NULL) +#endif + {} -#endif // U_HIDE_INTERNAL_API +#endif // U_HIDE_DRAFT_API U_NAMESPACE_END @@ -450,168 +632,6 @@ ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode); */ #define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200 -#if U_SHOW_CPLUSPLUS_API -#ifndef U_HIDE_INTERNAL_API - -/** - * Lowercases the characters in a UTF-16 string and optionally records edits. - * Casing is locale-dependent and context-sensitive. - * The result may be longer or shorter than the original. - * The source string and the destination buffer must not overlap. - * - * @param csm UCaseMap service object. - * @param dest A buffer for the result string. The result will be NUL-terminated if - * the buffer is large enough. - * The contents is undefined in case of failure. - * @param destCapacity The size of the buffer (number of bytes). 
If it is 0, then - * dest may be NULL and the function will only return the length of the result - * without writing any of the result string. - * @param src The original string. - * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. - * @param edits Records edits for index mapping, working with styled text, - * and getting only changes (if any). Can be NULL. - * @param errorCode Reference to an in/out error code value - * which must not indicate a failure before the function call. - * @return The length of the result string, if successful - or in case of a buffer overflow, - * in which case it will be greater than destCapacity. - * - * @see u_strToLower - * @internal ICU 59 technology preview - */ -U_CAPI int32_t U_EXPORT2 -ucasemap_toLowerWithEdits(const UCaseMap *csm, - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - icu::Edits *edits, - UErrorCode &errorCode); - -/** - * Uppercases the characters in a UTF-16 string and optionally records edits. - * Casing is locale-dependent and context-sensitive. - * The result may be longer or shorter than the original. - * The source string and the destination buffer must not overlap. - * - * @param csm UCaseMap service object. - * @param dest A buffer for the result string. The result will be NUL-terminated if - * the buffer is large enough. - * The contents is undefined in case of failure. - * @param destCapacity The size of the buffer (number of bytes). If it is 0, then - * dest may be NULL and the function will only return the length of the result - * without writing any of the result string. - * @param src The original string. - * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. - * @param edits Records edits for index mapping, working with styled text, - * and getting only changes (if any). Can be NULL. 
- * @param errorCode Reference to an in/out error code value - * which must not indicate a failure before the function call. - * @return The length of the result string, if successful - or in case of a buffer overflow, - * in which case it will be greater than destCapacity. - * - * @see u_strToLower - * @internal ICU 59 technology preview - */ -U_CAPI int32_t U_EXPORT2 -ucasemap_toUpperWithEdits(const UCaseMap *csm, - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - icu::Edits *edits, - UErrorCode &errorCode); - -#if !UCONFIG_NO_BREAK_ITERATION - -/** - * Titlecases a UTF-16 string and optionally records edits. - * Casing is locale-dependent and context-sensitive. - * The result may be longer or shorter than the original. - * The source string and the destination buffer must not overlap. - * - * Titlecasing uses a break iterator to find the first characters of words - * that are to be titlecased. It titlecases those characters and lowercases - * all others. (This can be modified with ucasemap_setOptions().) - * - * The titlecase break iterator can be provided to customize for arbitrary - * styles, using rules and dictionaries beyond the standard iterators. - * The standard titlecase iterator for the root locale implements the - * algorithm of Unicode TR 21. - * - * This function uses only the setText(), first() and next() methods of the - * provided break iterator. - * - * @param csm UCaseMap service object. - * @param iter A break iterator to find the first characters of words that are to be titlecased. - * It is set to the source string and used one or more times for iteration. - * If NULL, then a clone of ucasemap_getBreakIterator() is used. - * If that is NULL too, then a word break iterator for the locale is used - * (or something equivalent). - * @param dest A buffer for the result string. The result will be NUL-terminated if - * the buffer is large enough. - * The contents is undefined in case of failure. 
- * @param destCapacity The size of the buffer (number of bytes). If it is 0, then - * dest may be NULL and the function will only return the length of the result - * without writing any of the result string. - * @param src The original string. - * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. - * @param edits Records edits for index mapping, working with styled text, - * and getting only changes (if any). Can be NULL. - * @param errorCode Reference to an in/out error code value - * which must not indicate a failure before the function call. - * @return The length of the result string, if successful - or in case of a buffer overflow, - * in which case it will be greater than destCapacity. - * - * @see u_strToTitle - * @internal ICU 59 technology preview - */ -U_CAPI int32_t U_EXPORT2 -ucasemap_toTitleWithEdits(const UCaseMap *csm, icu::BreakIterator *iter, - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - icu::Edits *edits, - UErrorCode &errorCode); - -#endif // UCONFIG_NO_BREAK_ITERATION - -/** - * Case-folds the characters in a UTF-16 string and optionally records edits. - * - * Case-folding is locale-independent and not context-sensitive, - * but there is an option for whether to include or exclude mappings for dotted I - * and dotless i that are marked with 'T' in CaseFolding.txt. - * - * The result may be longer or shorter than the original. - * The source string and the destination buffer must not overlap. - * - * @param csm UCaseMap service object. - * @param dest A buffer for the result string. The result will be NUL-terminated if - * the buffer is large enough. - * The contents is undefined in case of failure. - * @param destCapacity The size of the buffer (number of bytes). If it is 0, then - * dest may be NULL and the function will only return the length of the result - * without writing any of the result string. - * @param src The original string. 
- * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. - * @param edits Records edits for index mapping, working with styled text, - * and getting only changes (if any). Can be NULL. - * @param errorCode Reference to an in/out error code value - * which must not indicate a failure before the function call. - * @return The length of the result string, if successful - or in case of a buffer overflow, - * in which case it will be greater than destCapacity. - * - * @see u_strFoldCase - * @see ucasemap_setOptions - * @see U_FOLD_CASE_DEFAULT - * @see U_FOLD_CASE_EXCLUDE_SPECIAL_I - * @internal ICU 59 technology preview - */ -U_CAPI int32_t U_EXPORT2 -ucasemap_foldCaseWithEdits(const UCaseMap *csm, - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - icu::Edits *edits, - UErrorCode &errorCode); - -#endif // U_HIDE_INTERNAL_API -#endif // U_SHOW_CPLUSPLUS_API - #if !UCONFIG_NO_BREAK_ITERATION /** @@ -849,25 +869,4 @@ ucasemap_utf8FoldCase(const UCaseMap *csm, const char *src, int32_t srcLength, UErrorCode *pErrorCode); -#if U_SHOW_CPLUSPLUS_API - -// Not #ifndef U_HIDE_INTERNAL_API because UnicodeString needs the UStringCaseMapper. -/** - * Internal string case mapping function type. - * All error checking must be done. - * The UCaseMap must be fully initialized, with locale and/or iter set as needed. - * src and dest must not overlap. 
- * @internal - */ -typedef int32_t U_CALLCONV -UStringCaseMapper(const UCaseMap *csm, -#if !UCONFIG_NO_BREAK_ITERATION - icu::BreakIterator *iter, -#endif - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - icu::Edits *edits, - UErrorCode &errorCode); - -#endif // U_SHOW_CPLUSPLUS_API #endif diff --git a/icu4c/source/common/unicode/unistr.h b/icu4c/source/common/unicode/unistr.h index 350828559da..ed994dbd6b4 100644 --- a/icu4c/source/common/unicode/unistr.h +++ b/icu4c/source/common/unicode/unistr.h @@ -33,7 +33,6 @@ #include "unicode/std_string.h" #include "unicode/stringpiece.h" #include "unicode/bytestream.h" -#include "unicode/ucasemap.h" struct UConverter; // unicode/ucnv.h @@ -60,6 +59,30 @@ U_NAMESPACE_BEGIN #if !UCONFIG_NO_BREAK_ITERATION class BreakIterator; // unicode/brkiter.h #endif +class CaseMap; +class Edits; + +U_NAMESPACE_END + +// Not #ifndef U_HIDE_INTERNAL_API because UnicodeString needs the UStringCaseMapper. +/** + * Internal string case mapping function type. + * All error checking must be done. + * src and dest must not overlap. + * @internal + */ +typedef int32_t U_CALLCONV +UStringCaseMapper(const icu::CaseMap &csm, +#if !UCONFIG_NO_BREAK_ITERATION + icu::BreakIterator *iter, +#endif + UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + icu::Edits *edits, + UErrorCode &errorCode); + +U_NAMESPACE_BEGIN + class Locale; // unicode/locid.h class StringCharacterIterator; class UnicodeStringAppendable; // unicode/appendable.h @@ -3573,7 +3596,7 @@ private: * as in ustr_imp.h for ustrcase_map(). 
*/ UnicodeString & - caseMap(const UCaseMap *csm, + caseMap(const CaseMap &csm, #if !UCONFIG_NO_BREAK_ITERATION BreakIterator *iter, #endif diff --git a/icu4c/source/common/unistr_case.cpp b/icu4c/source/common/unistr_case.cpp index d48831b4b10..a3b80b0666c 100644 --- a/icu4c/source/common/unistr_case.cpp +++ b/icu4c/source/common/unistr_case.cpp @@ -88,7 +88,7 @@ UnicodeString::doCaseCompare(int32_t start, //======================================== UnicodeString & -UnicodeString::caseMap(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM +UnicodeString::caseMap(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM UStringCaseMapper *stringCaseMapper) { if(isEmpty() || !isWritable()) { // nothing to do @@ -194,10 +194,9 @@ UnicodeString::caseMap(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM UnicodeString & UnicodeString::foldCase(uint32_t options) { - UCaseMap csm=UCASEMAP_INITIALIZER; - csm.csp=ucase_getSingleton(); - csm.options=options; - return caseMap(&csm, UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalFold); + UErrorCode errorCode = U_ZERO_ERROR; + CaseMap csm(options, errorCode); + return caseMap(csm, UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalFold); } U_NAMESPACE_END diff --git a/icu4c/source/common/unistr_case_locale.cpp b/icu4c/source/common/unistr_case_locale.cpp index 4bf55777043..2a08c9b3180 100644 --- a/icu4c/source/common/unistr_case_locale.cpp +++ b/icu4c/source/common/unistr_case_locale.cpp @@ -19,8 +19,8 @@ #include "unicode/utypes.h" #include "unicode/locid.h" +#include "unicode/ucasemap.h" #include "unicode/unistr.h" -#include "cmemory.h" #include "ustr_imp.h" U_NAMESPACE_BEGIN @@ -29,22 +29,6 @@ U_NAMESPACE_BEGIN // Write implementation //======================================== -/* - * Set parameters on an empty UCaseMap, for UCaseMap-less API functions. - * Do this fast because it is called with every function call. 
- */ -static inline void -setTempCaseMap(UCaseMap *csm, const char *locale) { - if(csm->csp==NULL) { - csm->csp=ucase_getSingleton(); - } - if(locale!=NULL && locale[0]==0) { - csm->locale[0]=0; - } else { - ustrcase_setTempCaseMapLocale(csm, locale); - } -} - UnicodeString & UnicodeString::toLower() { return toLower(Locale::getDefault()); @@ -52,9 +36,9 @@ UnicodeString::toLower() { UnicodeString & UnicodeString::toLower(const Locale &locale) { - UCaseMap csm=UCASEMAP_INITIALIZER; - setTempCaseMap(&csm, locale.getName()); - return caseMap(&csm, UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToLower); + UErrorCode errorCode = U_ZERO_ERROR; + CaseMap csm(locale, 0, errorCode); + return caseMap(csm, UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToLower); } UnicodeString & @@ -64,9 +48,9 @@ UnicodeString::toUpper() { UnicodeString & UnicodeString::toUpper(const Locale &locale) { - UCaseMap csm=UCASEMAP_INITIALIZER; - setTempCaseMap(&csm, locale.getName()); - return caseMap(&csm, UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToUpper); + UErrorCode errorCode = U_ZERO_ERROR; + CaseMap csm(locale, 0, errorCode); + return caseMap(csm, UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToUpper); } U_NAMESPACE_END diff --git a/icu4c/source/common/unistr_titlecase_brkiter.cpp b/icu4c/source/common/unistr_titlecase_brkiter.cpp index 05d38fb5822..24f60c1e289 100644 --- a/icu4c/source/common/unistr_titlecase_brkiter.cpp +++ b/icu4c/source/common/unistr_titlecase_brkiter.cpp @@ -22,27 +22,11 @@ #if !UCONFIG_NO_BREAK_ITERATION #include "unicode/brkiter.h" +#include "unicode/locid.h" +#include "unicode/ucasemap.h" #include "unicode/unistr.h" -#include "unicode/ustring.h" -#include "cmemory.h" #include "ustr_imp.h" -/* - * Set parameters on an empty UCaseMap, for UCaseMap-less API functions. - * Do this fast because it is called with every function call. 
- */ -static inline void -setTempCaseMap(UCaseMap *csm, const char *locale) { - if(csm->csp==NULL) { - csm->csp=ucase_getSingleton(); - } - if(locale!=NULL && locale[0]==0) { - csm->locale[0]=0; - } else { - ustrcase_setTempCaseMapLocale(csm, locale); - } -} - U_NAMESPACE_BEGIN UnicodeString & @@ -57,12 +41,10 @@ UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale) { UnicodeString & UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options) { - UCaseMap csm=UCASEMAP_INITIALIZER; - csm.options=options; - setTempCaseMap(&csm, locale.getName()); + UErrorCode errorCode = U_ZERO_ERROR; + CaseMap csm(locale, options, errorCode); BreakIterator *bi=titleIter; if(bi==NULL) { - UErrorCode errorCode=U_ZERO_ERROR; bi=BreakIterator::createWordInstance(locale, errorCode); if(U_FAILURE(errorCode)) { setToBogus(); @@ -70,7 +52,7 @@ UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t } } bi->setText(*this); - caseMap(&csm, bi, ustrcase_internalToTitle); + caseMap(csm, bi, ustrcase_internalToTitle); if(titleIter==NULL) { delete bi; } diff --git a/icu4c/source/common/ustr_imp.h b/icu4c/source/common/ustr_imp.h index 52a44545a0f..b6d160856fe 100644 --- a/icu4c/source/common/ustr_imp.h +++ b/icu4c/source/common/ustr_imp.h @@ -104,21 +104,48 @@ uprv_loadPropsData(UErrorCode *errorCode);*/ #ifdef __cplusplus // TODO: Consider moving these case mapping definitions -// into a new internal header like ucasemap_imp.h. +// into a new internal header like casemap_imp.h. + +#include "unicode/unistr.h" // for UStringCaseMapper /* * Internal string casing functions implementing * ustring.h/ustrcase.c and UnicodeString case mapping functions. */ -struct UCaseMap { - const UCaseProps *csp; +/** Avoid public @internal CaseMap methods. Define only one CaseMap friend. 
*/ +class icu::internal::CaseMapFriend final /* all static */ { +public: + static UCaseMap *toUCaseMap(icu::CaseMap &csm) { + return reinterpret_cast(&csm); + } + + static const icu::CaseMap *fromUCaseMap(const UCaseMap *csm) { + return reinterpret_cast(csm); + } + static icu::CaseMap *fromUCaseMap(UCaseMap *csm) { + return reinterpret_cast(csm); + } + #if !UCONFIG_NO_BREAK_ITERATION - icu::BreakIterator *iter; /* We adopt the iterator, so we own it. */ + static const icu::BreakIterator *iter(const icu::CaseMap &csm) { return csm.iter; } + static icu::BreakIterator *mutableIter(icu::CaseMap &csm) { return csm.iter; } + static void adoptIter(icu::CaseMap &csm, icu::BreakIterator *iter); #endif - char locale[32]; - int32_t locCache; - uint32_t options; + + static const icu::Locale &locale(const icu::CaseMap &csm) { return csm.locale; } + static const char *localeID(const icu::CaseMap &csm) { return csm.locale.getName(); } + static void setLocale(icu::CaseMap &csm, const char *localeID, UErrorCode &errorCode) { + csm.setLocale(localeID, errorCode); + } + + static int32_t caseLocale(const icu::CaseMap &csm) { return csm.caseLocale; } + + static uint32_t options(const icu::CaseMap &csm) { return csm.options; } + static void setOptions(icu::CaseMap &csm, uint32_t options) { csm.options = options; } + +private: + CaseMapFriend() = delete; }; #if UCONFIG_NO_BREAK_ITERATION @@ -135,12 +162,9 @@ struct UCaseMap { # define UCASEMAP_BREAK_ITERATOR_NULL NULL, #endif -U_CFUNC void -ustrcase_setTempCaseMapLocale(UCaseMap *csm, const char *locale); - /** Implements UStringCaseMapper. 
*/ U_CFUNC int32_t U_CALLCONV -ustrcase_internalToLower(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM +ustrcase_internalToLower(const icu::CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, icu::Edits *edits, @@ -148,7 +172,7 @@ ustrcase_internalToLower(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM /** Implements UStringCaseMapper. */ U_CFUNC int32_t U_CALLCONV -ustrcase_internalToUpper(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM +ustrcase_internalToUpper(const icu::CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, icu::Edits *edits, @@ -158,7 +182,7 @@ ustrcase_internalToUpper(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM /** Implements UStringCaseMapper. */ U_CFUNC int32_t U_CALLCONV -ustrcase_internalToTitle(const UCaseMap *csm, +ustrcase_internalToTitle(const icu::CaseMap &csm, icu::BreakIterator *iter, UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, @@ -169,7 +193,7 @@ ustrcase_internalToTitle(const UCaseMap *csm, /** Implements UStringCaseMapper. */ U_CFUNC int32_t U_CALLCONV -ustrcase_internalFold(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM +ustrcase_internalFold(const icu::CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, icu::Edits *edits, @@ -180,7 +204,7 @@ ustrcase_internalFold(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM * Implements argument checking. */ U_CFUNC int32_t -ustrcase_map(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM +ustrcase_map(const icu::CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, UStringCaseMapper *stringCaseMapper, @@ -193,7 +217,7 @@ ustrcase_map(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM * Implements argument checking and internally works with an intermediate buffer if necessary. 
*/ U_CFUNC int32_t -ustrcase_mapWithOverlap(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM +ustrcase_mapWithOverlap(const icu::CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, UStringCaseMapper *stringCaseMapper, @@ -207,24 +231,32 @@ ustrcase_mapWithOverlap(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM * src and dest must not overlap. */ typedef int32_t U_CALLCONV -UTF8CaseMapper(const UCaseMap *csm, +UTF8CaseMapper(const icu::CaseMap &csm, +#if !UCONFIG_NO_BREAK_ITERATION + icu::BreakIterator *iter, +#endif uint8_t *dest, int32_t destCapacity, const uint8_t *src, int32_t srcLength, UErrorCode *pErrorCode); +#if !UCONFIG_NO_BREAK_ITERATION + /** Implements UTF8CaseMapper. */ U_CFUNC int32_t U_CALLCONV -ucasemap_internalUTF8ToTitle(const UCaseMap *csm, - uint8_t *dest, int32_t destCapacity, - const uint8_t *src, int32_t srcLength, - UErrorCode *pErrorCode); +ucasemap_internalUTF8ToTitle(const icu::CaseMap &csm, + icu::BreakIterator *iter, + uint8_t *dest, int32_t destCapacity, + const uint8_t *src, int32_t srcLength, + UErrorCode *pErrorCode); + +#endif /** * Implements argument checking and buffer handling * for UTF-8 string case mapping as a common function. */ U_CFUNC int32_t -ucasemap_mapUTF8(const UCaseMap *csm, +ucasemap_mapUTF8(const icu::CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM uint8_t *dest, int32_t destCapacity, const uint8_t *src, int32_t srcLength, UTF8CaseMapper *stringCaseMapper, diff --git a/icu4c/source/common/ustr_titlecase_brkiter.cpp b/icu4c/source/common/ustr_titlecase_brkiter.cpp index bf4f252c541..e70c863ee24 100644 --- a/icu4c/source/common/ustr_titlecase_brkiter.cpp +++ b/icu4c/source/common/ustr_titlecase_brkiter.cpp @@ -29,25 +29,19 @@ #include "ucase.h" #include "ustr_imp.h" -/* functions available in the common library (for unistr_case.cpp) */ +U_NAMESPACE_USE -/* - * Set parameters on an empty UCaseMap, for UCaseMap-less API functions. 
- * Do this fast because it is called with every function call. - * Duplicate of the same function in ustrcase.cpp, to keep it inline. - */ -static inline void -setTempCaseMap(UCaseMap *csm, const char *locale) { - if(csm->csp==NULL) { - csm->csp=ucase_getSingleton(); - } - if(locale!=NULL && locale[0]==0) { - csm->locale[0]=0; - } else { - ustrcase_setTempCaseMapLocale(csm, locale); - } +using icu::internal::CaseMapFriend; + +// TODO: create casemap.cpp + +void icu::internal::CaseMapFriend::adoptIter(CaseMap &csm, BreakIterator *iter) { + delete csm.iter; + csm.iter = iter; } +/* functions available in the common library (for unistr_case.cpp) */ + /* public API functions */ U_CAPI int32_t U_EXPORT2 @@ -56,70 +50,76 @@ u_strToTitle(UChar *dest, int32_t destCapacity, UBreakIterator *titleIter, const char *locale, UErrorCode *pErrorCode) { - UCaseMap csm=UCASEMAP_INITIALIZER; - setTempCaseMap(&csm, locale); - icu::LocalPointer ownedIter; - icu::BreakIterator *iter; + CaseMap csm(locale, 0, *pErrorCode); + BreakIterator *iter; if(titleIter!=NULL) { - iter=reinterpret_cast(titleIter); + iter=reinterpret_cast(titleIter); } else { - iter=icu::BreakIterator::createWordInstance(icu::Locale(csm.locale), *pErrorCode); - ownedIter.adoptInstead(iter); + iter=BreakIterator::createWordInstance(CaseMapFriend::locale(csm), *pErrorCode); + CaseMapFriend::adoptIter(csm, iter); } if(U_FAILURE(*pErrorCode)) { return 0; } - icu::UnicodeString s(srcLength<0, src, srcLength); + UnicodeString s(srcLength<0, src, srcLength); iter->setText(s); return ustrcase_mapWithOverlap( - &csm, iter, + csm, iter, dest, destCapacity, src, srcLength, ustrcase_internalToTitle, *pErrorCode); } -U_CAPI int32_t U_EXPORT2 -ucasemap_toTitleWithEdits(const UCaseMap *csm, icu::BreakIterator *iter, - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - icu::Edits *edits, - UErrorCode &errorCode) { - icu::LocalPointer ownedIter; - if(iter==NULL) { - if(csm->iter!=NULL) { - 
iter=csm->iter->clone(); +U_NAMESPACE_BEGIN + +int32_t CaseMap::toTitle(BreakIterator *it, + UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + Edits *edits, + UErrorCode &errorCode) const { + LocalPointer ownedIter; + if(it==NULL) { + if(iter!=NULL) { + it=iter->clone(); } else { - iter=icu::BreakIterator::createWordInstance(icu::Locale(csm->locale), errorCode); + it=BreakIterator::createWordInstance(locale, errorCode); } - ownedIter.adoptInsteadAndCheckErrorCode(iter, errorCode); + ownedIter.adoptInsteadAndCheckErrorCode(it, errorCode); } if(U_FAILURE(errorCode)) { return 0; } - icu::UnicodeString s(srcLength<0, src, srcLength); - iter->setText(s); + UnicodeString s(srcLength<0, src, srcLength); + it->setText(s); return ustrcase_map( - csm, iter, + *this, it, dest, destCapacity, src, srcLength, ustrcase_internalToTitle, edits, errorCode); } +U_NAMESPACE_END + U_CAPI int32_t U_EXPORT2 -ucasemap_toTitle(UCaseMap *csm, +ucasemap_toTitle(UCaseMap *ucsm, UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, UErrorCode *pErrorCode) { - if(csm->iter==NULL) { - csm->iter=icu::BreakIterator::createWordInstance(icu::Locale(csm->locale), *pErrorCode); + if (U_FAILURE(*pErrorCode)) { + return 0; } - if(U_FAILURE(*pErrorCode)) { + CaseMap &csm = *CaseMapFriend::fromUCaseMap(ucsm); + if (CaseMapFriend::iter(csm) == NULL) { + CaseMapFriend::adoptIter( + csm, BreakIterator::createWordInstance(CaseMapFriend::locale(csm), *pErrorCode)); + } + if (U_FAILURE(*pErrorCode)) { return 0; } - icu::UnicodeString s(srcLength<0, src, srcLength); - csm->iter->setText(s); + UnicodeString s(srcLength<0, src, srcLength); + CaseMapFriend::mutableIter(csm)->setText(s); return ustrcase_map( - csm, csm->iter, + csm, CaseMapFriend::mutableIter(csm), dest, destCapacity, src, srcLength, ustrcase_internalToTitle, NULL, *pErrorCode); diff --git a/icu4c/source/common/ustrcase.cpp b/icu4c/source/common/ustrcase.cpp index c437683c6f4..1b0424e14eb 100644 --- 
a/icu4c/source/common/ustrcase.cpp +++ b/icu4c/source/common/ustrcase.cpp @@ -32,6 +32,10 @@ #include "ustr_imp.h" #include "uassert.h" +U_NAMESPACE_USE + +using icu::internal::CaseMapFriend; + U_NAMESPACE_BEGIN namespace { @@ -538,13 +542,13 @@ utf16_caseContextIterator(void *context, int8_t dir) { * context [0..srcLength[ into account. */ static int32_t -_caseMap(const UCaseMap *csm, UCaseMapFull *map, +_caseMap(const CaseMap &csm, UCaseMapFull *map, UChar *dest, int32_t destCapacity, const UChar *src, UCaseContext *csc, int32_t srcStart, int32_t srcLimit, icu::Edits *edits, UErrorCode &errorCode) { - int32_t locCache=csm->locCache; + int32_t locCache = CaseMapFriend::caseLocale(csm); /* case mapping loop */ int32_t srcIndex=srcStart; @@ -556,7 +560,7 @@ _caseMap(const UCaseMap *csm, UCaseMapFull *map, U16_NEXT(src, srcIndex, srcLimit, c); csc->cpLimit=srcIndex; const UChar *s; - c=map(csm->csp, c, utf16_caseContextIterator, csc, &s, csm->locale, &locCache); + c=map(NULL, c, utf16_caseContextIterator, csc, &s, NULL, &locCache); destIndex = appendResult(dest, destIndex, destCapacity, c, s, srcIndex - cpStart, edits); if (destIndex < 0) { @@ -571,7 +575,7 @@ _caseMap(const UCaseMap *csm, UCaseMapFull *map, #if !UCONFIG_NO_BREAK_ITERATION U_CFUNC int32_t U_CALLCONV -ustrcase_internalToTitle(const UCaseMap *csm, BreakIterator *iter, +ustrcase_internalToTitle(const CaseMap &csm, BreakIterator *iter, UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, icu::Edits *edits, @@ -581,7 +585,7 @@ ustrcase_internalToTitle(const UCaseMap *csm, BreakIterator *iter, } /* set up local variables */ - int32_t locCache=csm->locCache; + int32_t locCache=CaseMapFriend::caseLocale(csm); UCaseContext csc=UCASECONTEXT_INITIALIZER; csc.p=(void *)src; csc.limit=srcLength; @@ -622,7 +626,7 @@ ustrcase_internalToTitle(const UCaseMap *csm, BreakIterator *iter, int32_t titleLimit=prev; UChar32 c; U16_NEXT(src, titleLimit, idx, c); - 
if((csm->options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(csm->csp, c)) { + if((CaseMapFriend::options(csm)&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(NULL, c)) { /* Adjust the titlecasing index (titleStart) to the next cased character. */ for(;;) { titleStart=titleLimit; @@ -634,7 +638,7 @@ ustrcase_internalToTitle(const UCaseMap *csm, BreakIterator *iter, break; } U16_NEXT(src, titleLimit, idx, c); - if(UCASE_NONE!=ucase_getType(csm->csp, c)) { + if(UCASE_NONE!=ucase_getType(NULL, c)) { break; /* cased letter at [titleStart..titleLimit[ */ } } @@ -651,8 +655,8 @@ ustrcase_internalToTitle(const UCaseMap *csm, BreakIterator *iter, csc.cpStart=titleStart; csc.cpLimit=titleLimit; const UChar *s; - c=ucase_toFullTitle(csm->csp, c, utf16_caseContextIterator, &csc, &s, - csm->locale, &locCache); + c=ucase_toFullTitle(NULL, c, utf16_caseContextIterator, &csc, &s, + NULL, &locCache); destIndex=appendResult(dest, destIndex, destCapacity, c, s, titleLimit-titleStart, edits); if(destIndex<0) { @@ -662,7 +666,7 @@ ustrcase_internalToTitle(const UCaseMap *csm, BreakIterator *iter, /* Special case Dutch IJ titlecasing */ if (titleStart+1 < idx && - ucase_getCaseLocale(csm->locale,&locCache) == UCASE_LOC_DUTCH && + locCache == UCASE_LOC_DUTCH && (src[titleStart] == 0x0049 || src[titleStart] == 0x0069) && (src[titleStart+1] == 0x004A || src[titleStart+1] == 0x006A)) { destIndex=appendUChar(dest, destIndex, destCapacity, 0x004A); @@ -678,7 +682,7 @@ ustrcase_internalToTitle(const UCaseMap *csm, BreakIterator *iter, /* lowercase [titleLimit..index[ */ if(titleLimitoptions&U_TITLECASE_NO_LOWERCASE)==0) { + if((CaseMapFriend::options(csm)&U_TITLECASE_NO_LOWERCASE)==0) { /* Normal operation: Lowercase the rest of the word. */ destIndex+= _caseMap( @@ -1193,7 +1197,7 @@ UBool isFollowedByCasedLetter(const UCaseProps *csp, const UChar *s, int32_t i, * for each character. 
* TODO: Try to re-consolidate one way or another with the non-Greek function. */ -int32_t toUpper(const UCaseMap *csm, +int32_t toUpper(const CaseMap & /* unused csm */, UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, Edits *edits, @@ -1206,7 +1210,7 @@ int32_t toUpper(const UCaseMap *csm, UChar32 c; U16_NEXT(src, nextIndex, srcLength, c); uint32_t nextState = 0; - int32_t type = ucase_getTypeOrIgnorable(csm->csp, c); + int32_t type = ucase_getTypeOrIgnorable(NULL, c); if ((type & UCASE_IGNORABLE) != 0) { // c is case-ignorable nextState |= (state & AFTER_CASED); @@ -1253,7 +1257,7 @@ int32_t toUpper(const UCaseMap *csm, (data & HAS_ACCENT) != 0 && numYpogegrammeni == 0 && (state & AFTER_CASED) == 0 && - !isFollowedByCasedLetter(csm->csp, src, nextIndex, srcLength)) { + !isFollowedByCasedLetter(NULL, src, nextIndex, srcLength)) { // Keep disjunctive "or" with (only) a tonos. // We use the same "word boundary" conditions as for the Final_Sigma test. if (i == nextIndex) { @@ -1322,7 +1326,7 @@ int32_t toUpper(const UCaseMap *csm, } } else { const UChar *s; - c=ucase_toFullUpper(csm->csp, c, NULL, NULL, &s, csm->locale, &locCache); + c=ucase_toFullUpper(NULL, c, NULL, NULL, &s, NULL, &locCache); destIndex = appendResult(dest, destIndex, destCapacity, c, s, nextIndex - i, edits); if (destIndex < 0) { @@ -1343,7 +1347,7 @@ U_NAMESPACE_END /* functions available in the common library (for unistr_case.cpp) */ U_CFUNC int32_t U_CALLCONV -ustrcase_internalToLower(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_UNUSED +ustrcase_internalToLower(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_UNUSED UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, icu::Edits *edits, @@ -1360,13 +1364,13 @@ ustrcase_internalToLower(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_UNUSED } U_CFUNC int32_t U_CALLCONV -ustrcase_internalToUpper(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_UNUSED +ustrcase_internalToUpper(const CaseMap &csm, 
UCASEMAP_BREAK_ITERATOR_UNUSED UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, icu::Edits *edits, UErrorCode &errorCode) { - int32_t locCache = csm->locCache; - if (ucase_getCaseLocale(csm->locale, &locCache) == UCASE_LOC_GREEK) { + int32_t locCache = CaseMapFriend::caseLocale(csm); + if (locCache == UCASE_LOC_GREEK) { return GreekUpper::toUpper(csm, dest, destCapacity, src, srcLength, edits, errorCode); } UCaseContext csc=UCASECONTEXT_INITIALIZER; @@ -1381,7 +1385,7 @@ ustrcase_internalToUpper(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_UNUSED } U_CFUNC int32_t U_CALLCONV -ustrcase_internalFold(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_UNUSED +ustrcase_internalFold(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_UNUSED UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, icu::Edits *edits, @@ -1394,7 +1398,7 @@ ustrcase_internalFold(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_UNUSED UChar32 c; U16_NEXT(src, srcIndex, srcLength, c); const UChar *s; - c = ucase_toFullFolding(csm->csp, c, &s, csm->options); + c = ucase_toFullFolding(NULL, c, &s, CaseMapFriend::options(csm)); destIndex = appendResult(dest, destIndex, destCapacity, c, s, srcIndex - cpStart, edits); if (destIndex < 0) { @@ -1407,7 +1411,7 @@ ustrcase_internalFold(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_UNUSED } U_CFUNC int32_t -ustrcase_map(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM +ustrcase_map(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, UStringCaseMapper *stringCaseMapper, @@ -1448,7 +1452,7 @@ ustrcase_map(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM } U_CFUNC int32_t -ustrcase_mapWithOverlap(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM +ustrcase_mapWithOverlap(const CaseMap &csm, UCASEMAP_BREAK_ITERATOR_PARAM UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, UStringCaseMapper *stringCaseMapper, @@ -1519,55 +1523,50 @@ 
u_strFoldCase(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, uint32_t options, UErrorCode *pErrorCode) { - UCaseMap csm=UCASEMAP_INITIALIZER; - csm.csp=ucase_getSingleton(); - csm.options=options; return ustrcase_mapWithOverlap( - &csm, UCASEMAP_BREAK_ITERATOR_NULL + CaseMap(options, *pErrorCode), UCASEMAP_BREAK_ITERATOR_NULL dest, destCapacity, src, srcLength, ustrcase_internalFold, *pErrorCode); } -U_CAPI int32_t U_EXPORT2 -ucasemap_toLowerWithEdits(const UCaseMap *csm, - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - icu::Edits *edits, - UErrorCode &errorCode) { +U_NAMESPACE_BEGIN + +int32_t CaseMap::toLower(UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + icu::Edits *edits, + UErrorCode &errorCode) const { return ustrcase_map( - csm, UCASEMAP_BREAK_ITERATOR_NULL + *this, UCASEMAP_BREAK_ITERATOR_NULL dest, destCapacity, src, srcLength, ustrcase_internalToLower, edits, errorCode); } -U_CAPI int32_t U_EXPORT2 -ucasemap_toUpperWithEdits(const UCaseMap *csm, - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - icu::Edits *edits, - UErrorCode &errorCode) { +int32_t CaseMap::toUpper(UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + icu::Edits *edits, + UErrorCode &errorCode) const { return ustrcase_map( - csm, UCASEMAP_BREAK_ITERATOR_NULL + *this, UCASEMAP_BREAK_ITERATOR_NULL dest, destCapacity, src, srcLength, ustrcase_internalToUpper, edits, errorCode); } -U_CAPI int32_t U_EXPORT2 -ucasemap_foldCaseWithEdits(const UCaseMap *csm, - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - icu::Edits *edits, - UErrorCode &errorCode) { +int32_t CaseMap::foldCase(UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + icu::Edits *edits, + UErrorCode &errorCode) const { return ustrcase_map( - csm, UCASEMAP_BREAK_ITERATOR_NULL + *this, UCASEMAP_BREAK_ITERATOR_NULL dest, destCapacity, src, srcLength, 
ustrcase_internalFold, edits, errorCode); } +U_NAMESPACE_END + /* case-insensitive string comparisons -------------------------------------- */ /* diff --git a/icu4c/source/common/ustrcase_locale.cpp b/icu4c/source/common/ustrcase_locale.cpp index 5e6e1418247..b71f5dcad53 100644 --- a/icu4c/source/common/ustrcase_locale.cpp +++ b/icu4c/source/common/ustrcase_locale.cpp @@ -18,69 +18,71 @@ */ #include "unicode/utypes.h" +#include "uassert.h" +#include "unicode/brkiter.h" #include "unicode/ucasemap.h" #include "unicode/uloc.h" #include "unicode/ustring.h" #include "ucase.h" #include "ustr_imp.h" -U_CFUNC void -ustrcase_setTempCaseMapLocale(UCaseMap *csm, const char *locale) { - /* - * We could call ucasemap_setLocale(), but here we really only care about - * the initial language subtag, we need not return the real string via - * ucasemap_getLocale(), and we don't care about only getting "x" from - * "x-some-thing" etc. - * - * We ignore locales with a longer-than-3 initial subtag. - * - * We also do not fill in the locCache because it is rarely used, - * and not worth setting unless we reuse it for many case mapping operations. - * (That's why UCaseMap was created.) - */ - int i; - char c; +U_NAMESPACE_BEGIN - /* the internal functions require locale!=NULL */ - if(locale==NULL) { - // Do not call uprv_getDefaultLocaleID() because that does not see - // changes to the default locale via uloc_setDefault(). - // It would also be inefficient if used frequently because uprv_getDefaultLocaleID() - // does not cache the locale ID. - // - // Unfortunately, uloc_getDefault() has many dependencies. - // We only care about a small set of language subtags, - // and we do not need the locale ID to be canonicalized. - // - // Best is to not call case mapping functions with a NULL locale ID. - locale=uloc_getDefault(); - } - for(i=0; i<4 && (c=locale[i])!=0 && c!='-' && c!='_'; ++i) { - csm->locale[i]=c; - } - if(i<=3) { - csm->locale[i]=0; /* Up to 3 non-separator characters. 
*/ - } else { - csm->locale[0]=0; /* Longer-than-3 initial subtag: Ignore. */ +// TODO: new casemap_locale.cpp + +CaseMap::CaseMap(const Locale &loc, int32_t caseLoc, uint32_t opts, UErrorCode & /*errorCode*/) : + caseLocale(caseLoc), options(opts), locale(loc) +#if !UCONFIG_NO_BREAK_ITERATION + , iter(NULL) +#endif + { + if (caseLoc == 0) { // UCASE_LOC_UNKNOWN + setCaseLocale(locale.getBaseName()); } } -/* - * Set parameters on an empty UCaseMap, for UCaseMap-less API functions. - * Do this fast because it is called with every function call. - */ -static inline void -setTempCaseMap(UCaseMap *csm, const char *locale) { - if(csm->csp==NULL) { - csm->csp=ucase_getSingleton(); +CaseMap::CaseMap(const Locale &locale, uint32_t options, UErrorCode &errorCode) : + CaseMap(locale, /* UCASE_LOC_UNKNOWN = */ 0, options, errorCode) {} + +// small optimization for localeID=="", a little slower otherwise +CaseMap::CaseMap(const char *localeID, uint32_t options, UErrorCode &errorCode) : + CaseMap(Locale::getRoot(), /* UCASE_LOC_ROOT = */ 1, options, errorCode) { + if (localeID == NULL || *localeID != 0) { + setLocale(localeID, errorCode); // not root } - if(locale!=NULL && locale[0]==0) { - csm->locale[0]=0; +} + +CaseMap::~CaseMap() { +#if !UCONFIG_NO_BREAK_ITERATION + delete iter; +#endif +} + +void CaseMap::setCaseLocale(const char *localeID) { + U_ASSERT(localeID != NULL); + caseLocale = UCASE_LOC_UNKNOWN; + ucase_getCaseLocale(localeID, &caseLocale); +} + +void CaseMap::setLocale(const char *localeID, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return; } + if (localeID == NULL) { + locale = Locale::getDefault(); + localeID = locale.getBaseName(); } else { - ustrcase_setTempCaseMapLocale(csm, locale); + locale = Locale(localeID); + if (locale.isBogus()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + localeID = ""; + } } + setCaseLocale(localeID); } +U_NAMESPACE_END + +U_NAMESPACE_USE + /* public API functions */ U_CAPI int32_t U_EXPORT2 @@ -88,10 +90,9 @@ 
u_strToLower(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, const char *locale, UErrorCode *pErrorCode) { - UCaseMap csm=UCASEMAP_INITIALIZER; - setTempCaseMap(&csm, locale); + CaseMap csm(locale, 0, *pErrorCode); return ustrcase_mapWithOverlap( - &csm, UCASEMAP_BREAK_ITERATOR_NULL + csm, UCASEMAP_BREAK_ITERATOR_NULL dest, destCapacity, src, srcLength, ustrcase_internalToLower, *pErrorCode); @@ -102,10 +103,9 @@ u_strToUpper(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, const char *locale, UErrorCode *pErrorCode) { - UCaseMap csm=UCASEMAP_INITIALIZER; - setTempCaseMap(&csm, locale); + CaseMap csm(locale, 0, *pErrorCode); return ustrcase_mapWithOverlap( - &csm, UCASEMAP_BREAK_ITERATOR_NULL + csm, UCASEMAP_BREAK_ITERATOR_NULL dest, destCapacity, src, srcLength, ustrcase_internalToUpper, *pErrorCode); diff --git a/icu4c/source/test/cintltst/cstrcase.c b/icu4c/source/test/cintltst/cstrcase.c index 6c5e5b70c2d..e5cb74d1831 100644 --- a/icu4c/source/test/cintltst/cstrcase.c +++ b/icu4c/source/test/cintltst/cstrcase.c @@ -744,11 +744,12 @@ TestUCaseMap(void) { if(0!=strcmp(locale, "tr")) { log_err("ucasemap_getLocale(ucasemap_open(\"tur\"))==%s!=\"tr\"\n", locale); } - /* overly long locale IDs get truncated to their language code to avoid unnecessary allocation */ + /* overly long locale IDs may get truncated to their language code to avoid unnecessary allocation */ ucasemap_setLocale(csm, "I-kLInGOn-the-quick-brown-fox-jumps-over-the-lazy-dog", &errorCode); locale=ucasemap_getLocale(csm); - if(0!=strcmp(locale, "i-klingon")) { - log_err("ucasemap_getLocale(ucasemap_setLocale(\"I-kLInGOn-the-quick-br...\"))==%s!=\"i-klingon\"\n", locale); + if(0!=strncmp(locale, "i-klingon", 9)) { + log_err("ucasemap_getLocale(ucasemap_setLocale(\"I-kLInGOn-the-quick-br...\"))==%s\n" + " does not start with \"i-klingon\"\n", locale); } errorCode=U_ZERO_ERROR; -- 2.40.0