* zero or more case-ignorable characters.
*/
-#define is_a(c) ((c)=='a' || (c)=='A')
#define is_d(c) ((c)=='d' || (c)=='D')
#define is_e(c) ((c)=='e' || (c)=='E')
#define is_i(c) ((c)=='i' || (c)=='I')
#define is_l(c) ((c)=='l' || (c)=='L')
-#define is_n(c) ((c)=='n' || (c)=='N')
#define is_r(c) ((c)=='r' || (c)=='R')
#define is_t(c) ((c)=='t' || (c)=='T')
#define is_u(c) ((c)=='u' || (c)=='U')
* Accepts both 2- and 3-letter codes and accepts case variants.
*/
U_CFUNC int32_t
-ucase_getCaseLocale(const char *locale, int32_t *locCache) {
- int32_t result;
- char c;
-
- if(locCache!=NULL && (result=*locCache)!=UCASE_LOC_UNKNOWN) {
- return result;
- }
-
- result=UCASE_LOC_ROOT;
-
+ucase_getCaseLocale(const char *locale) {
/*
* Returns the UCASE_LOC_* constant selected by the language code at the
* start of the locale ID: UCASE_LOC_TURKISH for tr/tur and az/aze,
* UCASE_LOC_LITHUANIAN for lt/lit, UCASE_LOC_GREEK for el/ell,
* UCASE_LOC_DUTCH for nl/nld, and UCASE_LOC_ROOT for everything else.
* A language code only matches when the character after it satisfies is_sep().
*
* This function used to use uloc_getLanguage(), but the current code
* removes the dependency of this low-level code on uloc implementation code.
* Because this code does not want to depend on uloc, the caller must
* pass in a non-NULL locale, i.e., may need to call uloc_getDefault().
*/
- c=*locale++;
- if(is_t(c)) {
- /* tr or tur? */
+ char c=*locale++;
+ // Fast path for English "en", which is often used for default (=root locale) case mappings,
+ // and for Chinese "zh": both are very common and neither has special case mapping behavior.
+ // After that, test lowercase vs. uppercase first to reduce the number of comparisons
+ // for other locales without special behavior.
+ if(c=='e') {
+ /* el or ell? */
c=*locale++;
- if(is_u(c)) {
+ if(is_l(c)) {
c=*locale++;
- }
- if(is_r(c)) {
- c=*locale;
+ if(is_l(c)) {
+ c=*locale;
+ }
if(is_sep(c)) {
- result=UCASE_LOC_TURKISH;
+ return UCASE_LOC_GREEK;
}
}
- } else if(is_a(c)) {
- /* az or aze? */
- c=*locale++;
- if(is_z(c)) {
+ // en, es, ... -> root
+ } else if(c=='z') {
+ return UCASE_LOC_ROOT;
+#if U_CHARSET_FAMILY==U_ASCII_FAMILY
+ } else if(c>='a') { // ASCII a-z = 0x61..0x7a, after A-Z
+#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+ } else if(c<='z') { // EBCDIC a-z = 0x81..0xa9 with two gaps, before A-Z
+#else
+# error Unknown charset family!
+#endif
+ // lowercase c
+ if(c=='t') {
+ /* tr or tur? */
c=*locale++;
- if(is_e(c)) {
+ if(is_u(c)) {
+ c=*locale++;
+ }
+ if(is_r(c)) {
c=*locale;
+ if(is_sep(c)) {
+ return UCASE_LOC_TURKISH;
+ }
}
- if(is_sep(c)) {
- result=UCASE_LOC_TURKISH;
+ } else if(c=='a') {
+ /* az or aze? */
+ c=*locale++;
+ if(is_z(c)) {
+ c=*locale++;
+ if(is_e(c)) {
+ c=*locale;
+ }
+ if(is_sep(c)) {
+ return UCASE_LOC_TURKISH;
+ }
}
- }
- } else if(is_l(c)) {
- /* lt or lit? */
- c=*locale++;
- if(is_i(c)) {
+ } else if(c=='l') {
+ /* lt or lit? */
c=*locale++;
- }
- if(is_t(c)) {
- c=*locale;
- if(is_sep(c)) {
- result=UCASE_LOC_LITHUANIAN;
+ if(is_i(c)) {
+ c=*locale++;
}
- }
- } else if(is_e(c)) {
- /* el or ell? */
- c=*locale++;
- if(is_l(c)) {
+ if(is_t(c)) {
+ c=*locale;
+ if(is_sep(c)) {
+ return UCASE_LOC_LITHUANIAN;
+ }
+ }
+ } else if(c=='n') {
+ /* nl or nld? */
c=*locale++;
if(is_l(c)) {
+ c=*locale++;
+ if(is_d(c)) {
+ c=*locale;
+ }
+ if(is_sep(c)) {
+ return UCASE_LOC_DUTCH;
+ }
+ }
+ }
+ } else {
+ // uppercase c, or anything else not caught by the range check above
+ // Same code as for lowercase c, plus a check for 'E': only lowercase 'e' is handled by the fast path above.
+ if(c=='T') {
+ /* tr or tur? */
+ c=*locale++;
+ if(is_u(c)) {
+ c=*locale++;
+ }
+ if(is_r(c)) {
c=*locale;
+ if(is_sep(c)) {
+ return UCASE_LOC_TURKISH;
+ }
}
- if(is_sep(c)) {
- result=UCASE_LOC_GREEK;
+ } else if(c=='A') {
+ /* az or aze? */
+ c=*locale++;
+ if(is_z(c)) {
+ c=*locale++;
+ if(is_e(c)) {
+ c=*locale;
+ }
+ if(is_sep(c)) {
+ return UCASE_LOC_TURKISH;
+ }
}
- }
- } else if(is_n(c)) {
- /* nl or nld? */
- c=*locale++;
- if(is_l(c)) {
+ } else if(c=='L') {
+ /* lt or lit? */
c=*locale++;
- if(is_d(c)) {
+ if(is_i(c)) {
+ c=*locale++;
+ }
+ if(is_t(c)) {
c=*locale;
+ if(is_sep(c)) {
+ return UCASE_LOC_LITHUANIAN;
+ }
}
- if(is_sep(c)) {
- result=UCASE_LOC_DUTCH;
+ } else if(c=='E') {
+ /* el or ell? */
+ c=*locale++;
+ if(is_l(c)) {
+ c=*locale++;
+ if(is_l(c)) {
+ c=*locale;
+ }
+ if(is_sep(c)) {
+ return UCASE_LOC_GREEK;
+ }
+ }
+ } else if(c=='N') {
+ /* nl or nld? */
+ c=*locale++;
+ if(is_l(c)) {
+ c=*locale++;
+ if(is_d(c)) {
+ c=*locale;
+ }
+ if(is_sep(c)) {
+ return UCASE_LOC_DUTCH;
+ }
}
}
}
-
- if(locCache!=NULL) {
- *locCache=result;
- }
- return result;
+ return UCASE_LOC_ROOT;
}
/*
ucase_toFullLower(const UCaseProps * /* unused csp */, UChar32 c,
UCaseContextIterator *iter, void *context,
const UChar **pString,
- const char *locale, int32_t *locCache) {
+ int32_t loc) {
// The sign of the result has meaning, input must be non-negative so that it can be returned as is.
U_ASSERT(c >= 0);
UChar32 result=c;
if(excWord&UCASE_EXC_CONDITIONAL_SPECIAL) {
/* use hardcoded conditions and mappings */
- int32_t loc=ucase_getCaseLocale(locale, locCache);
/*
* Test for conditional mappings first
toUpperOrTitle(const UCaseProps * /* unused csp */, UChar32 c,
UCaseContextIterator *iter, void *context,
const UChar **pString,
- const char *locale, int32_t *locCache,
+ int32_t loc,
UBool upperNotTitle) {
// The sign of the result has meaning, input must be non-negative so that it can be returned as is.
U_ASSERT(c >= 0);
if(excWord&UCASE_EXC_CONDITIONAL_SPECIAL) {
/* use hardcoded conditions and mappings */
- int32_t loc=ucase_getCaseLocale(locale, locCache);
-
if(loc==UCASE_LOC_TURKISH && c==0x69) {
/*
# Turkish and Azeri
ucase_toFullUpper(const UCaseProps * /* unused csp */, UChar32 c,
UCaseContextIterator *iter, void *context,
const UChar **pString,
- const char *locale, int32_t *locCache) {
- return toUpperOrTitle(&ucase_props_singleton, c, iter, context, pString, locale, locCache, TRUE);
+ int32_t caseLocale) {
+ return toUpperOrTitle(&ucase_props_singleton, c, iter, context, pString, caseLocale, TRUE);
}
U_CAPI int32_t U_EXPORT2
ucase_toFullTitle(const UCaseProps * /* unused csp */, UChar32 c,
UCaseContextIterator *iter, void *context,
const UChar **pString,
- const char *locale, int32_t *locCache) {
- return toUpperOrTitle(&ucase_props_singleton, c, iter, context, pString, locale, locCache, FALSE);
+ int32_t caseLocale) {
+ return toUpperOrTitle(&ucase_props_singleton, c, iter, context, pString, caseLocale, FALSE);
}
/* case folding ------------------------------------------------------------- */
ucase_hasBinaryProperty(UChar32 c, UProperty which) {
/* case mapping properties */
const UChar *resultString;
- int32_t locCache;
switch(which) {
case UCHAR_LOWERCASE:
return (UBool)(UCASE_LOWER==ucase_getType(&ucase_props_singleton, c));
* start sets for normalization and case mappings.
*/
case UCHAR_CHANGES_WHEN_LOWERCASED:
- locCache=UCASE_LOC_ROOT;
- return (UBool)(ucase_toFullLower(&ucase_props_singleton, c, NULL, NULL, &resultString, "", &locCache)>=0);
+ return (UBool)(ucase_toFullLower(&ucase_props_singleton, c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0);
case UCHAR_CHANGES_WHEN_UPPERCASED:
- locCache=UCASE_LOC_ROOT;
- return (UBool)(ucase_toFullUpper(&ucase_props_singleton, c, NULL, NULL, &resultString, "", &locCache)>=0);
+ return (UBool)(ucase_toFullUpper(&ucase_props_singleton, c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0);
case UCHAR_CHANGES_WHEN_TITLECASED:
- locCache=UCASE_LOC_ROOT;
- return (UBool)(ucase_toFullTitle(&ucase_props_singleton, c, NULL, NULL, &resultString, "", &locCache)>=0);
+ return (UBool)(ucase_toFullTitle(&ucase_props_singleton, c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0);
/* case UCHAR_CHANGES_WHEN_CASEFOLDED: -- in uprops.c */
case UCHAR_CHANGES_WHEN_CASEMAPPED:
- locCache=UCASE_LOC_ROOT;
return (UBool)(
- ucase_toFullLower(&ucase_props_singleton, c, NULL, NULL, &resultString, "", &locCache)>=0 ||
- ucase_toFullUpper(&ucase_props_singleton, c, NULL, NULL, &resultString, "", &locCache)>=0 ||
- ucase_toFullTitle(&ucase_props_singleton, c, NULL, NULL, &resultString, "", &locCache)>=0);
+ ucase_toFullLower(&ucase_props_singleton, c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0 ||
+ ucase_toFullUpper(&ucase_props_singleton, c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0 ||
+ ucase_toFullTitle(&ucase_props_singleton, c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0);
default:
return FALSE;
}
* Accepts both 2- and 3-letter codes and accepts case variants.
*/
U_CFUNC int32_t
-ucase_getCaseLocale(const char *locale, int32_t *locCache);
+ucase_getCaseLocale(const char *locale);
/* Casing locale types for ucase_getCaseLocale */
enum {
* @param context Pointer to be passed into iter.
* @param pString If the mapping result is a string, then the pointer is
* written to *pString.
- * @param locale Locale ID for locale-dependent mappings.
- * @param locCache Initialize to 0; may be used to cache the result of parsing
- * the locale ID for subsequent calls.
- * Can be NULL.
+ * @param caseLocale Case locale value from ucase_getCaseLocale().
* @return Output code point or string length, see UCASE_MAX_STRING_LENGTH.
*
* @see UCaseContextIterator
ucase_toFullLower(const UCaseProps *csp, UChar32 c,
UCaseContextIterator *iter, void *context,
const UChar **pString,
- const char *locale, int32_t *locCache);
+ int32_t caseLocale);
U_CAPI int32_t U_EXPORT2
ucase_toFullUpper(const UCaseProps *csp, UChar32 c,
UCaseContextIterator *iter, void *context,
const UChar **pString,
- const char *locale, int32_t *locCache);
+ int32_t caseLocale);
U_CAPI int32_t U_EXPORT2
ucase_toFullTitle(const UCaseProps *csp, UChar32 c,
UCaseContextIterator *iter, void *context,
const UChar **pString,
- const char *locale, int32_t *locCache);
+ int32_t caseLocale);
U_CAPI int32_t U_EXPORT2
ucase_toFullFolding(const UCaseProps *csp, UChar32 c,
UCaseMapFull(const UCaseProps *csp, UChar32 c,
UCaseContextIterator *iter, void *context,
const UChar **pString,
- const char *locale, int32_t *locCache);
+ int32_t caseLocale);
U_CDECL_END
#if !UCONFIG_NO_BREAK_ITERATION
iter(NULL),
#endif
- locCache(UCASE_LOC_UNKNOWN), options(opts) {
+ caseLocale(UCASE_LOC_UNKNOWN), options(opts) {
ucasemap_setLocale(this, localeID, pErrorCode);
}
}
if (locale != NULL && *locale == 0) {
csm->locale[0] = 0;
- csm->locCache = UCASE_LOC_ROOT;
+ csm->caseLocale = UCASE_LOC_ROOT;
return;
}
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
}
if(U_SUCCESS(*pErrorCode)) {
- csm->locCache=UCASE_LOC_UNKNOWN;
- ucase_getCaseLocale(csm->locale, &csm->locCache);
+ csm->caseLocale=UCASE_LOC_UNKNOWN;
+ csm->caseLocale = ucase_getCaseLocale(csm->locale);
} else {
csm->locale[0]=0;
- csm->locCache = UCASE_LOC_ROOT;
+ csm->caseLocale = UCASE_LOC_ROOT;
}
}
}
continue;
}
- c=map(NULL, c, utf8_caseContextIterator, csc, &s, NULL, &caseLocale);
+ c=map(NULL, c, utf8_caseContextIterator, csc, &s, caseLocale);
if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0x7f : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0x7f)) {
/* fast path version of appendResult() for ASCII results */
dest[destIndex++]=(uint8_t)c2;
if(c>=0) {
csc.cpStart=titleStart;
csc.cpLimit=titleLimit;
- c=ucase_toFullTitle(NULL, c, utf8_caseContextIterator, &csc, &s, NULL, &caseLocale);
+ c=ucase_toFullTitle(NULL, c, utf8_caseContextIterator, &csc, &s, caseLocale);
destIndex=appendResult(dest, destIndex, destCapacity, c, s);
} else {
// Malformed UTF-8.
} else if(c>=0) {
const UChar *s;
UChar32 c2 = 0;
- c=ucase_toFullUpper(NULL, c, NULL, NULL, &s, NULL, &caseLocale);
+ c=ucase_toFullUpper(NULL, c, NULL, NULL, &s, caseLocale);
if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0x7f : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0x7f)) {
/* fast path version of appendResult() for ASCII results */
dest[destIndex++]=(uint8_t)c2;
const char *src, int32_t srcLength,
UErrorCode *pErrorCode) {
return ucasemap_mapUTF8(
- csm->locCache, csm->options, UCASEMAP_BREAK_ITERATOR_NULL
+ csm->caseLocale, csm->options, UCASEMAP_BREAK_ITERATOR_NULL
(uint8_t *)dest, destCapacity,
(const uint8_t *)src, srcLength,
ucasemap_internalUTF8ToLower, pErrorCode);
const char *src, int32_t srcLength,
UErrorCode *pErrorCode) {
return ucasemap_mapUTF8(
- csm->locCache, csm->options, UCASEMAP_BREAK_ITERATOR_NULL
+ csm->caseLocale, csm->options, UCASEMAP_BREAK_ITERATOR_NULL
(uint8_t *)dest, destCapacity,
(const uint8_t *)src, srcLength,
ucasemap_internalUTF8ToUpper, pErrorCode);
}
csm->iter->setText(&utext, *pErrorCode);
int32_t length=ucasemap_mapUTF8(
- csm->locCache, csm->options, csm->iter,
+ csm->caseLocale, csm->options, csm->iter,
(uint8_t *)dest, destCapacity,
(const uint8_t *)src, srcLength,
ucasemap_internalUTF8ToTitle, pErrorCode);
int32_t n = getRangeCount();
UChar32 result;
const UChar *full;
- int32_t locCache = 0;
for (int32_t i=0; i<n; ++i) {
UChar32 start = getRangeStart(i);
// add case mappings
// (does not add long s for regular s, or Kelvin for k, for example)
for (UChar32 cp=start; cp<=end; ++cp) {
- result = ucase_toFullLower(csp, cp, NULL, NULL, &full, "", &locCache);
+ result = ucase_toFullLower(csp, cp, NULL, NULL, &full, UCASE_LOC_ROOT);
addCaseMapping(foldSet, result, full, str);
- result = ucase_toFullTitle(csp, cp, NULL, NULL, &full, "", &locCache);
+ result = ucase_toFullTitle(csp, cp, NULL, NULL, &full, UCASE_LOC_ROOT);
addCaseMapping(foldSet, result, full, str);
- result = ucase_toFullUpper(csp, cp, NULL, NULL, &full, "", &locCache);
+ result = ucase_toFullUpper(csp, cp, NULL, NULL, &full, UCASE_LOC_ROOT);
addCaseMapping(foldSet, result, full, str);
result = ucase_toFullFolding(csp, cp, &full, 0);
struct UCaseMap : public icu::UMemory {
/** Implements most of ucasemap_open(). */
UCaseMap(const char *localeID, uint32_t opts, UErrorCode *pErrorCode);
- /** Root locale. */
- UCaseMap(uint32_t opts) :
-#if !UCONFIG_NO_BREAK_ITERATION
- iter(NULL),
-#endif
- locCache(/* UCASE_LOC_ROOT= */ 1), options(opts) {
- locale[0] = 0;
- }
~UCaseMap();
#if !UCONFIG_NO_BREAK_ITERATION
icu::BreakIterator *iter; /* We adopt the iterator, so we own it. */
#endif
char locale[32];
- int32_t locCache;
+ int32_t caseLocale;
uint32_t options;
};
#if UCONFIG_NO_BREAK_ITERATION
-# define UCASEMAP_INITIALIZER { NULL, { 0 }, 0, 0 }
# define UCASEMAP_BREAK_ITERATOR_PARAM
# define UCASEMAP_BREAK_ITERATOR_UNUSED
# define UCASEMAP_BREAK_ITERATOR
# define UCASEMAP_BREAK_ITERATOR_NULL
#else
-# define UCASEMAP_INITIALIZER { NULL, NULL, { 0 }, 0, 0 }
# define UCASEMAP_BREAK_ITERATOR_PARAM icu::BreakIterator *iter,
# define UCASEMAP_BREAK_ITERATOR_UNUSED icu::BreakIterator *,
# define UCASEMAP_BREAK_ITERATOR iter,
U_NAMESPACE_USE
-// TODO: create casemap.cpp
-
/* functions available in the common library (for unistr_case.cpp) */
/* public API functions */
UnicodeString s(srcLength<0, src, srcLength);
csm->iter->setText(s);
return ustrcase_map(
- csm->locCache, csm->options, csm->iter,
+ csm->caseLocale, csm->options, csm->iter,
dest, destCapacity,
src, srcLength,
ustrcase_internalToTitle, NULL, *pErrorCode);
U16_NEXT(src, srcIndex, srcLimit, c);
csc->cpLimit=srcIndex;
const UChar *s;
- c=map(NULL, c, utf16_caseContextIterator, csc, &s, NULL, &caseLocale);
+ c=map(NULL, c, utf16_caseContextIterator, csc, &s, caseLocale);
destIndex = appendResult(dest, destIndex, destCapacity, c, s,
srcIndex - cpStart, options, edits);
if (destIndex < 0) {
csc.cpStart=titleStart;
csc.cpLimit=titleLimit;
const UChar *s;
- c=ucase_toFullTitle(NULL, c, utf16_caseContextIterator, &csc, &s,
- NULL, &caseLocale);
+ c=ucase_toFullTitle(NULL, c, utf16_caseContextIterator, &csc, &s, caseLocale);
destIndex=appendResult(dest, destIndex, destCapacity, c, s,
titleLimit-titleStart, options, edits);
if(destIndex<0) {
}
} else {
const UChar *s;
- c=ucase_toFullUpper(NULL, c, NULL, NULL, &s, NULL, &caseLocale);
+ c=ucase_toFullUpper(NULL, c, NULL, NULL, &s, caseLocale);
destIndex = appendResult(dest, destIndex, destCapacity, c, s,
nextIndex - i, options, edits);
if (destIndex < 0) {
if (*locale == 0) {
return UCASE_LOC_ROOT;
} else {
- return ucase_getCaseLocale(locale, NULL);
+ return ucase_getCaseLocale(locale);
}
}
UnicodeString tmp;
const UChar *s;
UChar32 c;
- int32_t textPos, delta, result, locCache=0;
+ int32_t textPos, delta, result;
for(textPos=offsets.start; textPos<offsets.limit;) {
csc.cpStart=textPos;
c=text.char32At(textPos);
csc.cpLimit=textPos+=U16_LENGTH(c);
- result=fMap(fCsp, c, utrans_rep_caseContextIterator, &csc, &s, "", &locCache);
+ result=fMap(fCsp, c, utrans_rep_caseContextIterator, &csc, &s, UCASE_LOC_ROOT);
if(csc.b1 && isIncremental) {
// fMap() tried to look beyond the context limit
UnicodeString tmp;
const UChar *s;
- int32_t textPos, delta, result, locCache=0;
+ int32_t textPos, delta, result;
for(textPos=offsets.start; textPos<offsets.limit;) {
csc.cpStart=textPos;
type=ucase_getTypeOrIgnorable(fCsp, c);
if(type>=0) { // not case-ignorable
if(doTitle) {
- result=ucase_toFullTitle(fCsp, c, utrans_rep_caseContextIterator, &csc, &s, "", &locCache);
+ result=ucase_toFullTitle(fCsp, c, utrans_rep_caseContextIterator, &csc, &s, UCASE_LOC_ROOT);
} else {
- result=ucase_toFullLower(fCsp, c, utrans_rep_caseContextIterator, &csc, &s, "", &locCache);
+ result=ucase_toFullLower(fCsp, c, utrans_rep_caseContextIterator, &csc, &s, UCASE_LOC_ROOT);
}
doTitle = (UBool)(type==0); // doTitle=isUncased