From 7035c31b6d5dc07bce4a19b6a6dbf7a0b08ab362 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Mon, 9 Jan 2017 23:52:12 +0000 Subject: [PATCH] ICU-12410 add ucasemap_toTitleWithEdits(const UCaseMap *csm, icu::BreakIterator *iter, ...), change UErrorCode pointer to reference internally and in new C++ API functions, add ucasemap_foldCaseWithEdits(), change UCaseMap.iter to C++ BreakIterator X-SVN-Rev: 39551 --- .../common/ucasemap_titlecase_brkiter.cpp | 17 +- icu4c/source/common/unicode/ucasemap.h | 122 +++++++++++-- icu4c/source/common/unicode/unistr.h | 6 +- icu4c/source/common/unistr_case.cpp | 32 ++-- icu4c/source/common/unistr_case_locale.cpp | 4 +- .../common/unistr_titlecase_brkiter.cpp | 15 +- icu4c/source/common/ustr_imp.h | 33 ++-- .../source/common/ustr_titlecase_brkiter.cpp | 63 +++++-- icu4c/source/common/ustrcase.cpp | 164 ++++++++++-------- icu4c/source/common/ustrcase_locale.cpp | 8 +- 10 files changed, 308 insertions(+), 156 deletions(-) diff --git a/icu4c/source/common/ucasemap_titlecase_brkiter.cpp b/icu4c/source/common/ucasemap_titlecase_brkiter.cpp index ab61e21765b..4b607114171 100644 --- a/icu4c/source/common/ucasemap_titlecase_brkiter.cpp +++ b/icu4c/source/common/ucasemap_titlecase_brkiter.cpp @@ -32,14 +32,13 @@ U_NAMESPACE_USE U_CAPI const UBreakIterator * U_EXPORT2 ucasemap_getBreakIterator(const UCaseMap *csm) { - return csm->iter; + return reinterpret_cast(csm->iter); } U_CAPI void U_EXPORT2 ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode * /*pErrorCode*/) { - // Do not call ubrk_close() so that we do not depend on all of the BreakIterator code. - delete reinterpret_cast(csm->iter); - csm->iter=iterToAdopt; + delete csm->iter; + csm->iter=reinterpret_cast(iterToAdopt); } U_CAPI int32_t U_EXPORT2 @@ -49,15 +48,13 @@ ucasemap_utf8ToTitle(UCaseMap *csm, UErrorCode *pErrorCode) { UText utext=UTEXT_INITIALIZER; utext_openUTF8(&utext, (const char *)src, srcLength, pErrorCode); + if(csm->iter==NULL) { + csm->iter=BreakIterator::createWordInstance(Locale(csm->locale), *pErrorCode); + } if(U_FAILURE(*pErrorCode)) { return 0; } - if(csm->iter==NULL) { - csm->iter=ubrk_open(UBRK_WORD, csm->locale, - NULL, 0, - pErrorCode); - } - ubrk_setUText(csm->iter, &utext, pErrorCode); + csm->iter->setText(&utext, *pErrorCode); int32_t length=ucasemap_mapUTF8(csm, (uint8_t *)dest, destCapacity, (const uint8_t *)src, srcLength, diff --git a/icu4c/source/common/unicode/ucasemap.h b/icu4c/source/common/unicode/ucasemap.h index 2bff9ee1404..6f00b072e3a 100644 --- a/icu4c/source/common/unicode/ucasemap.h +++ b/icu4c/source/common/unicode/ucasemap.h @@ -22,15 +22,14 @@ #define __UCASEMAP_H__ #include "unicode/utypes.h" -#include "unicode/ustring.h" #include "unicode/localpointer.h" #if U_SHOW_CPLUSPLUS_API - #include "unicode/uobject.h" - #endif // U_SHOW_CPLUSPLUS_API +#include "unicode/ustring.h" + /** * \file * \brief C API: Unicode case mapping functions using a UCaseMap service object. @@ -89,6 +88,8 @@ ucasemap_close(UCaseMap *csm); U_NAMESPACE_BEGIN +class BreakIterator; + /** * \class LocalUCaseMapPointer * "Smart pointer" class, closes a UCaseMap via ucasemap_close(). @@ -107,6 +108,9 @@ U_DEFINE_LOCAL_OPEN_POINTER(LocalUCaseMapPointer, UCaseMap, ucasemap_close); * Supports replacements, insertions, deletions in linear progression. * Does not support moving/reordering of text. * + * An Edits object tracks a separate UErrorCode, but ICU case mapping functions + * merge any such errors into their API's UErrorCode. + * * @internal ICU 59 technology preview */ class Edits final : public UMemory { @@ -154,21 +158,24 @@ public: /** * Adds a record for an unchanged segment of text. + * Normally called from inside ICU case mapping functions, not user code. * @internal ICU 59 technology preview */ void addUnchanged(int32_t unchangedLength); /** * Adds a record for a text replacement/insertion/deletion. + * Normally called from inside ICU case mapping functions, not user code. * @internal ICU 59 technology preview */ void addReplace(int32_t oldLength, int32_t newLength); /** * Sets the UErrorCode if an error occurred while recording edits. * Preserves older error codes in the outErrorCode. + * Normally called from inside ICU case mapping functions, not user code. * @return TRUE if U_FAILURE(outErrorCode) * @internal ICU 59 technology preview */ - UBool setErrorCode(UErrorCode &outErrorCode); + UBool copyErrorTo(UErrorCode &outErrorCode); /** * How much longer is the new text compared with the old text? @@ -463,7 +470,7 @@ ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode); * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. * @param edits Records edits for index mapping, working with styled text, * and getting only changes (if any). Can be NULL. - * @param pErrorCode Must be a valid pointer to an error code value, + * @param errorCode Reference to an in/out error code value * which must not indicate a failure before the function call. * @return The length of the result string, if successful - or in case of a buffer overflow, * in which case it will be greater than destCapacity. @@ -476,7 +483,7 @@ ucasemap_toLowerWithEdits(const UCaseMap *csm, UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, icu::Edits *edits, - UErrorCode *pErrorCode); + UErrorCode &errorCode); /** * Uppercases the characters in a UTF-16 string and optionally records edits. @@ -495,7 +502,7 @@ ucasemap_toLowerWithEdits(const UCaseMap *csm, * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. * @param edits Records edits for index mapping, working with styled text, * and getting only changes (if any). Can be NULL. - * @param pErrorCode Must be a valid pointer to an error code value, + * @param errorCode Reference to an in/out error code value * which must not indicate a failure before the function call. * @return The length of the result string, if successful - or in case of a buffer overflow, * in which case it will be greater than destCapacity. @@ -508,7 +515,99 @@ ucasemap_toUpperWithEdits(const UCaseMap *csm, UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, icu::Edits *edits, - UErrorCode *pErrorCode); + UErrorCode &errorCode); + +#if !UCONFIG_NO_BREAK_ITERATION + +/** + * Titlecases a UTF-16 string and optionally records edits. + * Casing is locale-dependent and context-sensitive. + * The result may be longer or shorter than the original. + * The source string and the destination buffer must not overlap. + * + * Titlecasing uses a break iterator to find the first characters of words + * that are to be titlecased. It titlecases those characters and lowercases + * all others. (This can be modified with ucasemap_setOptions().) + * + * The titlecase break iterator can be provided to customize for arbitrary + * styles, using rules and dictionaries beyond the standard iterators. + * The standard titlecase iterator for the root locale implements the + * algorithm of Unicode TR 21. + * + * This function uses only the setText(), first() and next() methods of the + * provided break iterator. + * + * @param csm UCaseMap service object. + * @param iter A break iterator to find the first characters of words that are to be titlecased. + * It is set to the source string and used one or more times for iteration. + * If NULL, then a clone of ucasemap_getBreakIterator() is used. + * If that is NULL too, then a word break iterator for the locale is used + * (or something equivalent). + * @param dest A buffer for the result string. The result will be NUL-terminated if + * the buffer is large enough. + * The contents is undefined in case of failure. + * @param destCapacity The size of the buffer (number of bytes). If it is 0, then + * dest may be NULL and the function will only return the length of the result + * without writing any of the result string. + * @param src The original string. + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. + * @param edits Records edits for index mapping, working with styled text, + * and getting only changes (if any). Can be NULL. + * @param errorCode Reference to an in/out error code value + * which must not indicate a failure before the function call. + * @return The length of the result string, if successful - or in case of a buffer overflow, + * in which case it will be greater than destCapacity. + * + * @see u_strToTitle + * @internal ICU 59 technology preview + */ +U_CAPI int32_t U_EXPORT2 +ucasemap_toTitleWithEdits(const UCaseMap *csm, icu::BreakIterator *iter, + UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + icu::Edits *edits, + UErrorCode &errorCode); + +#endif // UCONFIG_NO_BREAK_ITERATION + +/** + * Case-folds the characters in a UTF-16 string and optionally records edits. + * + * Case-folding is locale-independent and not context-sensitive, + * but there is an option for whether to include or exclude mappings for dotted I + * and dotless i that are marked with 'T' in CaseFolding.txt. + * + * The result may be longer or shorter than the original. + * The source string and the destination buffer must not overlap. + * + * @param csm UCaseMap service object. + * @param dest A buffer for the result string. The result will be NUL-terminated if + * the buffer is large enough. + * The contents is undefined in case of failure. + * @param destCapacity The size of the buffer (number of bytes). If it is 0, then + * dest may be NULL and the function will only return the length of the result + * without writing any of the result string. + * @param src The original string. + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. + * @param edits Records edits for index mapping, working with styled text, + * and getting only changes (if any). Can be NULL. + * @param errorCode Reference to an in/out error code value + * which must not indicate a failure before the function call. + * @return The length of the result string, if successful - or in case of a buffer overflow, + * in which case it will be greater than destCapacity. + * + * @see u_strFoldCase + * @see ucasemap_setOptions + * @see U_FOLD_CASE_DEFAULT + * @see U_FOLD_CASE_EXCLUDE_SPECIAL_I + * @internal ICU 59 technology preview + */ +U_CAPI int32_t U_EXPORT2 +ucasemap_foldCaseWithEdits(const UCaseMap *csm, + UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + icu::Edits *edits, + UErrorCode &errorCode); #endif // U_HIDE_INTERNAL_API #endif // U_SHOW_CPLUSPLUS_API @@ -600,7 +699,7 @@ ucasemap_toTitle(UCaseMap *csm, const UChar *src, int32_t srcLength, UErrorCode *pErrorCode); -#endif +#endif // UCONFIG_NO_BREAK_ITERATION /** * Lowercase the characters in a UTF-8 string. @@ -762,10 +861,13 @@ ucasemap_utf8FoldCase(const UCaseMap *csm, */ typedef int32_t U_CALLCONV UStringCaseMapper(const UCaseMap *csm, +#if !UCONFIG_NO_BREAK_ITERATION + icu::BreakIterator *iter, +#endif UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, icu::Edits *edits, - UErrorCode *pErrorCode); + UErrorCode &errorCode); #endif // U_SHOW_CPLUSPLUS_API #endif diff --git a/icu4c/source/common/unicode/unistr.h b/icu4c/source/common/unicode/unistr.h index 4b677b534f9..350828559da 100644 --- a/icu4c/source/common/unicode/unistr.h +++ b/icu4c/source/common/unicode/unistr.h @@ -3573,7 +3573,11 @@ private: * as in ustr_imp.h for ustrcase_map(). */ UnicodeString & - caseMap(const UCaseMap *csm, UStringCaseMapper *stringCaseMapper); + caseMap(const UCaseMap *csm, +#if !UCONFIG_NO_BREAK_ITERATION + BreakIterator *iter, +#endif + UStringCaseMapper *stringCaseMapper); // ref counting void addRef(void); diff --git a/icu4c/source/common/unistr_case.cpp b/icu4c/source/common/unistr_case.cpp index e9771d1ded5..d48831b4b10 100644 --- a/icu4c/source/common/unistr_case.cpp +++ b/icu4c/source/common/unistr_case.cpp @@ -88,7 +88,7 @@ UnicodeString::doCaseCompare(int32_t start, //======================================== UnicodeString & -UnicodeString::caseMap(const UCaseMap *csm, +UnicodeString::caseMap(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM UStringCaseMapper *stringCaseMapper) { if(isEmpty() || !isWritable()) { // nothing to do @@ -121,7 +121,9 @@ UnicodeString::caseMap(const UCaseMap *csm, buffer = fUnion.fStackFields.fBuffer; capacity = US_STACKBUF_SIZE; } - newLength = stringCaseMapper(csm, buffer, capacity, oldArray, oldLength, NULL, &errorCode); + newLength = stringCaseMapper(csm, UCASEMAP_BREAK_ITERATOR + buffer, capacity, + oldArray, oldLength, NULL, errorCode); if (U_SUCCESS(errorCode)) { setLength(newLength); return *this; @@ -140,22 +142,18 @@ UnicodeString::caseMap(const UCaseMap *csm, Edits edits; edits.setWriteUnchanged(FALSE); UChar replacementChars[200]; - stringCaseMapper(csm, replacementChars, UPRV_LENGTHOF(replacementChars), - oldArray, oldLength, &edits, &errorCode); - UErrorCode editsError = U_ZERO_ERROR; - if (edits.setErrorCode(editsError)) { - setToBogus(); - return *this; - } - newLength = oldLength + edits.lengthDelta(); + stringCaseMapper(csm, UCASEMAP_BREAK_ITERATOR + replacementChars, UPRV_LENGTHOF(replacementChars), + oldArray, oldLength, &edits, errorCode); if (U_SUCCESS(errorCode)) { // Grow the buffer at most once, not for multiple doReplace() calls. + newLength = oldLength + edits.lengthDelta(); if (newLength > oldLength && !cloneArrayIfNeeded(newLength, newLength)) { return *this; } - for (Edits::Iterator iter = edits.getCoarseChangesIterator(); iter.next(errorCode);) { - doReplace(iter.destinationIndex(), iter.oldLength(), - replacementChars, iter.replacementIndex(), iter.newLength()); + for (Edits::Iterator ei = edits.getCoarseChangesIterator(); ei.next(errorCode);) { + doReplace(ei.destinationIndex(), ei.oldLength(), + replacementChars, ei.replacementIndex(), ei.newLength()); } if (U_FAILURE(errorCode)) { setToBogus(); @@ -163,6 +161,7 @@ UnicodeString::caseMap(const UCaseMap *csm, return *this; } else if (errorCode == U_BUFFER_OVERFLOW_ERROR) { // common overflow handling below + newLength = oldLength + edits.lengthDelta(); } else { setToBogus(); return *this; @@ -179,8 +178,9 @@ UnicodeString::caseMap(const UCaseMap *csm, return *this; } errorCode = U_ZERO_ERROR; - newLength = stringCaseMapper(csm, getArrayStart(), getCapacity(), - oldArray, oldLength, NULL, &errorCode); + newLength = stringCaseMapper(csm, UCASEMAP_BREAK_ITERATOR + getArrayStart(), getCapacity(), + oldArray, oldLength, NULL, errorCode); if (bufferToDelete) { uprv_free(bufferToDelete); } @@ -197,7 +197,7 @@ UnicodeString::foldCase(uint32_t options) { UCaseMap csm=UCASEMAP_INITIALIZER; csm.csp=ucase_getSingleton(); csm.options=options; - return caseMap(&csm, ustrcase_internalFold); + return caseMap(&csm, UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalFold); } U_NAMESPACE_END diff --git a/icu4c/source/common/unistr_case_locale.cpp b/icu4c/source/common/unistr_case_locale.cpp index a01be5c30b3..4bf55777043 100644 --- a/icu4c/source/common/unistr_case_locale.cpp +++ b/icu4c/source/common/unistr_case_locale.cpp @@ -54,7 +54,7 @@ UnicodeString & UnicodeString::toLower(const Locale &locale) { UCaseMap csm=UCASEMAP_INITIALIZER; setTempCaseMap(&csm, locale.getName()); - return caseMap(&csm, ustrcase_internalToLower); + return caseMap(&csm, UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToLower); } UnicodeString & @@ -66,7 +66,7 @@ UnicodeString & UnicodeString::toUpper(const Locale &locale) { UCaseMap csm=UCASEMAP_INITIALIZER; setTempCaseMap(&csm, locale.getName()); - return caseMap(&csm, ustrcase_internalToUpper); + return caseMap(&csm, UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToUpper); } U_NAMESPACE_END diff --git a/icu4c/source/common/unistr_titlecase_brkiter.cpp b/icu4c/source/common/unistr_titlecase_brkiter.cpp index c909133cdbe..05d38fb5822 100644 --- a/icu4c/source/common/unistr_titlecase_brkiter.cpp +++ b/icu4c/source/common/unistr_titlecase_brkiter.cpp @@ -22,22 +22,11 @@ #if !UCONFIG_NO_BREAK_ITERATION #include "unicode/brkiter.h" -#include "unicode/ubrk.h" #include "unicode/unistr.h" #include "unicode/ustring.h" #include "cmemory.h" #include "ustr_imp.h" -static int32_t U_CALLCONV -unistr_case_internalToTitle(const UCaseMap *csm, - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - icu::Edits *edits, - UErrorCode *pErrorCode) { - ubrk_setText(csm->iter, src, srcLength, pErrorCode); - return ustrcase_internalToTitle(csm, dest, destCapacity, src, srcLength, edits, pErrorCode); -} - /* * Set parameters on an empty UCaseMap, for UCaseMap-less API functions. * Do this fast because it is called with every function call. @@ -80,8 +69,8 @@ UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t return *this; } } - csm.iter=reinterpret_cast(bi); - caseMap(&csm, unistr_case_internalToTitle); + bi->setText(*this); + caseMap(&csm, bi, ustrcase_internalToTitle); if(titleIter==NULL) { delete bi; } diff --git a/icu4c/source/common/ustr_imp.h b/icu4c/source/common/ustr_imp.h index 28bdca1e001..52a44545a0f 100644 --- a/icu4c/source/common/ustr_imp.h +++ b/icu4c/source/common/ustr_imp.h @@ -114,7 +114,7 @@ uprv_loadPropsData(UErrorCode *errorCode);*/ struct UCaseMap { const UCaseProps *csp; #if !UCONFIG_NO_BREAK_ITERATION - UBreakIterator *iter; /* We adopt the iterator, so we own it. */ + icu::BreakIterator *iter; /* We adopt the iterator, so we own it. */ #endif char locale[32]; int32_t locCache; @@ -123,8 +123,16 @@ struct UCaseMap { #if UCONFIG_NO_BREAK_ITERATION # define UCASEMAP_INITIALIZER { NULL, { 0 }, 0, 0 } +# define UCASEMAP_BREAK_ITERATOR_PARAM +# define UCASEMAP_BREAK_ITERATOR_UNUSED +# define UCASEMAP_BREAK_ITERATOR +# define UCASEMAP_BREAK_ITERATOR_NULL #else # define UCASEMAP_INITIALIZER { NULL, NULL, { 0 }, 0, 0 } +# define UCASEMAP_BREAK_ITERATOR_PARAM icu::BreakIterator *iter, +# define UCASEMAP_BREAK_ITERATOR_UNUSED icu::BreakIterator *, +# define UCASEMAP_BREAK_ITERATOR iter, +# define UCASEMAP_BREAK_ITERATOR_NULL NULL, #endif U_CFUNC void @@ -132,51 +140,52 @@ ustrcase_setTempCaseMapLocale(UCaseMap *csm, const char *locale); /** Implements UStringCaseMapper. */ U_CFUNC int32_t U_CALLCONV -ustrcase_internalToLower(const UCaseMap *csm, +ustrcase_internalToLower(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, icu::Edits *edits, - UErrorCode *pErrorCode); + UErrorCode &errorCode); /** Implements UStringCaseMapper. */ U_CFUNC int32_t U_CALLCONV -ustrcase_internalToUpper(const UCaseMap *csm, +ustrcase_internalToUpper(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, icu::Edits *edits, - UErrorCode *pErrorCode); + UErrorCode &errorCode); #if !UCONFIG_NO_BREAK_ITERATION /** Implements UStringCaseMapper. */ U_CFUNC int32_t U_CALLCONV ustrcase_internalToTitle(const UCaseMap *csm, + icu::BreakIterator *iter, UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, icu::Edits *edits, - UErrorCode *pErrorCode); + UErrorCode &errorCode); #endif /** Implements UStringCaseMapper. */ U_CFUNC int32_t U_CALLCONV -ustrcase_internalFold(const UCaseMap *csm, +ustrcase_internalFold(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, icu::Edits *edits, - UErrorCode *pErrorCode); + UErrorCode &errorCode); /** * Common string case mapping implementation for ucasemap_toXyz() and UnicodeString::toXyz(). * Implements argument checking. */ U_CFUNC int32_t -ustrcase_map(const UCaseMap *csm, +ustrcase_map(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, UStringCaseMapper *stringCaseMapper, icu::Edits *edits, - UErrorCode *pErrorCode); + UErrorCode &errorCode); /** * Common string case mapping implementation for old-fashioned u_strToXyz() functions @@ -184,11 +193,11 @@ ustrcase_map(const UCaseMap *csm, * Implements argument checking and internally works with an intermediate buffer if necessary. */ U_CFUNC int32_t -ustrcase_mapWithOverlap(const UCaseMap *csm, +ustrcase_mapWithOverlap(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, UStringCaseMapper *stringCaseMapper, - UErrorCode *pErrorCode); + UErrorCode &errorCode); /** * UTF-8 string case mapping function type, used by ucasemap_mapUTF8(). diff --git a/icu4c/source/common/ustr_titlecase_brkiter.cpp b/icu4c/source/common/ustr_titlecase_brkiter.cpp index 463ba2c546f..bf4f252c541 100644 --- a/icu4c/source/common/ustr_titlecase_brkiter.cpp +++ b/icu4c/source/common/ustr_titlecase_brkiter.cpp @@ -22,6 +22,7 @@ #if !UCONFIG_NO_BREAK_ITERATION #include "unicode/brkiter.h" +#include "unicode/localpointer.h" #include "unicode/ubrk.h" #include "unicode/ucasemap.h" #include "cmemory.h" @@ -57,20 +58,51 @@ u_strToTitle(UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode) { UCaseMap csm=UCASEMAP_INITIALIZER; setTempCaseMap(&csm, locale); + icu::LocalPointer ownedIter; + icu::BreakIterator *iter; if(titleIter!=NULL) { - ubrk_setText(csm.iter=titleIter, src, srcLength, pErrorCode); + iter=reinterpret_cast(titleIter); } else { - csm.iter=ubrk_open(UBRK_WORD, csm.locale, src, srcLength, pErrorCode); + iter=icu::BreakIterator::createWordInstance(icu::Locale(csm.locale), *pErrorCode); + ownedIter.adoptInstead(iter); } - int32_t length=ustrcase_mapWithOverlap( - &csm, + if(U_FAILURE(*pErrorCode)) { + return 0; + } + icu::UnicodeString s(srcLength<0, src, srcLength); + iter->setText(s); + return ustrcase_mapWithOverlap( + &csm, iter, dest, destCapacity, src, srcLength, - ustrcase_internalToTitle, pErrorCode); - if(titleIter==NULL && csm.iter!=NULL) { - ubrk_close(csm.iter); + ustrcase_internalToTitle, *pErrorCode); +} + +U_CAPI int32_t U_EXPORT2 +ucasemap_toTitleWithEdits(const UCaseMap *csm, icu::BreakIterator *iter, + UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + icu::Edits *edits, + UErrorCode &errorCode) { + icu::LocalPointer ownedIter; + if(iter==NULL) { + if(csm->iter!=NULL) { + iter=csm->iter->clone(); + } else { + iter=icu::BreakIterator::createWordInstance(icu::Locale(csm->locale), errorCode); + } + ownedIter.adoptInsteadAndCheckErrorCode(iter, errorCode); + } + if(U_FAILURE(errorCode)) { + return 0; } - return length; + icu::UnicodeString s(srcLength<0, src, srcLength); + iter->setText(s); + return ustrcase_map( + csm, iter, + dest, destCapacity, + src, srcLength, + ustrcase_internalToTitle, edits, errorCode); } U_CAPI int32_t U_EXPORT2 @@ -78,16 +110,19 @@ ucasemap_toTitle(UCaseMap *csm, UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, UErrorCode *pErrorCode) { - if(csm->iter!=NULL) { - ubrk_setText(csm->iter, src, srcLength, pErrorCode); - } else { - csm->iter=ubrk_open(UBRK_WORD, csm->locale, src, srcLength, pErrorCode); + if(csm->iter==NULL) { + csm->iter=icu::BreakIterator::createWordInstance(icu::Locale(csm->locale), *pErrorCode); + } + if(U_FAILURE(*pErrorCode)) { + return 0; } + icu::UnicodeString s(srcLength<0, src, srcLength); + csm->iter->setText(s); return ustrcase_map( - csm, + csm, csm->iter, dest, destCapacity, src, srcLength, - ustrcase_internalToTitle, NULL, pErrorCode); + ustrcase_internalToTitle, NULL, *pErrorCode); } #endif // !UCONFIG_NO_BREAK_ITERATION diff --git a/icu4c/source/common/ustrcase.cpp b/icu4c/source/common/ustrcase.cpp index 35b5d6370da..c437683c6f4 100644 --- a/icu4c/source/common/ustrcase.cpp +++ b/icu4c/source/common/ustrcase.cpp @@ -207,7 +207,7 @@ UBool Edits::growArray() { return TRUE; } -UBool Edits::setErrorCode(UErrorCode &outErrorCode) { +UBool Edits::copyErrorTo(UErrorCode &outErrorCode) { if (U_FAILURE(outErrorCode)) { return TRUE; } if (U_SUCCESS(errorCode)) { return FALSE; } outErrorCode = errorCode; @@ -374,6 +374,22 @@ UBool Edits::Iterator::findSourceIndex(int32_t i, UErrorCode &errorCode) { return FALSE; } +namespace { + +int32_t checkOverflowAndEditsError(int32_t destIndex, int32_t destCapacity, + Edits *edits, UErrorCode &errorCode) { + if (U_SUCCESS(errorCode)) { + if (destIndex > destCapacity) { + errorCode = U_BUFFER_OVERFLOW_ERROR; + } else if (edits != NULL) { + edits->copyErrorTo(errorCode); + } + } + return destIndex; +} + +} // namespace + U_NAMESPACE_END U_NAMESPACE_USE @@ -527,7 +543,7 @@ _caseMap(const UCaseMap *csm, UCaseMapFull *map, const UChar *src, UCaseContext *csc, int32_t srcStart, int32_t srcLimit, icu::Edits *edits, - UErrorCode *pErrorCode) { + UErrorCode &errorCode) { int32_t locCache=csm->locCache; /* case mapping loop */ @@ -544,33 +560,26 @@ _caseMap(const UCaseMap *csm, UCaseMapFull *map, destIndex = appendResult(dest, destIndex, destCapacity, c, s, srcIndex - cpStart, edits); if (destIndex < 0) { - *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; + errorCode = U_INDEX_OUTOFBOUNDS_ERROR; return 0; } } - if(destIndex>destCapacity) { - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } return destIndex; } #if !UCONFIG_NO_BREAK_ITERATION U_CFUNC int32_t U_CALLCONV -ustrcase_internalToTitle(const UCaseMap *csm, +ustrcase_internalToTitle(const UCaseMap *csm, BreakIterator *iter, UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, icu::Edits *edits, - UErrorCode *pErrorCode) { - if(U_FAILURE(*pErrorCode)) { + UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { return 0; } - // Use the C++ abstract base class to minimize dependencies. - // TODO: Change UCaseMap.iter to store a BreakIterator directly. - BreakIterator *bi=reinterpret_cast(csm->iter); - /* set up local variables */ int32_t locCache=csm->locCache; UCaseContext csc=UCASECONTEXT_INITIALIZER; @@ -586,9 +595,9 @@ ustrcase_internalToTitle(const UCaseMap *csm, int32_t idx; if(isFirstIndex) { isFirstIndex=FALSE; - idx=bi->first(); + idx=iter->first(); } else { - idx=bi->next(); + idx=iter->next(); } if(idx==UBRK_DONE || idx>srcLength) { idx=srcLength; @@ -632,7 +641,7 @@ ustrcase_internalToTitle(const UCaseMap *csm, destIndex=appendUnchanged(dest, destIndex, destCapacity, src+prev, titleStart-prev, edits); if(destIndex<0) { - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; } } @@ -647,7 +656,7 @@ ustrcase_internalToTitle(const UCaseMap *csm, destIndex=appendResult(dest, destIndex, destCapacity, c, s, titleLimit-titleStart, edits); if(destIndex<0) { - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; } @@ -658,7 +667,7 @@ ustrcase_internalToTitle(const UCaseMap *csm, (src[titleStart+1] == 0x004A || src[titleStart+1] == 0x006A)) { destIndex=appendUChar(dest, destIndex, destCapacity, 0x004A); if(destIndex<0) { - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; } if(edits!=NULL) { @@ -677,11 +686,11 @@ ustrcase_internalToTitle(const UCaseMap *csm, dest+destIndex, destCapacity-destIndex, src, &csc, titleLimit, idx, - edits, pErrorCode); - if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { - *pErrorCode=U_ZERO_ERROR; + edits, errorCode); + if(errorCode==U_BUFFER_OVERFLOW_ERROR) { + errorCode=U_ZERO_ERROR; } - if(U_FAILURE(*pErrorCode)) { + if(U_FAILURE(errorCode)) { return destIndex; } } else { @@ -689,7 +698,7 @@ ustrcase_internalToTitle(const UCaseMap *csm, destIndex=appendUnchanged(dest, destIndex, destCapacity, src+titleLimit, idx-titleLimit, edits); if(destIndex<0) { - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; } } @@ -700,10 +709,7 @@ ustrcase_internalToTitle(const UCaseMap *csm, prev=idx; } - if(destIndex>destCapacity) { - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - return destIndex; + return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode); } #endif // !UCONFIG_NO_BREAK_ITERATION @@ -1191,7 +1197,7 @@ int32_t toUpper(const UCaseMap *csm, UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, Edits *edits, - UErrorCode *pErrorCode) { + UErrorCode &errorCode) { int32_t locCache = UCASE_LOC_GREEK; int32_t destIndex=0; uint32_t state = 0; @@ -1310,7 +1316,7 @@ int32_t toUpper(const UCaseMap *csm, --numYpogegrammeni; } if(destIndex<0) { - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0; } } @@ -1320,7 +1326,7 @@ int32_t toUpper(const UCaseMap *csm, destIndex = appendResult(dest, destIndex, destCapacity, c, s, nextIndex - i, edits); if (destIndex < 0) { - *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; + errorCode = U_INDEX_OUTOFBOUNDS_ERROR; return 0; } } @@ -1328,10 +1334,7 @@ int32_t toUpper(const UCaseMap *csm, state = nextState; } - if(destIndex>destCapacity) { - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - } - return destIndex; + return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode); } } // namespace GreekUpper @@ -1340,47 +1343,49 @@ U_NAMESPACE_END /* functions available in the common library (for unistr_case.cpp) */ U_CFUNC int32_t U_CALLCONV -ustrcase_internalToLower(const UCaseMap *csm, +ustrcase_internalToLower(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_UNUSED UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, icu::Edits *edits, - UErrorCode *pErrorCode) { + UErrorCode &errorCode) { UCaseContext csc=UCASECONTEXT_INITIALIZER; csc.p=(void *)src; csc.limit=srcLength; - return _caseMap( + int32_t destIndex = _caseMap( csm, ucase_toFullLower, dest, destCapacity, src, &csc, 0, srcLength, - edits, pErrorCode); + edits, errorCode); + return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode); } U_CFUNC int32_t U_CALLCONV -ustrcase_internalToUpper(const UCaseMap *csm, +ustrcase_internalToUpper(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_UNUSED UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, icu::Edits *edits, - UErrorCode *pErrorCode) { + UErrorCode &errorCode) { int32_t locCache = csm->locCache; if (ucase_getCaseLocale(csm->locale, &locCache) == UCASE_LOC_GREEK) { - return GreekUpper::toUpper(csm, dest, destCapacity, src, srcLength, edits, pErrorCode); + return GreekUpper::toUpper(csm, dest, destCapacity, src, srcLength, edits, errorCode); } UCaseContext csc=UCASECONTEXT_INITIALIZER; csc.p=(void *)src; csc.limit=srcLength; - return _caseMap( + int32_t destIndex = _caseMap( csm, ucase_toFullUpper, dest, destCapacity, src, &csc, 0, srcLength, - edits, pErrorCode); + edits, errorCode); + return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode); } U_CFUNC int32_t U_CALLCONV -ustrcase_internalFold(const UCaseMap *csm, +ustrcase_internalFold(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_UNUSED UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, icu::Edits *edits, - UErrorCode *pErrorCode) { + UErrorCode &errorCode) { /* case mapping loop */ int32_t srcIndex = 0; int32_t destIndex = 0; @@ -1393,29 +1398,25 @@ ustrcase_internalFold(const UCaseMap *csm, destIndex = appendResult(dest, destIndex, destCapacity, c, s, srcIndex - cpStart, edits); if (destIndex < 0) { - *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; + errorCode = U_INDEX_OUTOFBOUNDS_ERROR; return 0; } } - // TODO: are these internal functions called where destIndex>destCapacity is not already checked? (see u_terminateUChars()) - if (destIndex > destCapacity) { - *pErrorCode = U_BUFFER_OVERFLOW_ERROR; - } - return destIndex; + return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode); } U_CFUNC int32_t -ustrcase_map(const UCaseMap *csm, +ustrcase_map(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, UStringCaseMapper *stringCaseMapper, icu::Edits *edits, - UErrorCode *pErrorCode) { + UErrorCode &errorCode) { int32_t destLength; /* check argument values */ - if(U_FAILURE(*pErrorCode)) { + if(U_FAILURE(errorCode)) { return 0; } if( destCapacity<0 || @@ -1423,7 +1424,7 @@ ustrcase_map(const UCaseMap *csm, src==NULL || srcLength<-1 ) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + errorCode=U_ILLEGAL_ARGUMENT_ERROR; return 0; } @@ -1437,27 +1438,28 @@ ustrcase_map(const UCaseMap *csm, ((src>=dest && src<(dest+destCapacity)) || (dest>=src && dest<(src+srcLength))) ) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + errorCode=U_ILLEGAL_ARGUMENT_ERROR; return 0; } - destLength=stringCaseMapper(csm, dest, destCapacity, src, srcLength, edits, pErrorCode); - return u_terminateUChars(dest, destCapacity, destLength, pErrorCode); + destLength=stringCaseMapper(csm, UCASEMAP_BREAK_ITERATOR + dest, destCapacity, src, srcLength, edits, errorCode); + return u_terminateUChars(dest, destCapacity, destLength, &errorCode); } U_CFUNC int32_t -ustrcase_mapWithOverlap(const UCaseMap *csm, +ustrcase_mapWithOverlap(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, UStringCaseMapper *stringCaseMapper, - UErrorCode *pErrorCode) { + UErrorCode &errorCode) { UChar buffer[300]; UChar *temp; int32_t destLength; /* check argument values */ - if(U_FAILURE(*pErrorCode)) { + if(U_FAILURE(errorCode)) { return 0; } if( destCapacity<0 || @@ -1465,7 +1467,7 @@ ustrcase_mapWithOverlap(const UCaseMap *csm, src==NULL || srcLength<-1 ) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + errorCode=U_ILLEGAL_ARGUMENT_ERROR; return 0; } @@ -1487,7 +1489,7 @@ ustrcase_mapWithOverlap(const UCaseMap *csm, /* allocate a buffer */ temp=(UChar *)uprv_malloc(destCapacity*U_SIZEOF_UCHAR); if(temp==NULL) { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + errorCode=U_MEMORY_ALLOCATION_ERROR; return 0; } } @@ -1495,10 +1497,11 @@ ustrcase_mapWithOverlap(const UCaseMap *csm, temp=dest; } - destLength=stringCaseMapper(csm, temp, destCapacity, src, srcLength, NULL, pErrorCode); + destLength=stringCaseMapper(csm, UCASEMAP_BREAK_ITERATOR + temp, destCapacity, src, srcLength, NULL, errorCode); if(temp!=dest) { /* copy the result string to the destination buffer */ - if (U_SUCCESS(*pErrorCode) && 0 < destLength && destLength <= destCapacity) { + if (U_SUCCESS(errorCode) && 0 < destLength && destLength <= destCapacity) { u_memmove(dest, temp, destLength); } if(temp!=buffer) { @@ -1506,7 +1509,7 @@ ustrcase_mapWithOverlap(const UCaseMap *csm, } } - return u_terminateUChars(dest, destCapacity, destLength, pErrorCode); + return u_terminateUChars(dest, destCapacity, destLength, &errorCode); } /* public API functions */ @@ -1520,10 +1523,10 @@ u_strFoldCase(UChar *dest, int32_t destCapacity, csm.csp=ucase_getSingleton(); csm.options=options; return ustrcase_mapWithOverlap( - &csm, + &csm, UCASEMAP_BREAK_ITERATOR_NULL dest, destCapacity, src, srcLength, - ustrcase_internalFold, pErrorCode); + ustrcase_internalFold, *pErrorCode); } U_CAPI int32_t U_EXPORT2 @@ -1531,12 +1534,12 @@ ucasemap_toLowerWithEdits(const UCaseMap *csm, UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, icu::Edits *edits, - UErrorCode *pErrorCode) { + UErrorCode &errorCode) { return ustrcase_map( - csm, + csm, UCASEMAP_BREAK_ITERATOR_NULL dest, destCapacity, src, srcLength, - ustrcase_internalToLower, edits, pErrorCode); + ustrcase_internalToLower, edits, errorCode); } U_CAPI int32_t U_EXPORT2 @@ -1544,12 +1547,25 @@ ucasemap_toUpperWithEdits(const UCaseMap *csm, UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, icu::Edits *edits, - UErrorCode *pErrorCode) { + UErrorCode &errorCode) { + return ustrcase_map( + csm, UCASEMAP_BREAK_ITERATOR_NULL + dest, destCapacity, + src, srcLength, + ustrcase_internalToUpper, edits, errorCode); +} + +U_CAPI int32_t U_EXPORT2 +ucasemap_foldCaseWithEdits(const UCaseMap *csm, + UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + icu::Edits *edits, + UErrorCode &errorCode) { return ustrcase_map( - csm, + csm, UCASEMAP_BREAK_ITERATOR_NULL dest, destCapacity, src, srcLength, - ustrcase_internalToUpper, edits, pErrorCode); + ustrcase_internalFold, edits, errorCode); } /* case-insensitive string comparisons -------------------------------------- */ diff --git a/icu4c/source/common/ustrcase_locale.cpp b/icu4c/source/common/ustrcase_locale.cpp index 20749e1d66e..5e6e1418247 100644 --- a/icu4c/source/common/ustrcase_locale.cpp +++ b/icu4c/source/common/ustrcase_locale.cpp @@ -91,10 +91,10 @@ u_strToLower(UChar *dest, int32_t destCapacity, UCaseMap csm=UCASEMAP_INITIALIZER; setTempCaseMap(&csm, locale); return ustrcase_mapWithOverlap( - &csm, + &csm, UCASEMAP_BREAK_ITERATOR_NULL dest, destCapacity, src, srcLength, - ustrcase_internalToLower, pErrorCode); + ustrcase_internalToLower, *pErrorCode); } U_CAPI int32_t U_EXPORT2 @@ -105,8 +105,8 @@ u_strToUpper(UChar *dest, int32_t destCapacity, UCaseMap csm=UCASEMAP_INITIALIZER; setTempCaseMap(&csm, locale); return ustrcase_mapWithOverlap( - &csm, + &csm, UCASEMAP_BREAK_ITERATOR_NULL dest, destCapacity, src, srcLength, - ustrcase_internalToUpper, pErrorCode); + ustrcase_internalToUpper, *pErrorCode); } -- 2.40.0